diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..47d99ca8d6ad0d50ff03bb723134c0f5c3f8d60f 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-5500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-5600/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-5676/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53e81daab83e3bcca086d899906089022cd21590 --- /dev/null +++ b/README.md @@ -0,0 +1,62 @@ +--- +library_name: peft +license: other +base_model: Qwen/Qwen2.5-VL-7B-Instruct +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +pipeline_tag: text-generation +model-index: +- name: Qwen2.5-VL-7B-sft-invalid + results: [] +--- + + + +# Qwen2.5-VL-7B-sft-invalid + +This model is a fine-tuned version of [Qwen/Qwen2.5-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct) on the rule_filter_invalid dataset. 
+ +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 2e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- gradient_accumulation_steps: 2 +- total_train_batch_size: 4 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_steps: 0.05 +- num_epochs: 2 + +### Training results + + + +### Framework versions + +- PEFT 0.18.1 +- Transformers 5.2.0 +- Pytorch 2.5.1+cu124 +- Datasets 4.0.0 +- Tokenizers 0.22.2 \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8224bce2b962b82e55c954c000a28629995b1870 --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "layers.8.mlp.gate_proj", + "layers.20.mlp.up_proj", + "layers.25.mlp.down_proj", + "layers.21.mlp.down_proj", + 
"layers.22.mlp.gate_proj", + "layers.23.mlp.gate_proj", + "layers.11.mlp.gate_proj", + "layers.13.mlp.down_proj", + "layers.8.mlp.down_proj", + "layers.18.mlp.gate_proj", + "layers.7.mlp.down_proj", + "layers.1.mlp.down_proj", + "layers.20.mlp.gate_proj", + "layers.16.mlp.down_proj", + "layers.23.mlp.down_proj", + "layers.12.mlp.gate_proj", + "layers.12.mlp.down_proj", + "layers.1.mlp.up_proj", + "layers.2.mlp.gate_proj", + "layers.15.mlp.down_proj", + "layers.25.mlp.up_proj", + "layers.27.mlp.down_proj", + "layers.19.mlp.down_proj", + "layers.14.mlp.gate_proj", + "layers.27.mlp.gate_proj", + "layers.18.mlp.up_proj", + "layers.2.mlp.up_proj", + "layers.0.mlp.gate_proj", + "q_proj", + "layers.10.mlp.down_proj", + "layers.11.mlp.up_proj", + "layers.15.mlp.up_proj", + "layers.7.mlp.up_proj", + "layers.8.mlp.up_proj", + "layers.3.mlp.up_proj", + "layers.10.mlp.up_proj", + "layers.23.mlp.up_proj", + "layers.19.mlp.gate_proj", + "layers.18.mlp.down_proj", + "layers.0.mlp.up_proj", + "layers.12.mlp.up_proj", + "layers.13.mlp.up_proj", + "o_proj", + "layers.5.mlp.down_proj", + "layers.14.mlp.down_proj", + "layers.2.mlp.down_proj", + "layers.6.mlp.down_proj", + "layers.6.mlp.gate_proj", + "layers.25.mlp.gate_proj", + "layers.3.mlp.down_proj", + "k_proj", + "layers.17.mlp.gate_proj", + "layers.24.mlp.gate_proj", + "layers.1.mlp.gate_proj", + "layers.4.mlp.down_proj", + "layers.9.mlp.down_proj", + "layers.0.mlp.down_proj", + "layers.9.mlp.gate_proj", + "layers.5.mlp.gate_proj", + "layers.24.mlp.up_proj", + "layers.20.mlp.down_proj", + "layers.16.mlp.gate_proj", + "layers.26.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.22.mlp.down_proj", + "layers.16.mlp.up_proj", + "v_proj", + "layers.27.mlp.up_proj", + "layers.13.mlp.gate_proj", + "layers.21.mlp.up_proj", + "layers.4.mlp.gate_proj", + "layers.9.mlp.up_proj", + "layers.3.mlp.gate_proj", + "layers.7.mlp.gate_proj", + "layers.4.mlp.up_proj", + "layers.24.mlp.down_proj", + "layers.26.mlp.gate_proj", + 
"layers.22.mlp.up_proj", + "layers.11.mlp.down_proj", + "layers.10.mlp.gate_proj", + "layers.5.mlp.up_proj", + "layers.21.mlp.gate_proj", + "layers.6.mlp.up_proj", + "layers.19.mlp.up_proj", + "layers.14.mlp.up_proj", + "layers.26.mlp.down_proj", + "layers.17.mlp.up_proj", + "layers.15.mlp.gate_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a21515a12bad5a118247ac5cba120ba0c896fe7c --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96d3fb56e6df5a684a21c26f774fc41f3263352eede9bf1cdb9bf42fec9c1296 +size 323020440 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3d979498e22845874be30ee7b489effa2a917acc --- /dev/null +++ b/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 2.0, + "total_flos": 1754791774076928.0, + "train_loss": 0.6725200974527508, + "train_runtime": 30148.3569, + "train_samples_per_second": 0.753, + "train_steps_per_second": 0.188 +} \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in 
content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-5500/README.md b/checkpoint-5500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4005c4d8e7a819833408da4794e4e74d2ced6553 --- /dev/null +++ b/checkpoint-5500/README.md @@ -0,0 +1,208 @@ +--- +base_model: Qwen/Qwen2.5-VL-7B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be 
made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-5500/adapter_config.json b/checkpoint-5500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8224bce2b962b82e55c954c000a28629995b1870 --- /dev/null +++ b/checkpoint-5500/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + 
"layers.8.mlp.gate_proj", + "layers.20.mlp.up_proj", + "layers.25.mlp.down_proj", + "layers.21.mlp.down_proj", + "layers.22.mlp.gate_proj", + "layers.23.mlp.gate_proj", + "layers.11.mlp.gate_proj", + "layers.13.mlp.down_proj", + "layers.8.mlp.down_proj", + "layers.18.mlp.gate_proj", + "layers.7.mlp.down_proj", + "layers.1.mlp.down_proj", + "layers.20.mlp.gate_proj", + "layers.16.mlp.down_proj", + "layers.23.mlp.down_proj", + "layers.12.mlp.gate_proj", + "layers.12.mlp.down_proj", + "layers.1.mlp.up_proj", + "layers.2.mlp.gate_proj", + "layers.15.mlp.down_proj", + "layers.25.mlp.up_proj", + "layers.27.mlp.down_proj", + "layers.19.mlp.down_proj", + "layers.14.mlp.gate_proj", + "layers.27.mlp.gate_proj", + "layers.18.mlp.up_proj", + "layers.2.mlp.up_proj", + "layers.0.mlp.gate_proj", + "q_proj", + "layers.10.mlp.down_proj", + "layers.11.mlp.up_proj", + "layers.15.mlp.up_proj", + "layers.7.mlp.up_proj", + "layers.8.mlp.up_proj", + "layers.3.mlp.up_proj", + "layers.10.mlp.up_proj", + "layers.23.mlp.up_proj", + "layers.19.mlp.gate_proj", + "layers.18.mlp.down_proj", + "layers.0.mlp.up_proj", + "layers.12.mlp.up_proj", + "layers.13.mlp.up_proj", + "o_proj", + "layers.5.mlp.down_proj", + "layers.14.mlp.down_proj", + "layers.2.mlp.down_proj", + "layers.6.mlp.down_proj", + "layers.6.mlp.gate_proj", + "layers.25.mlp.gate_proj", + "layers.3.mlp.down_proj", + "k_proj", + "layers.17.mlp.gate_proj", + "layers.24.mlp.gate_proj", + "layers.1.mlp.gate_proj", + "layers.4.mlp.down_proj", + "layers.9.mlp.down_proj", + "layers.0.mlp.down_proj", + "layers.9.mlp.gate_proj", + "layers.5.mlp.gate_proj", + "layers.24.mlp.up_proj", + "layers.20.mlp.down_proj", + "layers.16.mlp.gate_proj", + "layers.26.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.22.mlp.down_proj", + "layers.16.mlp.up_proj", + "v_proj", + "layers.27.mlp.up_proj", + "layers.13.mlp.gate_proj", + "layers.21.mlp.up_proj", + "layers.4.mlp.gate_proj", + "layers.9.mlp.up_proj", + "layers.3.mlp.gate_proj", + 
"layers.7.mlp.gate_proj", + "layers.4.mlp.up_proj", + "layers.24.mlp.down_proj", + "layers.26.mlp.gate_proj", + "layers.22.mlp.up_proj", + "layers.11.mlp.down_proj", + "layers.10.mlp.gate_proj", + "layers.5.mlp.up_proj", + "layers.21.mlp.gate_proj", + "layers.6.mlp.up_proj", + "layers.19.mlp.up_proj", + "layers.14.mlp.up_proj", + "layers.26.mlp.down_proj", + "layers.17.mlp.up_proj", + "layers.15.mlp.gate_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-5500/adapter_model.safetensors b/checkpoint-5500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..22c978cb4725434ac19e0d709a663648b1901596 --- /dev/null +++ b/checkpoint-5500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a8178ca4c7532707b2b54222a6f31bb395c607fda2b405f15a37d6955a1f246 +size 323020440 diff --git a/checkpoint-5500/chat_template.jinja b/checkpoint-5500/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/checkpoint-5500/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% 
set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-5500/global_step5500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-5500/global_step5500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cda276d0ab41e47e339f9cde817c295d78a0a50f --- /dev/null +++ b/checkpoint-5500/global_step5500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc2d2ff586da9a72f910a1f6438248360b20ef06238a24131caabd40f9572b3b +size 1937772272 diff --git a/checkpoint-5500/global_step5500/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-5500/global_step5500/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b769e3566b1020a2a566f40b750161cf7207190d --- /dev/null +++ b/checkpoint-5500/global_step5500/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c472f840f7dabdee339d1a4af8c8ecdef10559844bfc423aee9f7df20e9abdf9 +size 460630 diff --git a/checkpoint-5500/latest b/checkpoint-5500/latest new file mode 100644 index 0000000000000000000000000000000000000000..41839c45b4fb5c8ea5347bfa9d3bcaf41fddc2a7 --- /dev/null +++ b/checkpoint-5500/latest @@ -0,0 +1 @@ +global_step5500 \ No newline at end of file diff --git a/checkpoint-5500/processor_config.json b/checkpoint-5500/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e717d9bf475c411369034636e82e48cf79108a8 --- /dev/null +++ b/checkpoint-5500/processor_config.json @@ -0,0 +1,63 @@ +{ + "image_processor": { + "data_format": 
"channels_first", + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "merge_size": 2, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 + }, + "processor_class": "Qwen2_5_VLProcessor", + "video_processor": { + "data_format": "channels_first", + "default_to_square": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": false, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "max_frames": 768, + "merge_size": 2, + "min_frames": 4, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2, + "video_processor_type": "Qwen2VLVideoProcessor" + } +} diff --git a/checkpoint-5500/rng_state.pth b/checkpoint-5500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9fb4f0f90727d0cbf5db72f1513b46bfb3df5563 --- /dev/null +++ b/checkpoint-5500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4319449f0ba2cda22460e265490d63f3f3364bd349a676dd90ad602c38f32a1 +size 14244 diff --git a/checkpoint-5500/scheduler.pt b/checkpoint-5500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..735fce48cafc058006e9ac7b67bd7540d9b6ac0f --- /dev/null +++ b/checkpoint-5500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c38aed90a6b23292fbd580db4f3ec8784ebd8674e1ecdc5a0166d240809f4513 +size 
1000 diff --git a/checkpoint-5500/tokenizer.json b/checkpoint-5500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/checkpoint-5500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/checkpoint-5500/tokenizer_config.json b/checkpoint-5500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/checkpoint-5500/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/checkpoint-5500/trainer_state.json b/checkpoint-5500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2918cd0a871494eef4d0bb513b972b7b59e66bf0 --- /dev/null +++ b/checkpoint-5500/trainer_state.json @@ -0,0 +1,38534 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9381497797356828, + "eval_steps": 500, + "global_step": 5500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0003524229074889868, + "grad_norm": 
1.6512674233185107, + "learning_rate": 0.0, + "loss": 1.493973731994629, + "step": 1 + }, + { + "epoch": 0.0007048458149779736, + "grad_norm": 1.4463228571593894, + "learning_rate": 7.042253521126761e-08, + "loss": 1.3692013025283813, + "step": 2 + }, + { + "epoch": 0.0010572687224669603, + "grad_norm": 1.4036766254408197, + "learning_rate": 1.4084507042253522e-07, + "loss": 1.3996260166168213, + "step": 3 + }, + { + "epoch": 0.0014096916299559472, + "grad_norm": 1.29446596506829, + "learning_rate": 2.1126760563380284e-07, + "loss": 1.3011515140533447, + "step": 4 + }, + { + "epoch": 0.001762114537444934, + "grad_norm": 1.5130555881795185, + "learning_rate": 2.8169014084507043e-07, + "loss": 1.3736083507537842, + "step": 5 + }, + { + "epoch": 0.0021145374449339205, + "grad_norm": 1.247517750517551, + "learning_rate": 3.521126760563381e-07, + "loss": 1.051241159439087, + "step": 6 + }, + { + "epoch": 0.0024669603524229075, + "grad_norm": 1.611437944890658, + "learning_rate": 4.225352112676057e-07, + "loss": 1.2594621181488037, + "step": 7 + }, + { + "epoch": 0.0028193832599118945, + "grad_norm": 1.4604380967241444, + "learning_rate": 4.929577464788733e-07, + "loss": 1.0498416423797607, + "step": 8 + }, + { + "epoch": 0.003171806167400881, + "grad_norm": 1.367174801368101, + "learning_rate": 5.633802816901409e-07, + "loss": 1.3313459157943726, + "step": 9 + }, + { + "epoch": 0.003524229074889868, + "grad_norm": 1.4378623823320218, + "learning_rate": 6.338028169014085e-07, + "loss": 1.2484922409057617, + "step": 10 + }, + { + "epoch": 0.0038766519823788545, + "grad_norm": 1.197911167360161, + "learning_rate": 7.042253521126762e-07, + "loss": 1.097194790840149, + "step": 11 + }, + { + "epoch": 0.004229074889867841, + "grad_norm": 1.3767897701080816, + "learning_rate": 7.746478873239437e-07, + "loss": 1.3065136671066284, + "step": 12 + }, + { + "epoch": 0.0045814977973568285, + "grad_norm": 1.2501177622273731, + "learning_rate": 8.450704225352114e-07, + "loss": 
1.1574026346206665, + "step": 13 + }, + { + "epoch": 0.004933920704845815, + "grad_norm": 1.3002699887597202, + "learning_rate": 9.154929577464789e-07, + "loss": 1.1509445905685425, + "step": 14 + }, + { + "epoch": 0.0052863436123348016, + "grad_norm": 1.3458236321153771, + "learning_rate": 9.859154929577465e-07, + "loss": 1.069403886795044, + "step": 15 + }, + { + "epoch": 0.005638766519823789, + "grad_norm": 1.52712721337833, + "learning_rate": 1.0563380281690142e-06, + "loss": 1.1731287240982056, + "step": 16 + }, + { + "epoch": 0.0059911894273127755, + "grad_norm": 1.5628075837505453, + "learning_rate": 1.1267605633802817e-06, + "loss": 0.9314254522323608, + "step": 17 + }, + { + "epoch": 0.006343612334801762, + "grad_norm": 1.3686084350519343, + "learning_rate": 1.1971830985915492e-06, + "loss": 1.2915008068084717, + "step": 18 + }, + { + "epoch": 0.006696035242290749, + "grad_norm": 1.2653916141417434, + "learning_rate": 1.267605633802817e-06, + "loss": 1.1088309288024902, + "step": 19 + }, + { + "epoch": 0.007048458149779736, + "grad_norm": 1.362753082153478, + "learning_rate": 1.3380281690140844e-06, + "loss": 1.21511709690094, + "step": 20 + }, + { + "epoch": 0.0074008810572687225, + "grad_norm": 1.3054604275805306, + "learning_rate": 1.4084507042253523e-06, + "loss": 1.241409420967102, + "step": 21 + }, + { + "epoch": 0.007753303964757709, + "grad_norm": 1.3646723208790772, + "learning_rate": 1.4788732394366198e-06, + "loss": 1.2170014381408691, + "step": 22 + }, + { + "epoch": 0.008105726872246696, + "grad_norm": 1.424586503093174, + "learning_rate": 1.5492957746478873e-06, + "loss": 1.1405870914459229, + "step": 23 + }, + { + "epoch": 0.008458149779735682, + "grad_norm": 1.429368633092772, + "learning_rate": 1.6197183098591552e-06, + "loss": 1.122542381286621, + "step": 24 + }, + { + "epoch": 0.00881057268722467, + "grad_norm": 1.2201478884239083, + "learning_rate": 1.6901408450704227e-06, + "loss": 1.1686937808990479, + "step": 25 + }, + { + "epoch": 
0.009162995594713657, + "grad_norm": 1.4065678272985154, + "learning_rate": 1.7605633802816902e-06, + "loss": 1.215955376625061, + "step": 26 + }, + { + "epoch": 0.009515418502202643, + "grad_norm": 1.3879787249393913, + "learning_rate": 1.8309859154929579e-06, + "loss": 1.075179100036621, + "step": 27 + }, + { + "epoch": 0.00986784140969163, + "grad_norm": 1.2313632017619234, + "learning_rate": 1.9014084507042254e-06, + "loss": 1.198237419128418, + "step": 28 + }, + { + "epoch": 0.010220264317180617, + "grad_norm": 1.6833211669458825, + "learning_rate": 1.971830985915493e-06, + "loss": 1.2356700897216797, + "step": 29 + }, + { + "epoch": 0.010572687224669603, + "grad_norm": 1.3637967517131555, + "learning_rate": 2.0422535211267608e-06, + "loss": 1.2373592853546143, + "step": 30 + }, + { + "epoch": 0.01092511013215859, + "grad_norm": 1.377232613936239, + "learning_rate": 2.1126760563380285e-06, + "loss": 1.1857718229293823, + "step": 31 + }, + { + "epoch": 0.011277533039647578, + "grad_norm": 1.3566319214936433, + "learning_rate": 2.1830985915492958e-06, + "loss": 1.1844017505645752, + "step": 32 + }, + { + "epoch": 0.011629955947136564, + "grad_norm": 1.2486508447822717, + "learning_rate": 2.2535211267605635e-06, + "loss": 1.275226354598999, + "step": 33 + }, + { + "epoch": 0.011982378854625551, + "grad_norm": 1.3044888735575617, + "learning_rate": 2.323943661971831e-06, + "loss": 1.169473648071289, + "step": 34 + }, + { + "epoch": 0.012334801762114538, + "grad_norm": 1.2608655384056326, + "learning_rate": 2.3943661971830984e-06, + "loss": 1.2182841300964355, + "step": 35 + }, + { + "epoch": 0.012687224669603524, + "grad_norm": 1.3780698009940295, + "learning_rate": 2.4647887323943666e-06, + "loss": 1.2110469341278076, + "step": 36 + }, + { + "epoch": 0.01303964757709251, + "grad_norm": 1.3829042894220551, + "learning_rate": 2.535211267605634e-06, + "loss": 1.2886571884155273, + "step": 37 + }, + { + "epoch": 0.013392070484581497, + "grad_norm": 
1.2954566526081723, + "learning_rate": 2.6056338028169015e-06, + "loss": 1.0740901231765747, + "step": 38 + }, + { + "epoch": 0.013744493392070485, + "grad_norm": 1.2079072281757672, + "learning_rate": 2.676056338028169e-06, + "loss": 1.0119279623031616, + "step": 39 + }, + { + "epoch": 0.014096916299559472, + "grad_norm": 1.1460333237155051, + "learning_rate": 2.746478873239437e-06, + "loss": 1.0752044916152954, + "step": 40 + }, + { + "epoch": 0.014449339207048459, + "grad_norm": 1.3690776364650978, + "learning_rate": 2.8169014084507046e-06, + "loss": 1.345343828201294, + "step": 41 + }, + { + "epoch": 0.014801762114537445, + "grad_norm": 1.0813865739605455, + "learning_rate": 2.887323943661972e-06, + "loss": 1.102332353591919, + "step": 42 + }, + { + "epoch": 0.015154185022026432, + "grad_norm": 1.1643083589428873, + "learning_rate": 2.9577464788732396e-06, + "loss": 1.006919264793396, + "step": 43 + }, + { + "epoch": 0.015506607929515418, + "grad_norm": 1.1582412568670832, + "learning_rate": 3.0281690140845073e-06, + "loss": 1.104026436805725, + "step": 44 + }, + { + "epoch": 0.015859030837004406, + "grad_norm": 1.3060563783851553, + "learning_rate": 3.0985915492957746e-06, + "loss": 1.299152135848999, + "step": 45 + }, + { + "epoch": 0.01621145374449339, + "grad_norm": 1.4304085919726754, + "learning_rate": 3.1690140845070427e-06, + "loss": 1.1075072288513184, + "step": 46 + }, + { + "epoch": 0.01656387665198238, + "grad_norm": 0.9865545367526579, + "learning_rate": 3.2394366197183104e-06, + "loss": 1.0296107530593872, + "step": 47 + }, + { + "epoch": 0.016916299559471364, + "grad_norm": 1.1960961939132708, + "learning_rate": 3.3098591549295777e-06, + "loss": 1.1097803115844727, + "step": 48 + }, + { + "epoch": 0.017268722466960353, + "grad_norm": 1.0974682037636356, + "learning_rate": 3.3802816901408454e-06, + "loss": 0.945678174495697, + "step": 49 + }, + { + "epoch": 0.01762114537444934, + "grad_norm": 0.9924343523024514, + "learning_rate": 
3.4507042253521127e-06, + "loss": 1.075556993484497, + "step": 50 + }, + { + "epoch": 0.017973568281938326, + "grad_norm": 1.0849849170905757, + "learning_rate": 3.5211267605633804e-06, + "loss": 1.0790367126464844, + "step": 51 + }, + { + "epoch": 0.018325991189427314, + "grad_norm": 1.220415189867698, + "learning_rate": 3.5915492957746485e-06, + "loss": 1.2567799091339111, + "step": 52 + }, + { + "epoch": 0.0186784140969163, + "grad_norm": 1.1058732491316554, + "learning_rate": 3.6619718309859158e-06, + "loss": 1.1437780857086182, + "step": 53 + }, + { + "epoch": 0.019030837004405287, + "grad_norm": 1.0871981925234313, + "learning_rate": 3.7323943661971835e-06, + "loss": 1.0962307453155518, + "step": 54 + }, + { + "epoch": 0.019383259911894272, + "grad_norm": 0.9603250960542756, + "learning_rate": 3.8028169014084508e-06, + "loss": 1.0149122476577759, + "step": 55 + }, + { + "epoch": 0.01973568281938326, + "grad_norm": 0.9630324155849409, + "learning_rate": 3.873239436619718e-06, + "loss": 0.9029096364974976, + "step": 56 + }, + { + "epoch": 0.02008810572687225, + "grad_norm": 1.1449327271146603, + "learning_rate": 3.943661971830986e-06, + "loss": 1.1290819644927979, + "step": 57 + }, + { + "epoch": 0.020440528634361233, + "grad_norm": 1.1046082203063978, + "learning_rate": 4.014084507042254e-06, + "loss": 1.0965365171432495, + "step": 58 + }, + { + "epoch": 0.02079295154185022, + "grad_norm": 1.2553158733514387, + "learning_rate": 4.0845070422535216e-06, + "loss": 1.2854020595550537, + "step": 59 + }, + { + "epoch": 0.021145374449339206, + "grad_norm": 1.0484971235480365, + "learning_rate": 4.154929577464789e-06, + "loss": 1.0303996801376343, + "step": 60 + }, + { + "epoch": 0.021497797356828195, + "grad_norm": 0.9670460326314384, + "learning_rate": 4.225352112676057e-06, + "loss": 1.0811198949813843, + "step": 61 + }, + { + "epoch": 0.02185022026431718, + "grad_norm": 0.992548164971829, + "learning_rate": 4.295774647887324e-06, + "loss": 1.1373648643493652, + 
"step": 62 + }, + { + "epoch": 0.022202643171806168, + "grad_norm": 1.009220008285868, + "learning_rate": 4.3661971830985915e-06, + "loss": 0.8717563152313232, + "step": 63 + }, + { + "epoch": 0.022555066079295156, + "grad_norm": 0.9171432664885892, + "learning_rate": 4.43661971830986e-06, + "loss": 0.9939290881156921, + "step": 64 + }, + { + "epoch": 0.02290748898678414, + "grad_norm": 1.139248361968882, + "learning_rate": 4.507042253521127e-06, + "loss": 1.1776926517486572, + "step": 65 + }, + { + "epoch": 0.02325991189427313, + "grad_norm": 0.8971048282009709, + "learning_rate": 4.577464788732395e-06, + "loss": 0.9149726629257202, + "step": 66 + }, + { + "epoch": 0.023612334801762114, + "grad_norm": 0.9597323965843616, + "learning_rate": 4.647887323943662e-06, + "loss": 0.996609091758728, + "step": 67 + }, + { + "epoch": 0.023964757709251102, + "grad_norm": 1.0074979173506051, + "learning_rate": 4.71830985915493e-06, + "loss": 1.102593183517456, + "step": 68 + }, + { + "epoch": 0.024317180616740087, + "grad_norm": 0.8938780612317906, + "learning_rate": 4.788732394366197e-06, + "loss": 1.0912048816680908, + "step": 69 + }, + { + "epoch": 0.024669603524229075, + "grad_norm": 0.936561005612989, + "learning_rate": 4.859154929577465e-06, + "loss": 1.1192498207092285, + "step": 70 + }, + { + "epoch": 0.025022026431718063, + "grad_norm": 1.059387656590118, + "learning_rate": 4.929577464788733e-06, + "loss": 1.0358459949493408, + "step": 71 + }, + { + "epoch": 0.025374449339207048, + "grad_norm": 0.9588756664450253, + "learning_rate": 5e-06, + "loss": 1.076169490814209, + "step": 72 + }, + { + "epoch": 0.025726872246696036, + "grad_norm": 1.355361750045824, + "learning_rate": 5.070422535211268e-06, + "loss": 0.9906084537506104, + "step": 73 + }, + { + "epoch": 0.02607929515418502, + "grad_norm": 1.187443908189842, + "learning_rate": 5.140845070422536e-06, + "loss": 0.8163654804229736, + "step": 74 + }, + { + "epoch": 0.02643171806167401, + "grad_norm": 
0.9764403954844053, + "learning_rate": 5.211267605633803e-06, + "loss": 1.140099048614502, + "step": 75 + }, + { + "epoch": 0.026784140969162994, + "grad_norm": 0.863156257130764, + "learning_rate": 5.28169014084507e-06, + "loss": 0.7654916048049927, + "step": 76 + }, + { + "epoch": 0.027136563876651983, + "grad_norm": 1.0935626721226286, + "learning_rate": 5.352112676056338e-06, + "loss": 0.9476499557495117, + "step": 77 + }, + { + "epoch": 0.02748898678414097, + "grad_norm": 1.0875377862843238, + "learning_rate": 5.422535211267607e-06, + "loss": 1.120811939239502, + "step": 78 + }, + { + "epoch": 0.027841409691629956, + "grad_norm": 0.9578356099138406, + "learning_rate": 5.492957746478874e-06, + "loss": 0.9745736122131348, + "step": 79 + }, + { + "epoch": 0.028193832599118944, + "grad_norm": 1.074452545035149, + "learning_rate": 5.563380281690142e-06, + "loss": 1.086181879043579, + "step": 80 + }, + { + "epoch": 0.02854625550660793, + "grad_norm": 0.9510910839018534, + "learning_rate": 5.633802816901409e-06, + "loss": 0.9904681444168091, + "step": 81 + }, + { + "epoch": 0.028898678414096917, + "grad_norm": 1.0958191892945044, + "learning_rate": 5.7042253521126766e-06, + "loss": 1.0311436653137207, + "step": 82 + }, + { + "epoch": 0.029251101321585902, + "grad_norm": 1.066849780091366, + "learning_rate": 5.774647887323944e-06, + "loss": 0.996998131275177, + "step": 83 + }, + { + "epoch": 0.02960352422907489, + "grad_norm": 1.1397250117300832, + "learning_rate": 5.845070422535212e-06, + "loss": 1.1526594161987305, + "step": 84 + }, + { + "epoch": 0.029955947136563875, + "grad_norm": 1.145115830862378, + "learning_rate": 5.915492957746479e-06, + "loss": 1.0914695262908936, + "step": 85 + }, + { + "epoch": 0.030308370044052863, + "grad_norm": 1.1551265503738541, + "learning_rate": 5.9859154929577465e-06, + "loss": 0.9558745622634888, + "step": 86 + }, + { + "epoch": 0.03066079295154185, + "grad_norm": 1.0734015706063305, + "learning_rate": 6.056338028169015e-06, + 
"loss": 0.9668983221054077, + "step": 87 + }, + { + "epoch": 0.031013215859030836, + "grad_norm": 1.2231010171085557, + "learning_rate": 6.126760563380282e-06, + "loss": 1.0132758617401123, + "step": 88 + }, + { + "epoch": 0.03136563876651982, + "grad_norm": 0.9911917466596063, + "learning_rate": 6.197183098591549e-06, + "loss": 1.0816935300827026, + "step": 89 + }, + { + "epoch": 0.03171806167400881, + "grad_norm": 1.1311803004782939, + "learning_rate": 6.267605633802818e-06, + "loss": 1.03245210647583, + "step": 90 + }, + { + "epoch": 0.0320704845814978, + "grad_norm": 1.0820347157611818, + "learning_rate": 6.3380281690140855e-06, + "loss": 0.9812602400779724, + "step": 91 + }, + { + "epoch": 0.03242290748898678, + "grad_norm": 1.0039329578342324, + "learning_rate": 6.408450704225353e-06, + "loss": 0.9303219318389893, + "step": 92 + }, + { + "epoch": 0.032775330396475774, + "grad_norm": 1.0308477549900932, + "learning_rate": 6.478873239436621e-06, + "loss": 1.175403356552124, + "step": 93 + }, + { + "epoch": 0.03312775330396476, + "grad_norm": 1.0822064194108554, + "learning_rate": 6.549295774647888e-06, + "loss": 1.1863958835601807, + "step": 94 + }, + { + "epoch": 0.033480176211453744, + "grad_norm": 1.0560683839166303, + "learning_rate": 6.619718309859155e-06, + "loss": 0.8630557060241699, + "step": 95 + }, + { + "epoch": 0.03383259911894273, + "grad_norm": 1.1683225259655636, + "learning_rate": 6.690140845070423e-06, + "loss": 1.0499619245529175, + "step": 96 + }, + { + "epoch": 0.03418502202643172, + "grad_norm": 0.9766019012274652, + "learning_rate": 6.760563380281691e-06, + "loss": 0.9443086981773376, + "step": 97 + }, + { + "epoch": 0.034537444933920705, + "grad_norm": 1.1779098792527396, + "learning_rate": 6.830985915492958e-06, + "loss": 1.0011450052261353, + "step": 98 + }, + { + "epoch": 0.03488986784140969, + "grad_norm": 1.0941166094999715, + "learning_rate": 6.901408450704225e-06, + "loss": 1.0239083766937256, + "step": 99 + }, + { + "epoch": 
0.03524229074889868, + "grad_norm": 1.1605003575433563, + "learning_rate": 6.9718309859154935e-06, + "loss": 1.1335347890853882, + "step": 100 + }, + { + "epoch": 0.035594713656387666, + "grad_norm": 1.1410420869639502, + "learning_rate": 7.042253521126761e-06, + "loss": 0.9650854468345642, + "step": 101 + }, + { + "epoch": 0.03594713656387665, + "grad_norm": 1.0366491839089684, + "learning_rate": 7.112676056338029e-06, + "loss": 0.9284406900405884, + "step": 102 + }, + { + "epoch": 0.036299559471365636, + "grad_norm": 1.0368314441443032, + "learning_rate": 7.183098591549297e-06, + "loss": 0.989676296710968, + "step": 103 + }, + { + "epoch": 0.03665198237885463, + "grad_norm": 1.0475480945800932, + "learning_rate": 7.253521126760564e-06, + "loss": 0.9149842262268066, + "step": 104 + }, + { + "epoch": 0.03700440528634361, + "grad_norm": 1.0115568298427282, + "learning_rate": 7.3239436619718316e-06, + "loss": 0.9793657064437866, + "step": 105 + }, + { + "epoch": 0.0373568281938326, + "grad_norm": 1.0923401558071288, + "learning_rate": 7.3943661971831e-06, + "loss": 0.9508543014526367, + "step": 106 + }, + { + "epoch": 0.03770925110132159, + "grad_norm": 1.123373083563155, + "learning_rate": 7.464788732394367e-06, + "loss": 1.0623283386230469, + "step": 107 + }, + { + "epoch": 0.038061674008810574, + "grad_norm": 1.0472469474411819, + "learning_rate": 7.535211267605634e-06, + "loss": 1.0039314031600952, + "step": 108 + }, + { + "epoch": 0.03841409691629956, + "grad_norm": 1.1301029490219276, + "learning_rate": 7.6056338028169015e-06, + "loss": 0.9315502643585205, + "step": 109 + }, + { + "epoch": 0.038766519823788544, + "grad_norm": 0.9958020122553335, + "learning_rate": 7.67605633802817e-06, + "loss": 0.936677098274231, + "step": 110 + }, + { + "epoch": 0.039118942731277535, + "grad_norm": 1.0684105284421879, + "learning_rate": 7.746478873239436e-06, + "loss": 0.9373410940170288, + "step": 111 + }, + { + "epoch": 0.03947136563876652, + "grad_norm": 1.155598878121798, 
+ "learning_rate": 7.816901408450704e-06, + "loss": 1.01617431640625, + "step": 112 + }, + { + "epoch": 0.039823788546255505, + "grad_norm": 1.1889006080727076, + "learning_rate": 7.887323943661972e-06, + "loss": 1.1713547706604004, + "step": 113 + }, + { + "epoch": 0.0401762114537445, + "grad_norm": 1.1256223667919436, + "learning_rate": 7.95774647887324e-06, + "loss": 0.8982350826263428, + "step": 114 + }, + { + "epoch": 0.04052863436123348, + "grad_norm": 1.0914199985412718, + "learning_rate": 8.028169014084509e-06, + "loss": 0.8123869895935059, + "step": 115 + }, + { + "epoch": 0.040881057268722466, + "grad_norm": 1.1505365914239516, + "learning_rate": 8.098591549295775e-06, + "loss": 1.0762536525726318, + "step": 116 + }, + { + "epoch": 0.04123348017621145, + "grad_norm": 1.0367170014557934, + "learning_rate": 8.169014084507043e-06, + "loss": 1.004841923713684, + "step": 117 + }, + { + "epoch": 0.04158590308370044, + "grad_norm": 1.0966724197265187, + "learning_rate": 8.239436619718311e-06, + "loss": 0.9237936735153198, + "step": 118 + }, + { + "epoch": 0.04193832599118943, + "grad_norm": 1.0785540239343763, + "learning_rate": 8.309859154929578e-06, + "loss": 0.9038913249969482, + "step": 119 + }, + { + "epoch": 0.04229074889867841, + "grad_norm": 1.1784096317090726, + "learning_rate": 8.380281690140846e-06, + "loss": 0.9488446712493896, + "step": 120 + }, + { + "epoch": 0.042643171806167404, + "grad_norm": 1.1559534491366574, + "learning_rate": 8.450704225352114e-06, + "loss": 1.0862706899642944, + "step": 121 + }, + { + "epoch": 0.04299559471365639, + "grad_norm": 1.5143452874154766, + "learning_rate": 8.52112676056338e-06, + "loss": 0.8882313966751099, + "step": 122 + }, + { + "epoch": 0.043348017621145374, + "grad_norm": 1.1412568707979918, + "learning_rate": 8.591549295774648e-06, + "loss": 0.9125900268554688, + "step": 123 + }, + { + "epoch": 0.04370044052863436, + "grad_norm": 1.403727281403332, + "learning_rate": 8.661971830985915e-06, + "loss": 
0.944568395614624, + "step": 124 + }, + { + "epoch": 0.04405286343612335, + "grad_norm": 1.2993905510610635, + "learning_rate": 8.732394366197183e-06, + "loss": 0.9303089380264282, + "step": 125 + }, + { + "epoch": 0.044405286343612335, + "grad_norm": 1.1184314169128153, + "learning_rate": 8.802816901408451e-06, + "loss": 1.0983362197875977, + "step": 126 + }, + { + "epoch": 0.04475770925110132, + "grad_norm": 1.40811546312751, + "learning_rate": 8.87323943661972e-06, + "loss": 1.002477765083313, + "step": 127 + }, + { + "epoch": 0.04511013215859031, + "grad_norm": 1.1638063617076078, + "learning_rate": 8.943661971830987e-06, + "loss": 0.9994120001792908, + "step": 128 + }, + { + "epoch": 0.045462555066079297, + "grad_norm": 1.2118035451866538, + "learning_rate": 9.014084507042254e-06, + "loss": 1.0785832405090332, + "step": 129 + }, + { + "epoch": 0.04581497797356828, + "grad_norm": 1.0820277493757582, + "learning_rate": 9.084507042253522e-06, + "loss": 0.779441237449646, + "step": 130 + }, + { + "epoch": 0.046167400881057266, + "grad_norm": 1.1766256779195974, + "learning_rate": 9.15492957746479e-06, + "loss": 1.0052348375320435, + "step": 131 + }, + { + "epoch": 0.04651982378854626, + "grad_norm": 1.0771619013639089, + "learning_rate": 9.225352112676057e-06, + "loss": 1.0327996015548706, + "step": 132 + }, + { + "epoch": 0.04687224669603524, + "grad_norm": 1.501276619683034, + "learning_rate": 9.295774647887325e-06, + "loss": 1.0643246173858643, + "step": 133 + }, + { + "epoch": 0.04722466960352423, + "grad_norm": 1.1427145785080848, + "learning_rate": 9.366197183098593e-06, + "loss": 0.8449216485023499, + "step": 134 + }, + { + "epoch": 0.04757709251101322, + "grad_norm": 1.2684019730338143, + "learning_rate": 9.43661971830986e-06, + "loss": 0.8867055177688599, + "step": 135 + }, + { + "epoch": 0.047929515418502204, + "grad_norm": 1.4156875615017863, + "learning_rate": 9.507042253521127e-06, + "loss": 1.048499584197998, + "step": 136 + }, + { + "epoch": 
0.04828193832599119, + "grad_norm": 1.2120768691141688, + "learning_rate": 9.577464788732394e-06, + "loss": 1.0548617839813232, + "step": 137 + }, + { + "epoch": 0.048634361233480174, + "grad_norm": 1.0679337780928526, + "learning_rate": 9.647887323943664e-06, + "loss": 0.8882845044136047, + "step": 138 + }, + { + "epoch": 0.048986784140969165, + "grad_norm": 1.622342973826323, + "learning_rate": 9.71830985915493e-06, + "loss": 1.032647967338562, + "step": 139 + }, + { + "epoch": 0.04933920704845815, + "grad_norm": 1.058782348686911, + "learning_rate": 9.788732394366198e-06, + "loss": 1.039523959159851, + "step": 140 + }, + { + "epoch": 0.049691629955947135, + "grad_norm": 1.1234982994751406, + "learning_rate": 9.859154929577466e-06, + "loss": 0.8451036214828491, + "step": 141 + }, + { + "epoch": 0.05004405286343613, + "grad_norm": 1.1376284406077708, + "learning_rate": 9.929577464788733e-06, + "loss": 0.8285897970199585, + "step": 142 + }, + { + "epoch": 0.05039647577092511, + "grad_norm": 1.2057497016168632, + "learning_rate": 1e-05, + "loss": 0.9998278021812439, + "step": 143 + }, + { + "epoch": 0.050748898678414096, + "grad_norm": 1.4528082359287422, + "learning_rate": 1.0070422535211269e-05, + "loss": 0.9782301187515259, + "step": 144 + }, + { + "epoch": 0.05110132158590308, + "grad_norm": 1.1663459671948497, + "learning_rate": 1.0140845070422535e-05, + "loss": 1.0557070970535278, + "step": 145 + }, + { + "epoch": 0.05145374449339207, + "grad_norm": 1.159146071512081, + "learning_rate": 1.0211267605633803e-05, + "loss": 0.9516133069992065, + "step": 146 + }, + { + "epoch": 0.05180616740088106, + "grad_norm": 1.1694686158556986, + "learning_rate": 1.0281690140845072e-05, + "loss": 0.8965041637420654, + "step": 147 + }, + { + "epoch": 0.05215859030837004, + "grad_norm": 1.2713520268346183, + "learning_rate": 1.0352112676056338e-05, + "loss": 0.8627057075500488, + "step": 148 + }, + { + "epoch": 0.052511013215859034, + "grad_norm": 1.0456048049111641, + 
"learning_rate": 1.0422535211267606e-05, + "loss": 0.7627567648887634, + "step": 149 + }, + { + "epoch": 0.05286343612334802, + "grad_norm": 1.2332422487154633, + "learning_rate": 1.0492957746478873e-05, + "loss": 0.8522504568099976, + "step": 150 + }, + { + "epoch": 0.053215859030837004, + "grad_norm": 1.1106957565365498, + "learning_rate": 1.056338028169014e-05, + "loss": 0.7164312601089478, + "step": 151 + }, + { + "epoch": 0.05356828193832599, + "grad_norm": 1.0487512934158103, + "learning_rate": 1.0633802816901409e-05, + "loss": 0.9141941070556641, + "step": 152 + }, + { + "epoch": 0.05392070484581498, + "grad_norm": 1.5228596875919753, + "learning_rate": 1.0704225352112675e-05, + "loss": 0.9145504832267761, + "step": 153 + }, + { + "epoch": 0.054273127753303965, + "grad_norm": 1.19745569358961, + "learning_rate": 1.0774647887323943e-05, + "loss": 0.9851646423339844, + "step": 154 + }, + { + "epoch": 0.05462555066079295, + "grad_norm": 1.1547769204431162, + "learning_rate": 1.0845070422535213e-05, + "loss": 0.9319474697113037, + "step": 155 + }, + { + "epoch": 0.05497797356828194, + "grad_norm": 1.242055483054837, + "learning_rate": 1.0915492957746481e-05, + "loss": 0.995783269405365, + "step": 156 + }, + { + "epoch": 0.05533039647577093, + "grad_norm": 1.270129466753014, + "learning_rate": 1.0985915492957748e-05, + "loss": 0.8636226654052734, + "step": 157 + }, + { + "epoch": 0.05568281938325991, + "grad_norm": 1.26388911778751, + "learning_rate": 1.1056338028169016e-05, + "loss": 0.8860869407653809, + "step": 158 + }, + { + "epoch": 0.056035242290748896, + "grad_norm": 1.24911279543244, + "learning_rate": 1.1126760563380284e-05, + "loss": 0.9256196618080139, + "step": 159 + }, + { + "epoch": 0.05638766519823789, + "grad_norm": 1.1684473229538663, + "learning_rate": 1.119718309859155e-05, + "loss": 0.8217915296554565, + "step": 160 + }, + { + "epoch": 0.05674008810572687, + "grad_norm": 1.2938227991615623, + "learning_rate": 1.1267605633802819e-05, + "loss": 
0.9808465838432312, + "step": 161 + }, + { + "epoch": 0.05709251101321586, + "grad_norm": 1.2234654171305366, + "learning_rate": 1.1338028169014087e-05, + "loss": 0.7733014822006226, + "step": 162 + }, + { + "epoch": 0.05744493392070485, + "grad_norm": 1.1428802626649461, + "learning_rate": 1.1408450704225353e-05, + "loss": 0.8581304550170898, + "step": 163 + }, + { + "epoch": 0.057797356828193834, + "grad_norm": 1.3252890457476052, + "learning_rate": 1.1478873239436621e-05, + "loss": 0.9242054224014282, + "step": 164 + }, + { + "epoch": 0.05814977973568282, + "grad_norm": 1.3695567443378234, + "learning_rate": 1.1549295774647888e-05, + "loss": 1.0302021503448486, + "step": 165 + }, + { + "epoch": 0.058502202643171804, + "grad_norm": 1.2950143159958714, + "learning_rate": 1.1619718309859156e-05, + "loss": 0.8954275846481323, + "step": 166 + }, + { + "epoch": 0.058854625550660795, + "grad_norm": 1.1779404187828553, + "learning_rate": 1.1690140845070424e-05, + "loss": 0.891846776008606, + "step": 167 + }, + { + "epoch": 0.05920704845814978, + "grad_norm": 1.1837706775348158, + "learning_rate": 1.176056338028169e-05, + "loss": 0.887005627155304, + "step": 168 + }, + { + "epoch": 0.059559471365638765, + "grad_norm": 1.289448297537656, + "learning_rate": 1.1830985915492958e-05, + "loss": 0.9020301103591919, + "step": 169 + }, + { + "epoch": 0.05991189427312775, + "grad_norm": 1.2185831955131692, + "learning_rate": 1.1901408450704227e-05, + "loss": 0.7925454378128052, + "step": 170 + }, + { + "epoch": 0.06026431718061674, + "grad_norm": 1.31750363404193, + "learning_rate": 1.1971830985915493e-05, + "loss": 0.8058332800865173, + "step": 171 + }, + { + "epoch": 0.060616740088105726, + "grad_norm": 1.2435062872951204, + "learning_rate": 1.2042253521126761e-05, + "loss": 0.892992377281189, + "step": 172 + }, + { + "epoch": 0.06096916299559471, + "grad_norm": 1.0835922361658872, + "learning_rate": 1.211267605633803e-05, + "loss": 0.8482734560966492, + "step": 173 + }, + { + 
"epoch": 0.0613215859030837, + "grad_norm": 1.2806384537102478, + "learning_rate": 1.2183098591549296e-05, + "loss": 0.8652878999710083, + "step": 174 + }, + { + "epoch": 0.06167400881057269, + "grad_norm": 1.183930720799068, + "learning_rate": 1.2253521126760564e-05, + "loss": 0.8590051531791687, + "step": 175 + }, + { + "epoch": 0.06202643171806167, + "grad_norm": 1.1264180921527844, + "learning_rate": 1.232394366197183e-05, + "loss": 0.7106916904449463, + "step": 176 + }, + { + "epoch": 0.06237885462555066, + "grad_norm": 1.5304901042334342, + "learning_rate": 1.2394366197183098e-05, + "loss": 0.9298936128616333, + "step": 177 + }, + { + "epoch": 0.06273127753303964, + "grad_norm": 1.3380597134261425, + "learning_rate": 1.2464788732394367e-05, + "loss": 1.027758240699768, + "step": 178 + }, + { + "epoch": 0.06308370044052863, + "grad_norm": 1.4071851827143296, + "learning_rate": 1.2535211267605636e-05, + "loss": 0.9576354026794434, + "step": 179 + }, + { + "epoch": 0.06343612334801763, + "grad_norm": 1.476054189108656, + "learning_rate": 1.2605633802816903e-05, + "loss": 0.6881245374679565, + "step": 180 + }, + { + "epoch": 0.0637885462555066, + "grad_norm": 1.736658600923819, + "learning_rate": 1.2676056338028171e-05, + "loss": 0.9629781246185303, + "step": 181 + }, + { + "epoch": 0.0641409691629956, + "grad_norm": 1.3990061114909895, + "learning_rate": 1.2746478873239439e-05, + "loss": 0.849892258644104, + "step": 182 + }, + { + "epoch": 0.06449339207048459, + "grad_norm": 1.2776276047787312, + "learning_rate": 1.2816901408450705e-05, + "loss": 0.9294229745864868, + "step": 183 + }, + { + "epoch": 0.06484581497797356, + "grad_norm": 1.346185395248099, + "learning_rate": 1.2887323943661974e-05, + "loss": 0.9534600973129272, + "step": 184 + }, + { + "epoch": 0.06519823788546256, + "grad_norm": 1.2547825941083024, + "learning_rate": 1.2957746478873242e-05, + "loss": 0.7937755584716797, + "step": 185 + }, + { + "epoch": 0.06555066079295155, + "grad_norm": 
1.215372024356157, + "learning_rate": 1.3028169014084508e-05, + "loss": 0.9188590049743652, + "step": 186 + }, + { + "epoch": 0.06590308370044053, + "grad_norm": 1.3372931395210206, + "learning_rate": 1.3098591549295776e-05, + "loss": 0.8775123357772827, + "step": 187 + }, + { + "epoch": 0.06625550660792952, + "grad_norm": 1.2703292803517752, + "learning_rate": 1.3169014084507044e-05, + "loss": 0.8562190532684326, + "step": 188 + }, + { + "epoch": 0.0666079295154185, + "grad_norm": 1.1593142823065046, + "learning_rate": 1.323943661971831e-05, + "loss": 0.9427295327186584, + "step": 189 + }, + { + "epoch": 0.06696035242290749, + "grad_norm": 1.1080518257913534, + "learning_rate": 1.3309859154929579e-05, + "loss": 0.6142286062240601, + "step": 190 + }, + { + "epoch": 0.06731277533039648, + "grad_norm": 1.416041365414943, + "learning_rate": 1.3380281690140845e-05, + "loss": 0.7480863332748413, + "step": 191 + }, + { + "epoch": 0.06766519823788546, + "grad_norm": 1.6287312517465182, + "learning_rate": 1.3450704225352114e-05, + "loss": 0.898857593536377, + "step": 192 + }, + { + "epoch": 0.06801762114537445, + "grad_norm": 1.4737642135415263, + "learning_rate": 1.3521126760563382e-05, + "loss": 0.8584127426147461, + "step": 193 + }, + { + "epoch": 0.06837004405286344, + "grad_norm": 1.2178631494207084, + "learning_rate": 1.3591549295774648e-05, + "loss": 0.9400655031204224, + "step": 194 + }, + { + "epoch": 0.06872246696035242, + "grad_norm": 1.2698602238237462, + "learning_rate": 1.3661971830985916e-05, + "loss": 0.7750787734985352, + "step": 195 + }, + { + "epoch": 0.06907488986784141, + "grad_norm": 1.2474557266398312, + "learning_rate": 1.3732394366197184e-05, + "loss": 0.8530284762382507, + "step": 196 + }, + { + "epoch": 0.0694273127753304, + "grad_norm": 1.3191630227557989, + "learning_rate": 1.380281690140845e-05, + "loss": 0.9019994735717773, + "step": 197 + }, + { + "epoch": 0.06977973568281938, + "grad_norm": 1.1994310415476668, + "learning_rate": 
1.3873239436619719e-05, + "loss": 0.7749642133712769, + "step": 198 + }, + { + "epoch": 0.07013215859030837, + "grad_norm": 1.3060142025317714, + "learning_rate": 1.3943661971830987e-05, + "loss": 0.956200122833252, + "step": 199 + }, + { + "epoch": 0.07048458149779736, + "grad_norm": 1.3510407726181874, + "learning_rate": 1.4014084507042253e-05, + "loss": 0.8544470071792603, + "step": 200 + }, + { + "epoch": 0.07083700440528634, + "grad_norm": 1.447521091304659, + "learning_rate": 1.4084507042253522e-05, + "loss": 0.8776387572288513, + "step": 201 + }, + { + "epoch": 0.07118942731277533, + "grad_norm": 1.5340123254246993, + "learning_rate": 1.4154929577464788e-05, + "loss": 0.9949591755867004, + "step": 202 + }, + { + "epoch": 0.07154185022026431, + "grad_norm": 1.306920931788941, + "learning_rate": 1.4225352112676058e-05, + "loss": 0.9616764783859253, + "step": 203 + }, + { + "epoch": 0.0718942731277533, + "grad_norm": 1.3490978686730206, + "learning_rate": 1.4295774647887326e-05, + "loss": 0.9247175455093384, + "step": 204 + }, + { + "epoch": 0.0722466960352423, + "grad_norm": 1.4241509312853966, + "learning_rate": 1.4366197183098594e-05, + "loss": 0.7946479320526123, + "step": 205 + }, + { + "epoch": 0.07259911894273127, + "grad_norm": 1.3949991357763207, + "learning_rate": 1.443661971830986e-05, + "loss": 0.7929860353469849, + "step": 206 + }, + { + "epoch": 0.07295154185022026, + "grad_norm": 1.3725430537583514, + "learning_rate": 1.4507042253521129e-05, + "loss": 0.9215391874313354, + "step": 207 + }, + { + "epoch": 0.07330396475770926, + "grad_norm": 1.4247014676365253, + "learning_rate": 1.4577464788732397e-05, + "loss": 0.8767607808113098, + "step": 208 + }, + { + "epoch": 0.07365638766519823, + "grad_norm": 1.3691339839746066, + "learning_rate": 1.4647887323943663e-05, + "loss": 0.8586276769638062, + "step": 209 + }, + { + "epoch": 0.07400881057268723, + "grad_norm": 1.3252388254138234, + "learning_rate": 1.4718309859154931e-05, + "loss": 
0.8680851459503174, + "step": 210 + }, + { + "epoch": 0.07436123348017622, + "grad_norm": 1.2834178375463614, + "learning_rate": 1.47887323943662e-05, + "loss": 0.8887720108032227, + "step": 211 + }, + { + "epoch": 0.0747136563876652, + "grad_norm": 1.4918681608584679, + "learning_rate": 1.4859154929577466e-05, + "loss": 0.8887100219726562, + "step": 212 + }, + { + "epoch": 0.07506607929515419, + "grad_norm": 1.247870788657092, + "learning_rate": 1.4929577464788734e-05, + "loss": 0.9257807731628418, + "step": 213 + }, + { + "epoch": 0.07541850220264318, + "grad_norm": 1.2922967878533598, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.8107355833053589, + "step": 214 + }, + { + "epoch": 0.07577092511013216, + "grad_norm": 1.390091314994072, + "learning_rate": 1.5070422535211269e-05, + "loss": 0.8765913844108582, + "step": 215 + }, + { + "epoch": 0.07612334801762115, + "grad_norm": 1.3936279931065536, + "learning_rate": 1.5140845070422537e-05, + "loss": 0.8973524570465088, + "step": 216 + }, + { + "epoch": 0.07647577092511013, + "grad_norm": 1.310665112588589, + "learning_rate": 1.5211267605633803e-05, + "loss": 0.9194613695144653, + "step": 217 + }, + { + "epoch": 0.07682819383259912, + "grad_norm": 1.4152279415932816, + "learning_rate": 1.528169014084507e-05, + "loss": 0.8832643032073975, + "step": 218 + }, + { + "epoch": 0.07718061674008811, + "grad_norm": 1.465705079678902, + "learning_rate": 1.535211267605634e-05, + "loss": 0.9575356245040894, + "step": 219 + }, + { + "epoch": 0.07753303964757709, + "grad_norm": 1.2268114727867823, + "learning_rate": 1.5422535211267607e-05, + "loss": 0.8302342891693115, + "step": 220 + }, + { + "epoch": 0.07788546255506608, + "grad_norm": 1.2978917843344704, + "learning_rate": 1.5492957746478872e-05, + "loss": 0.7999966144561768, + "step": 221 + }, + { + "epoch": 0.07823788546255507, + "grad_norm": 1.271952593735668, + "learning_rate": 1.556338028169014e-05, + "loss": 0.8201859593391418, + "step": 222 + }, + { + "epoch": 
0.07859030837004405, + "grad_norm": 1.635464665304201, + "learning_rate": 1.563380281690141e-05, + "loss": 0.872761607170105, + "step": 223 + }, + { + "epoch": 0.07894273127753304, + "grad_norm": 1.7544850567681591, + "learning_rate": 1.5704225352112677e-05, + "loss": 0.8695409297943115, + "step": 224 + }, + { + "epoch": 0.07929515418502203, + "grad_norm": 1.2478131333285527, + "learning_rate": 1.5774647887323945e-05, + "loss": 0.8532050848007202, + "step": 225 + }, + { + "epoch": 0.07964757709251101, + "grad_norm": 1.5276196879895285, + "learning_rate": 1.5845070422535213e-05, + "loss": 0.7875121235847473, + "step": 226 + }, + { + "epoch": 0.08, + "grad_norm": 1.5837485275916963, + "learning_rate": 1.591549295774648e-05, + "loss": 0.7131509780883789, + "step": 227 + }, + { + "epoch": 0.080352422907489, + "grad_norm": 1.4681482709870555, + "learning_rate": 1.598591549295775e-05, + "loss": 0.9758431911468506, + "step": 228 + }, + { + "epoch": 0.08070484581497797, + "grad_norm": 1.4451165548552447, + "learning_rate": 1.6056338028169017e-05, + "loss": 0.7894232273101807, + "step": 229 + }, + { + "epoch": 0.08105726872246696, + "grad_norm": 1.2417235745587356, + "learning_rate": 1.6126760563380285e-05, + "loss": 0.9933483600616455, + "step": 230 + }, + { + "epoch": 0.08140969162995594, + "grad_norm": 1.4745298800972837, + "learning_rate": 1.619718309859155e-05, + "loss": 0.8424056768417358, + "step": 231 + }, + { + "epoch": 0.08176211453744493, + "grad_norm": 1.4626597398090972, + "learning_rate": 1.6267605633802818e-05, + "loss": 0.7957695126533508, + "step": 232 + }, + { + "epoch": 0.08211453744493392, + "grad_norm": 1.243843455131114, + "learning_rate": 1.6338028169014086e-05, + "loss": 0.8491722345352173, + "step": 233 + }, + { + "epoch": 0.0824669603524229, + "grad_norm": 1.407640698868158, + "learning_rate": 1.6408450704225354e-05, + "loss": 0.7010964751243591, + "step": 234 + }, + { + "epoch": 0.0828193832599119, + "grad_norm": 1.4584433632361322, + 
"learning_rate": 1.6478873239436623e-05, + "loss": 0.8713864088058472, + "step": 235 + }, + { + "epoch": 0.08317180616740089, + "grad_norm": 1.261328425360657, + "learning_rate": 1.6549295774647887e-05, + "loss": 0.6724761128425598, + "step": 236 + }, + { + "epoch": 0.08352422907488986, + "grad_norm": 1.219837126653021, + "learning_rate": 1.6619718309859155e-05, + "loss": 0.8612109422683716, + "step": 237 + }, + { + "epoch": 0.08387665198237886, + "grad_norm": 1.4745868727167897, + "learning_rate": 1.6690140845070424e-05, + "loss": 0.5697110891342163, + "step": 238 + }, + { + "epoch": 0.08422907488986785, + "grad_norm": 1.2506294676144012, + "learning_rate": 1.676056338028169e-05, + "loss": 0.7877228260040283, + "step": 239 + }, + { + "epoch": 0.08458149779735682, + "grad_norm": 1.1492235860181979, + "learning_rate": 1.683098591549296e-05, + "loss": 0.8751014471054077, + "step": 240 + }, + { + "epoch": 0.08493392070484582, + "grad_norm": 1.527957574033417, + "learning_rate": 1.6901408450704228e-05, + "loss": 0.8731381893157959, + "step": 241 + }, + { + "epoch": 0.08528634361233481, + "grad_norm": 1.291362512763109, + "learning_rate": 1.6971830985915493e-05, + "loss": 0.831383228302002, + "step": 242 + }, + { + "epoch": 0.08563876651982379, + "grad_norm": 1.2699070733171296, + "learning_rate": 1.704225352112676e-05, + "loss": 0.792934238910675, + "step": 243 + }, + { + "epoch": 0.08599118942731278, + "grad_norm": 1.1592748972292606, + "learning_rate": 1.711267605633803e-05, + "loss": 0.6723657846450806, + "step": 244 + }, + { + "epoch": 0.08634361233480176, + "grad_norm": 1.4796981905185658, + "learning_rate": 1.7183098591549297e-05, + "loss": 0.8377546072006226, + "step": 245 + }, + { + "epoch": 0.08669603524229075, + "grad_norm": 1.2727987522874769, + "learning_rate": 1.7253521126760565e-05, + "loss": 0.8073972463607788, + "step": 246 + }, + { + "epoch": 0.08704845814977974, + "grad_norm": 1.6240304260373406, + "learning_rate": 1.732394366197183e-05, + "loss": 
0.8913615942001343, + "step": 247 + }, + { + "epoch": 0.08740088105726872, + "grad_norm": 1.4436852067854697, + "learning_rate": 1.7394366197183098e-05, + "loss": 0.9133341312408447, + "step": 248 + }, + { + "epoch": 0.08775330396475771, + "grad_norm": 1.6098073633875791, + "learning_rate": 1.7464788732394366e-05, + "loss": 0.7593938112258911, + "step": 249 + }, + { + "epoch": 0.0881057268722467, + "grad_norm": 1.456505700957212, + "learning_rate": 1.7535211267605638e-05, + "loss": 0.8049266934394836, + "step": 250 + }, + { + "epoch": 0.08845814977973568, + "grad_norm": 1.44397678174898, + "learning_rate": 1.7605633802816902e-05, + "loss": 0.9065679311752319, + "step": 251 + }, + { + "epoch": 0.08881057268722467, + "grad_norm": 1.5285644429403964, + "learning_rate": 1.767605633802817e-05, + "loss": 0.9309085011482239, + "step": 252 + }, + { + "epoch": 0.08916299559471366, + "grad_norm": 1.3367293223358285, + "learning_rate": 1.774647887323944e-05, + "loss": 0.7846949100494385, + "step": 253 + }, + { + "epoch": 0.08951541850220264, + "grad_norm": 1.4721492627949804, + "learning_rate": 1.7816901408450707e-05, + "loss": 0.9153063297271729, + "step": 254 + }, + { + "epoch": 0.08986784140969163, + "grad_norm": 1.2843813691966974, + "learning_rate": 1.7887323943661975e-05, + "loss": 0.7743638157844543, + "step": 255 + }, + { + "epoch": 0.09022026431718062, + "grad_norm": 1.6034162783223496, + "learning_rate": 1.7957746478873243e-05, + "loss": 0.887751579284668, + "step": 256 + }, + { + "epoch": 0.0905726872246696, + "grad_norm": 1.2387435479452011, + "learning_rate": 1.8028169014084508e-05, + "loss": 0.8072899580001831, + "step": 257 + }, + { + "epoch": 0.09092511013215859, + "grad_norm": 1.3642448388425203, + "learning_rate": 1.8098591549295776e-05, + "loss": 0.8275943994522095, + "step": 258 + }, + { + "epoch": 0.09127753303964757, + "grad_norm": 1.3287842865535133, + "learning_rate": 1.8169014084507044e-05, + "loss": 0.8300620913505554, + "step": 259 + }, + { + 
"epoch": 0.09162995594713656, + "grad_norm": 1.26616505669333, + "learning_rate": 1.8239436619718312e-05, + "loss": 0.6886857748031616, + "step": 260 + }, + { + "epoch": 0.09198237885462555, + "grad_norm": 2.689833624979495, + "learning_rate": 1.830985915492958e-05, + "loss": 0.8190158605575562, + "step": 261 + }, + { + "epoch": 0.09233480176211453, + "grad_norm": 1.3392491700180422, + "learning_rate": 1.8380281690140845e-05, + "loss": 0.8500730991363525, + "step": 262 + }, + { + "epoch": 0.09268722466960352, + "grad_norm": 1.499663410513064, + "learning_rate": 1.8450704225352113e-05, + "loss": 0.8340811729431152, + "step": 263 + }, + { + "epoch": 0.09303964757709252, + "grad_norm": 1.3031308803407857, + "learning_rate": 1.852112676056338e-05, + "loss": 0.8055675029754639, + "step": 264 + }, + { + "epoch": 0.0933920704845815, + "grad_norm": 1.410218243221954, + "learning_rate": 1.859154929577465e-05, + "loss": 0.7956680059432983, + "step": 265 + }, + { + "epoch": 0.09374449339207049, + "grad_norm": 1.4181751660111779, + "learning_rate": 1.8661971830985917e-05, + "loss": 0.8232501745223999, + "step": 266 + }, + { + "epoch": 0.09409691629955948, + "grad_norm": 1.472224530959967, + "learning_rate": 1.8732394366197186e-05, + "loss": 0.8808565139770508, + "step": 267 + }, + { + "epoch": 0.09444933920704845, + "grad_norm": 1.5113548411958122, + "learning_rate": 1.880281690140845e-05, + "loss": 0.885380744934082, + "step": 268 + }, + { + "epoch": 0.09480176211453745, + "grad_norm": 1.5009611452094687, + "learning_rate": 1.887323943661972e-05, + "loss": 0.8408790826797485, + "step": 269 + }, + { + "epoch": 0.09515418502202644, + "grad_norm": 1.395810517840328, + "learning_rate": 1.8943661971830987e-05, + "loss": 0.7089993953704834, + "step": 270 + }, + { + "epoch": 0.09550660792951542, + "grad_norm": 1.280231938177333, + "learning_rate": 1.9014084507042255e-05, + "loss": 0.7941038608551025, + "step": 271 + }, + { + "epoch": 0.09585903083700441, + "grad_norm": 
1.5210768015450882, + "learning_rate": 1.9084507042253523e-05, + "loss": 0.8269138932228088, + "step": 272 + }, + { + "epoch": 0.09621145374449339, + "grad_norm": 1.5053903060638305, + "learning_rate": 1.9154929577464788e-05, + "loss": 0.8206192255020142, + "step": 273 + }, + { + "epoch": 0.09656387665198238, + "grad_norm": 1.49737615599854, + "learning_rate": 1.922535211267606e-05, + "loss": 0.9146496653556824, + "step": 274 + }, + { + "epoch": 0.09691629955947137, + "grad_norm": 1.1755726979972605, + "learning_rate": 1.9295774647887327e-05, + "loss": 0.6738560199737549, + "step": 275 + }, + { + "epoch": 0.09726872246696035, + "grad_norm": 1.3169911381980228, + "learning_rate": 1.9366197183098595e-05, + "loss": 0.934916615486145, + "step": 276 + }, + { + "epoch": 0.09762114537444934, + "grad_norm": 1.357245739203775, + "learning_rate": 1.943661971830986e-05, + "loss": 0.8952134847640991, + "step": 277 + }, + { + "epoch": 0.09797356828193833, + "grad_norm": 1.3423178147772294, + "learning_rate": 1.9507042253521128e-05, + "loss": 0.9346420764923096, + "step": 278 + }, + { + "epoch": 0.09832599118942731, + "grad_norm": 1.5698833191970427, + "learning_rate": 1.9577464788732396e-05, + "loss": 0.8781993985176086, + "step": 279 + }, + { + "epoch": 0.0986784140969163, + "grad_norm": 1.4703395142125208, + "learning_rate": 1.9647887323943664e-05, + "loss": 0.8283448219299316, + "step": 280 + }, + { + "epoch": 0.09903083700440529, + "grad_norm": 1.2650765439550704, + "learning_rate": 1.9718309859154933e-05, + "loss": 0.8010722398757935, + "step": 281 + }, + { + "epoch": 0.09938325991189427, + "grad_norm": 1.3576050403922397, + "learning_rate": 1.97887323943662e-05, + "loss": 0.8697119951248169, + "step": 282 + }, + { + "epoch": 0.09973568281938326, + "grad_norm": 1.098837792765385, + "learning_rate": 1.9859154929577465e-05, + "loss": 0.6448882818222046, + "step": 283 + }, + { + "epoch": 0.10008810572687225, + "grad_norm": 1.5101908618325302, + "learning_rate": 
1.9929577464788734e-05, + "loss": 0.7782007455825806, + "step": 284 + }, + { + "epoch": 0.10044052863436123, + "grad_norm": 1.455658231417001, + "learning_rate": 2e-05, + "loss": 0.8131508827209473, + "step": 285 + }, + { + "epoch": 0.10079295154185022, + "grad_norm": 1.4413777660177336, + "learning_rate": 1.999999830265561e-05, + "loss": 0.8592134714126587, + "step": 286 + }, + { + "epoch": 0.1011453744493392, + "grad_norm": 1.5671417589518397, + "learning_rate": 1.9999993210623002e-05, + "loss": 0.9374675750732422, + "step": 287 + }, + { + "epoch": 0.10149779735682819, + "grad_norm": 1.5499152824954487, + "learning_rate": 1.9999984723903913e-05, + "loss": 0.8416328430175781, + "step": 288 + }, + { + "epoch": 0.10185022026431718, + "grad_norm": 1.267360297703748, + "learning_rate": 1.9999972842501218e-05, + "loss": 0.7587184906005859, + "step": 289 + }, + { + "epoch": 0.10220264317180616, + "grad_norm": 1.4783535336356979, + "learning_rate": 1.9999957566418956e-05, + "loss": 1.010494351387024, + "step": 290 + }, + { + "epoch": 0.10255506607929515, + "grad_norm": 1.3092025632301814, + "learning_rate": 1.999993889566231e-05, + "loss": 0.7942835092544556, + "step": 291 + }, + { + "epoch": 0.10290748898678415, + "grad_norm": 1.4620379458028798, + "learning_rate": 1.999991683023762e-05, + "loss": 0.9069477915763855, + "step": 292 + }, + { + "epoch": 0.10325991189427312, + "grad_norm": 1.781963673155629, + "learning_rate": 1.9999891370152375e-05, + "loss": 0.8776397705078125, + "step": 293 + }, + { + "epoch": 0.10361233480176212, + "grad_norm": 1.3409879305652028, + "learning_rate": 1.9999862515415216e-05, + "loss": 0.8560416102409363, + "step": 294 + }, + { + "epoch": 0.10396475770925111, + "grad_norm": 1.601676543787724, + "learning_rate": 1.9999830266035942e-05, + "loss": 0.9177321195602417, + "step": 295 + }, + { + "epoch": 0.10431718061674009, + "grad_norm": 1.621521883940329, + "learning_rate": 1.99997946220255e-05, + "loss": 0.8830884695053101, + "step": 296 + }, 
+ { + "epoch": 0.10466960352422908, + "grad_norm": 1.5076951372471592, + "learning_rate": 1.9999755583395987e-05, + "loss": 0.913659930229187, + "step": 297 + }, + { + "epoch": 0.10502202643171807, + "grad_norm": 1.48724181087663, + "learning_rate": 1.999971315016066e-05, + "loss": 0.773309588432312, + "step": 298 + }, + { + "epoch": 0.10537444933920705, + "grad_norm": 1.4640758198016095, + "learning_rate": 1.9999667322333916e-05, + "loss": 0.8432563543319702, + "step": 299 + }, + { + "epoch": 0.10572687224669604, + "grad_norm": 1.5419897004531282, + "learning_rate": 1.999961809993132e-05, + "loss": 0.9632397890090942, + "step": 300 + }, + { + "epoch": 0.10607929515418502, + "grad_norm": 1.4657018761848883, + "learning_rate": 1.999956548296958e-05, + "loss": 0.8205600380897522, + "step": 301 + }, + { + "epoch": 0.10643171806167401, + "grad_norm": 1.2908123355748096, + "learning_rate": 1.9999509471466557e-05, + "loss": 0.8789785504341125, + "step": 302 + }, + { + "epoch": 0.106784140969163, + "grad_norm": 1.4062841050093677, + "learning_rate": 1.999945006544126e-05, + "loss": 0.8445791006088257, + "step": 303 + }, + { + "epoch": 0.10713656387665198, + "grad_norm": 1.3201850616961108, + "learning_rate": 1.9999387264913865e-05, + "loss": 0.8025245666503906, + "step": 304 + }, + { + "epoch": 0.10748898678414097, + "grad_norm": 1.3596018005437036, + "learning_rate": 1.9999321069905688e-05, + "loss": 0.9271318912506104, + "step": 305 + }, + { + "epoch": 0.10784140969162996, + "grad_norm": 1.167387591378785, + "learning_rate": 1.999925148043919e-05, + "loss": 0.809894859790802, + "step": 306 + }, + { + "epoch": 0.10819383259911894, + "grad_norm": 1.4267923203712158, + "learning_rate": 1.999917849653801e-05, + "loss": 0.8940669298171997, + "step": 307 + }, + { + "epoch": 0.10854625550660793, + "grad_norm": 1.466148592973388, + "learning_rate": 1.9999102118226912e-05, + "loss": 0.9301233887672424, + "step": 308 + }, + { + "epoch": 0.10889867841409692, + "grad_norm": 
1.271175959298383, + "learning_rate": 1.9999022345531834e-05, + "loss": 0.6429216861724854, + "step": 309 + }, + { + "epoch": 0.1092511013215859, + "grad_norm": 1.3392816449794738, + "learning_rate": 1.999893917847985e-05, + "loss": 0.7199009656906128, + "step": 310 + }, + { + "epoch": 0.10960352422907489, + "grad_norm": 1.2732787140894477, + "learning_rate": 1.999885261709919e-05, + "loss": 0.8312395811080933, + "step": 311 + }, + { + "epoch": 0.10995594713656388, + "grad_norm": 1.4809957988420102, + "learning_rate": 1.999876266141924e-05, + "loss": 0.8187745213508606, + "step": 312 + }, + { + "epoch": 0.11030837004405286, + "grad_norm": 1.2638906346778362, + "learning_rate": 1.9998669311470546e-05, + "loss": 0.8632344603538513, + "step": 313 + }, + { + "epoch": 0.11066079295154185, + "grad_norm": 1.5651718256034985, + "learning_rate": 1.9998572567284787e-05, + "loss": 0.8789447546005249, + "step": 314 + }, + { + "epoch": 0.11101321585903083, + "grad_norm": 1.4657438576086577, + "learning_rate": 1.999847242889481e-05, + "loss": 0.7647864818572998, + "step": 315 + }, + { + "epoch": 0.11136563876651982, + "grad_norm": 1.2962284510646964, + "learning_rate": 1.9998368896334606e-05, + "loss": 0.872633695602417, + "step": 316 + }, + { + "epoch": 0.11171806167400881, + "grad_norm": 1.4704185501053861, + "learning_rate": 1.9998261969639324e-05, + "loss": 0.8249840140342712, + "step": 317 + }, + { + "epoch": 0.11207048458149779, + "grad_norm": 1.6298830469717174, + "learning_rate": 1.999815164884526e-05, + "loss": 0.7558056116104126, + "step": 318 + }, + { + "epoch": 0.11242290748898678, + "grad_norm": 1.3075257157183537, + "learning_rate": 1.9998037933989866e-05, + "loss": 0.7447441220283508, + "step": 319 + }, + { + "epoch": 0.11277533039647578, + "grad_norm": 1.4956646267919036, + "learning_rate": 1.9997920825111743e-05, + "loss": 0.8260442018508911, + "step": 320 + }, + { + "epoch": 0.11312775330396475, + "grad_norm": 1.2866274072297625, + "learning_rate": 
1.999780032225065e-05, + "loss": 0.7916134595870972, + "step": 321 + }, + { + "epoch": 0.11348017621145375, + "grad_norm": 1.3548711592442237, + "learning_rate": 1.9997676425447486e-05, + "loss": 0.7460259199142456, + "step": 322 + }, + { + "epoch": 0.11383259911894274, + "grad_norm": 1.4664419676620792, + "learning_rate": 1.9997549134744318e-05, + "loss": 0.9739946126937866, + "step": 323 + }, + { + "epoch": 0.11418502202643172, + "grad_norm": 1.3133090693965692, + "learning_rate": 1.9997418450184352e-05, + "loss": 0.7242900133132935, + "step": 324 + }, + { + "epoch": 0.1145374449339207, + "grad_norm": 1.7023646414032152, + "learning_rate": 1.9997284371811955e-05, + "loss": 0.7645323276519775, + "step": 325 + }, + { + "epoch": 0.1148898678414097, + "grad_norm": 1.3437215758424148, + "learning_rate": 1.9997146899672638e-05, + "loss": 0.7377017736434937, + "step": 326 + }, + { + "epoch": 0.11524229074889868, + "grad_norm": 1.3608732999796416, + "learning_rate": 1.9997006033813076e-05, + "loss": 0.7117934226989746, + "step": 327 + }, + { + "epoch": 0.11559471365638767, + "grad_norm": 1.485158034808982, + "learning_rate": 1.999686177428108e-05, + "loss": 0.8517680168151855, + "step": 328 + }, + { + "epoch": 0.11594713656387665, + "grad_norm": 1.3118416735480631, + "learning_rate": 1.9996714121125626e-05, + "loss": 0.7099400758743286, + "step": 329 + }, + { + "epoch": 0.11629955947136564, + "grad_norm": 1.3949559553781739, + "learning_rate": 1.9996563074396838e-05, + "loss": 0.8581711053848267, + "step": 330 + }, + { + "epoch": 0.11665198237885463, + "grad_norm": 1.322464822656225, + "learning_rate": 1.9996408634145994e-05, + "loss": 0.7841953635215759, + "step": 331 + }, + { + "epoch": 0.11700440528634361, + "grad_norm": 1.2580468593989962, + "learning_rate": 1.9996250800425515e-05, + "loss": 0.7376754879951477, + "step": 332 + }, + { + "epoch": 0.1173568281938326, + "grad_norm": 1.3538742269891202, + "learning_rate": 1.9996089573288985e-05, + "loss": 
0.8934558033943176, + "step": 333 + }, + { + "epoch": 0.11770925110132159, + "grad_norm": 1.4597310886631008, + "learning_rate": 1.999592495279113e-05, + "loss": 0.7870250940322876, + "step": 334 + }, + { + "epoch": 0.11806167400881057, + "grad_norm": 1.5788273084375275, + "learning_rate": 1.9995756938987846e-05, + "loss": 0.7026203274726868, + "step": 335 + }, + { + "epoch": 0.11841409691629956, + "grad_norm": 2.206437289778364, + "learning_rate": 1.999558553193616e-05, + "loss": 1.0066381692886353, + "step": 336 + }, + { + "epoch": 0.11876651982378855, + "grad_norm": 1.349262918557434, + "learning_rate": 1.9995410731694255e-05, + "loss": 0.7860246896743774, + "step": 337 + }, + { + "epoch": 0.11911894273127753, + "grad_norm": 1.4261295710834618, + "learning_rate": 1.999523253832148e-05, + "loss": 0.8142588138580322, + "step": 338 + }, + { + "epoch": 0.11947136563876652, + "grad_norm": 1.403543131076251, + "learning_rate": 1.9995050951878317e-05, + "loss": 0.9737639427185059, + "step": 339 + }, + { + "epoch": 0.1198237885462555, + "grad_norm": 1.2538473699838193, + "learning_rate": 1.999486597242642e-05, + "loss": 0.6165765523910522, + "step": 340 + }, + { + "epoch": 0.12017621145374449, + "grad_norm": 1.4403971646421685, + "learning_rate": 1.999467760002857e-05, + "loss": 0.8553996086120605, + "step": 341 + }, + { + "epoch": 0.12052863436123348, + "grad_norm": 1.579218034733104, + "learning_rate": 1.9994485834748725e-05, + "loss": 0.9291022419929504, + "step": 342 + }, + { + "epoch": 0.12088105726872246, + "grad_norm": 1.3583147087232978, + "learning_rate": 1.9994290676651977e-05, + "loss": 0.8309136629104614, + "step": 343 + }, + { + "epoch": 0.12123348017621145, + "grad_norm": 1.2343518052190974, + "learning_rate": 1.999409212580458e-05, + "loss": 0.6963932514190674, + "step": 344 + }, + { + "epoch": 0.12158590308370044, + "grad_norm": 1.126432291251887, + "learning_rate": 1.9993890182273932e-05, + "loss": 0.8220632076263428, + "step": 345 + }, + { + "epoch": 
0.12193832599118942, + "grad_norm": 1.5283410369228738, + "learning_rate": 1.9993684846128588e-05, + "loss": 0.8407794237136841, + "step": 346 + }, + { + "epoch": 0.12229074889867841, + "grad_norm": 1.479739244816861, + "learning_rate": 1.9993476117438257e-05, + "loss": 0.795718789100647, + "step": 347 + }, + { + "epoch": 0.1226431718061674, + "grad_norm": 1.3466106447402244, + "learning_rate": 1.9993263996273792e-05, + "loss": 0.7482223510742188, + "step": 348 + }, + { + "epoch": 0.12299559471365638, + "grad_norm": 1.4606743428798505, + "learning_rate": 1.99930484827072e-05, + "loss": 0.814468264579773, + "step": 349 + }, + { + "epoch": 0.12334801762114538, + "grad_norm": 1.5345713664893856, + "learning_rate": 1.9992829576811648e-05, + "loss": 0.8105748891830444, + "step": 350 + }, + { + "epoch": 0.12370044052863437, + "grad_norm": 1.6869192314100032, + "learning_rate": 1.9992607278661437e-05, + "loss": 0.8756073713302612, + "step": 351 + }, + { + "epoch": 0.12405286343612335, + "grad_norm": 1.228330868948225, + "learning_rate": 1.9992381588332043e-05, + "loss": 0.8643946647644043, + "step": 352 + }, + { + "epoch": 0.12440528634361234, + "grad_norm": 1.1468400313164093, + "learning_rate": 1.9992152505900067e-05, + "loss": 0.7691172361373901, + "step": 353 + }, + { + "epoch": 0.12475770925110131, + "grad_norm": 1.3198644948783926, + "learning_rate": 1.9991920031443288e-05, + "loss": 0.716686487197876, + "step": 354 + }, + { + "epoch": 0.12511013215859032, + "grad_norm": 1.39334404424432, + "learning_rate": 1.9991684165040616e-05, + "loss": 0.697482705116272, + "step": 355 + }, + { + "epoch": 0.12546255506607928, + "grad_norm": 1.5087579956634654, + "learning_rate": 1.999144490677212e-05, + "loss": 0.8039460182189941, + "step": 356 + }, + { + "epoch": 0.12581497797356828, + "grad_norm": 1.3206582875495743, + "learning_rate": 1.9991202256719032e-05, + "loss": 0.872138261795044, + "step": 357 + }, + { + "epoch": 0.12616740088105727, + "grad_norm": 1.330801420963485, + 
"learning_rate": 1.999095621496371e-05, + "loss": 0.8659502267837524, + "step": 358 + }, + { + "epoch": 0.12651982378854626, + "grad_norm": 1.2062023445068855, + "learning_rate": 1.9990706781589682e-05, + "loss": 0.7585660219192505, + "step": 359 + }, + { + "epoch": 0.12687224669603525, + "grad_norm": 1.349814688916852, + "learning_rate": 1.9990453956681626e-05, + "loss": 0.86381995677948, + "step": 360 + }, + { + "epoch": 0.12722466960352422, + "grad_norm": 1.3080210647965176, + "learning_rate": 1.9990197740325365e-05, + "loss": 0.7623461484909058, + "step": 361 + }, + { + "epoch": 0.1275770925110132, + "grad_norm": 1.4247026163468757, + "learning_rate": 1.9989938132607877e-05, + "loss": 0.8262917995452881, + "step": 362 + }, + { + "epoch": 0.1279295154185022, + "grad_norm": 1.3245955099655373, + "learning_rate": 1.9989675133617294e-05, + "loss": 0.7879630327224731, + "step": 363 + }, + { + "epoch": 0.1282819383259912, + "grad_norm": 1.5925116832241206, + "learning_rate": 1.9989408743442892e-05, + "loss": 0.8282565474510193, + "step": 364 + }, + { + "epoch": 0.12863436123348018, + "grad_norm": 1.151308483630064, + "learning_rate": 1.9989138962175105e-05, + "loss": 0.8358104228973389, + "step": 365 + }, + { + "epoch": 0.12898678414096917, + "grad_norm": 1.4831450607430074, + "learning_rate": 1.9988865789905513e-05, + "loss": 0.9111027121543884, + "step": 366 + }, + { + "epoch": 0.12933920704845814, + "grad_norm": 1.4181532995073547, + "learning_rate": 1.9988589226726847e-05, + "loss": 0.766915500164032, + "step": 367 + }, + { + "epoch": 0.12969162995594713, + "grad_norm": 1.3923253104774793, + "learning_rate": 1.9988309272733e-05, + "loss": 0.818048357963562, + "step": 368 + }, + { + "epoch": 0.13004405286343612, + "grad_norm": 1.2625645815303237, + "learning_rate": 1.9988025928019e-05, + "loss": 0.8188307285308838, + "step": 369 + }, + { + "epoch": 0.1303964757709251, + "grad_norm": 1.4656557007271924, + "learning_rate": 1.998773919268104e-05, + "loss": 
0.88718181848526, + "step": 370 + }, + { + "epoch": 0.1307488986784141, + "grad_norm": 1.3104922660776017, + "learning_rate": 1.998744906681645e-05, + "loss": 0.9173898696899414, + "step": 371 + }, + { + "epoch": 0.1311013215859031, + "grad_norm": 1.4305544884130297, + "learning_rate": 1.9987155550523725e-05, + "loss": 0.8025110960006714, + "step": 372 + }, + { + "epoch": 0.13145374449339206, + "grad_norm": 1.2328392002659898, + "learning_rate": 1.9986858643902502e-05, + "loss": 0.8931341767311096, + "step": 373 + }, + { + "epoch": 0.13180616740088105, + "grad_norm": 1.258415234092876, + "learning_rate": 1.9986558347053574e-05, + "loss": 0.8813796043395996, + "step": 374 + }, + { + "epoch": 0.13215859030837004, + "grad_norm": 1.3254702068923054, + "learning_rate": 1.9986254660078877e-05, + "loss": 0.8021976947784424, + "step": 375 + }, + { + "epoch": 0.13251101321585904, + "grad_norm": 1.3001638136254743, + "learning_rate": 1.9985947583081506e-05, + "loss": 0.8083860874176025, + "step": 376 + }, + { + "epoch": 0.13286343612334803, + "grad_norm": 1.2519881014381842, + "learning_rate": 1.9985637116165705e-05, + "loss": 0.7639983296394348, + "step": 377 + }, + { + "epoch": 0.133215859030837, + "grad_norm": 1.3308962501940544, + "learning_rate": 1.9985323259436874e-05, + "loss": 0.7775800228118896, + "step": 378 + }, + { + "epoch": 0.13356828193832598, + "grad_norm": 1.3822704707659155, + "learning_rate": 1.9985006013001545e-05, + "loss": 0.8892228603363037, + "step": 379 + }, + { + "epoch": 0.13392070484581498, + "grad_norm": 1.4007373611969895, + "learning_rate": 1.998468537696742e-05, + "loss": 0.9158765077590942, + "step": 380 + }, + { + "epoch": 0.13427312775330397, + "grad_norm": 1.2142103786325267, + "learning_rate": 1.9984361351443343e-05, + "loss": 0.7523722648620605, + "step": 381 + }, + { + "epoch": 0.13462555066079296, + "grad_norm": 1.5406874167870075, + "learning_rate": 1.998403393653932e-05, + "loss": 0.8052740693092346, + "step": 382 + }, + { + "epoch": 
0.13497797356828195, + "grad_norm": 4.839014305582762, + "learning_rate": 1.9983703132366484e-05, + "loss": 0.8271476626396179, + "step": 383 + }, + { + "epoch": 0.13533039647577091, + "grad_norm": 1.3724243356768093, + "learning_rate": 1.998336893903714e-05, + "loss": 0.8904454112052917, + "step": 384 + }, + { + "epoch": 0.1356828193832599, + "grad_norm": 1.5086695454887955, + "learning_rate": 1.9983031356664733e-05, + "loss": 0.8705847263336182, + "step": 385 + }, + { + "epoch": 0.1360352422907489, + "grad_norm": 1.3562221939291232, + "learning_rate": 1.9982690385363867e-05, + "loss": 0.8269569873809814, + "step": 386 + }, + { + "epoch": 0.1363876651982379, + "grad_norm": 1.6156870918588995, + "learning_rate": 1.998234602525029e-05, + "loss": 0.9796818494796753, + "step": 387 + }, + { + "epoch": 0.13674008810572688, + "grad_norm": 1.5268638185003427, + "learning_rate": 1.9981998276440892e-05, + "loss": 0.8276596665382385, + "step": 388 + }, + { + "epoch": 0.13709251101321585, + "grad_norm": 1.1979978409172833, + "learning_rate": 1.9981647139053737e-05, + "loss": 0.8739231824874878, + "step": 389 + }, + { + "epoch": 0.13744493392070484, + "grad_norm": 1.517970302113154, + "learning_rate": 1.9981292613208018e-05, + "loss": 0.677521824836731, + "step": 390 + }, + { + "epoch": 0.13779735682819383, + "grad_norm": 1.483399153515808, + "learning_rate": 1.9980934699024084e-05, + "loss": 0.744938313961029, + "step": 391 + }, + { + "epoch": 0.13814977973568282, + "grad_norm": 1.423178346498717, + "learning_rate": 1.998057339662344e-05, + "loss": 0.8367065787315369, + "step": 392 + }, + { + "epoch": 0.1385022026431718, + "grad_norm": 1.6714277386990386, + "learning_rate": 1.9980208706128733e-05, + "loss": 0.775547981262207, + "step": 393 + }, + { + "epoch": 0.1388546255506608, + "grad_norm": 1.244274379470138, + "learning_rate": 1.9979840627663764e-05, + "loss": 0.8287982940673828, + "step": 394 + }, + { + "epoch": 0.13920704845814977, + "grad_norm": 1.429588244120958, + 
"learning_rate": 1.997946916135349e-05, + "loss": 0.7582247257232666, + "step": 395 + }, + { + "epoch": 0.13955947136563876, + "grad_norm": 1.309709423857836, + "learning_rate": 1.997909430732401e-05, + "loss": 0.968267560005188, + "step": 396 + }, + { + "epoch": 0.13991189427312775, + "grad_norm": 1.4247483192434738, + "learning_rate": 1.9978716065702566e-05, + "loss": 0.8850257396697998, + "step": 397 + }, + { + "epoch": 0.14026431718061674, + "grad_norm": 1.1261344584223945, + "learning_rate": 1.9978334436617574e-05, + "loss": 0.7206246852874756, + "step": 398 + }, + { + "epoch": 0.14061674008810573, + "grad_norm": 1.2702546976441136, + "learning_rate": 1.9977949420198576e-05, + "loss": 0.7833065986633301, + "step": 399 + }, + { + "epoch": 0.14096916299559473, + "grad_norm": 1.2940706461552187, + "learning_rate": 1.9977561016576275e-05, + "loss": 0.7199673652648926, + "step": 400 + }, + { + "epoch": 0.1413215859030837, + "grad_norm": 1.3300807823897647, + "learning_rate": 1.9977169225882522e-05, + "loss": 0.7544811367988586, + "step": 401 + }, + { + "epoch": 0.14167400881057268, + "grad_norm": 1.3500860064281444, + "learning_rate": 1.9976774048250317e-05, + "loss": 0.7528219819068909, + "step": 402 + }, + { + "epoch": 0.14202643171806167, + "grad_norm": 1.230028309495833, + "learning_rate": 1.9976375483813814e-05, + "loss": 0.8025565147399902, + "step": 403 + }, + { + "epoch": 0.14237885462555067, + "grad_norm": 1.271700071603726, + "learning_rate": 1.997597353270831e-05, + "loss": 0.6553962230682373, + "step": 404 + }, + { + "epoch": 0.14273127753303966, + "grad_norm": 1.195900427449374, + "learning_rate": 1.9975568195070253e-05, + "loss": 0.7070015072822571, + "step": 405 + }, + { + "epoch": 0.14308370044052862, + "grad_norm": 1.238996854756085, + "learning_rate": 1.9975159471037247e-05, + "loss": 0.7454725503921509, + "step": 406 + }, + { + "epoch": 0.1434361233480176, + "grad_norm": 1.5517260528670263, + "learning_rate": 1.9974747360748038e-05, + "loss": 
0.7074518799781799, + "step": 407 + }, + { + "epoch": 0.1437885462555066, + "grad_norm": 1.4240478656973132, + "learning_rate": 1.9974331864342527e-05, + "loss": 0.6870182752609253, + "step": 408 + }, + { + "epoch": 0.1441409691629956, + "grad_norm": 1.5514938206230895, + "learning_rate": 1.9973912981961763e-05, + "loss": 0.826898455619812, + "step": 409 + }, + { + "epoch": 0.1444933920704846, + "grad_norm": 1.483679538302774, + "learning_rate": 1.997349071374794e-05, + "loss": 0.7244436740875244, + "step": 410 + }, + { + "epoch": 0.14484581497797358, + "grad_norm": 1.2681717185328807, + "learning_rate": 1.9973065059844404e-05, + "loss": 0.6885448694229126, + "step": 411 + }, + { + "epoch": 0.14519823788546254, + "grad_norm": 1.3797417122455713, + "learning_rate": 1.9972636020395653e-05, + "loss": 0.8477644920349121, + "step": 412 + }, + { + "epoch": 0.14555066079295154, + "grad_norm": 1.5051840849568912, + "learning_rate": 1.9972203595547334e-05, + "loss": 0.9432111382484436, + "step": 413 + }, + { + "epoch": 0.14590308370044053, + "grad_norm": 1.351618505603555, + "learning_rate": 1.9971767785446243e-05, + "loss": 1.0101501941680908, + "step": 414 + }, + { + "epoch": 0.14625550660792952, + "grad_norm": 1.421926997117087, + "learning_rate": 1.997132859024032e-05, + "loss": 0.8174984455108643, + "step": 415 + }, + { + "epoch": 0.1466079295154185, + "grad_norm": 1.1573592385577054, + "learning_rate": 1.997088601007866e-05, + "loss": 0.6857198476791382, + "step": 416 + }, + { + "epoch": 0.14696035242290748, + "grad_norm": 1.1795540078822444, + "learning_rate": 1.9970440045111505e-05, + "loss": 0.7742792367935181, + "step": 417 + }, + { + "epoch": 0.14731277533039647, + "grad_norm": 1.783143700583216, + "learning_rate": 1.996999069549025e-05, + "loss": 0.7489269971847534, + "step": 418 + }, + { + "epoch": 0.14766519823788546, + "grad_norm": 1.4327273961807123, + "learning_rate": 1.9969537961367423e-05, + "loss": 0.7362021207809448, + "step": 419 + }, + { + "epoch": 
0.14801762114537445, + "grad_norm": 1.3763810595433905, + "learning_rate": 1.996908184289673e-05, + "loss": 0.7596213221549988, + "step": 420 + }, + { + "epoch": 0.14837004405286344, + "grad_norm": 1.3357573192960268, + "learning_rate": 1.9968622340232993e-05, + "loss": 0.7739163637161255, + "step": 421 + }, + { + "epoch": 0.14872246696035243, + "grad_norm": 1.2890109075687697, + "learning_rate": 1.9968159453532215e-05, + "loss": 0.9059790372848511, + "step": 422 + }, + { + "epoch": 0.1490748898678414, + "grad_norm": 1.4830814966077062, + "learning_rate": 1.9967693182951516e-05, + "loss": 0.7298871278762817, + "step": 423 + }, + { + "epoch": 0.1494273127753304, + "grad_norm": 1.3303231094936145, + "learning_rate": 1.9967223528649194e-05, + "loss": 0.7218194007873535, + "step": 424 + }, + { + "epoch": 0.14977973568281938, + "grad_norm": 1.3738677080017252, + "learning_rate": 1.996675049078467e-05, + "loss": 0.8031259179115295, + "step": 425 + }, + { + "epoch": 0.15013215859030837, + "grad_norm": 1.402915539690338, + "learning_rate": 1.9966274069518533e-05, + "loss": 0.8583194613456726, + "step": 426 + }, + { + "epoch": 0.15048458149779737, + "grad_norm": 1.5081794718854693, + "learning_rate": 1.9965794265012514e-05, + "loss": 0.7829155921936035, + "step": 427 + }, + { + "epoch": 0.15083700440528636, + "grad_norm": 1.3040065928659967, + "learning_rate": 1.9965311077429484e-05, + "loss": 0.709203839302063, + "step": 428 + }, + { + "epoch": 0.15118942731277532, + "grad_norm": 1.324153309243564, + "learning_rate": 1.996482450693348e-05, + "loss": 0.7515710592269897, + "step": 429 + }, + { + "epoch": 0.1515418502202643, + "grad_norm": 1.5966034920450463, + "learning_rate": 1.9964334553689674e-05, + "loss": 0.8552615642547607, + "step": 430 + }, + { + "epoch": 0.1518942731277533, + "grad_norm": 1.3833039246024212, + "learning_rate": 1.9963841217864385e-05, + "loss": 0.7946224808692932, + "step": 431 + }, + { + "epoch": 0.1522466960352423, + "grad_norm": 1.351342046961, + 
"learning_rate": 1.9963344499625087e-05, + "loss": 0.7117756605148315, + "step": 432 + }, + { + "epoch": 0.1525991189427313, + "grad_norm": 1.5677032677150589, + "learning_rate": 1.9962844399140405e-05, + "loss": 0.8892849683761597, + "step": 433 + }, + { + "epoch": 0.15295154185022025, + "grad_norm": 1.6682742006947457, + "learning_rate": 1.9962340916580105e-05, + "loss": 0.9037783145904541, + "step": 434 + }, + { + "epoch": 0.15330396475770924, + "grad_norm": 1.3178590359087465, + "learning_rate": 1.9961834052115104e-05, + "loss": 0.7419179677963257, + "step": 435 + }, + { + "epoch": 0.15365638766519824, + "grad_norm": 1.500659178246394, + "learning_rate": 1.9961323805917464e-05, + "loss": 0.847285270690918, + "step": 436 + }, + { + "epoch": 0.15400881057268723, + "grad_norm": 1.520891708486689, + "learning_rate": 1.99608101781604e-05, + "loss": 0.793263852596283, + "step": 437 + }, + { + "epoch": 0.15436123348017622, + "grad_norm": 1.2927327484478677, + "learning_rate": 1.9960293169018276e-05, + "loss": 0.6600923538208008, + "step": 438 + }, + { + "epoch": 0.1547136563876652, + "grad_norm": 1.178823428760428, + "learning_rate": 1.9959772778666592e-05, + "loss": 0.7642164826393127, + "step": 439 + }, + { + "epoch": 0.15506607929515417, + "grad_norm": 1.4230767051116806, + "learning_rate": 1.995924900728201e-05, + "loss": 0.897221565246582, + "step": 440 + }, + { + "epoch": 0.15541850220264317, + "grad_norm": 1.3912415328195475, + "learning_rate": 1.9958721855042338e-05, + "loss": 0.830953061580658, + "step": 441 + }, + { + "epoch": 0.15577092511013216, + "grad_norm": 1.3683790024985447, + "learning_rate": 1.995819132212652e-05, + "loss": 0.7514863014221191, + "step": 442 + }, + { + "epoch": 0.15612334801762115, + "grad_norm": 1.3179910502987273, + "learning_rate": 1.995765740871466e-05, + "loss": 0.7039257287979126, + "step": 443 + }, + { + "epoch": 0.15647577092511014, + "grad_norm": 1.5017230130600239, + "learning_rate": 1.9957120114988e-05, + "loss": 
0.810503363609314, + "step": 444 + }, + { + "epoch": 0.1568281938325991, + "grad_norm": 1.4050071397488821, + "learning_rate": 1.9956579441128942e-05, + "loss": 0.616968035697937, + "step": 445 + }, + { + "epoch": 0.1571806167400881, + "grad_norm": 1.3149075420166694, + "learning_rate": 1.9956035387321024e-05, + "loss": 0.7008740901947021, + "step": 446 + }, + { + "epoch": 0.1575330396475771, + "grad_norm": 1.4992101173925434, + "learning_rate": 1.995548795374893e-05, + "loss": 0.847025454044342, + "step": 447 + }, + { + "epoch": 0.15788546255506608, + "grad_norm": 1.3763555067673139, + "learning_rate": 1.9954937140598506e-05, + "loss": 0.7788053750991821, + "step": 448 + }, + { + "epoch": 0.15823788546255507, + "grad_norm": 1.301728118921247, + "learning_rate": 1.9954382948056735e-05, + "loss": 0.7592896819114685, + "step": 449 + }, + { + "epoch": 0.15859030837004406, + "grad_norm": 1.6001158206313053, + "learning_rate": 1.995382537631174e-05, + "loss": 0.9458491802215576, + "step": 450 + }, + { + "epoch": 0.15894273127753303, + "grad_norm": 1.3218132869761372, + "learning_rate": 1.9953264425552804e-05, + "loss": 0.8069632053375244, + "step": 451 + }, + { + "epoch": 0.15929515418502202, + "grad_norm": 1.316918406992957, + "learning_rate": 1.9952700095970357e-05, + "loss": 0.7876379489898682, + "step": 452 + }, + { + "epoch": 0.159647577092511, + "grad_norm": 1.5440089355741875, + "learning_rate": 1.9952132387755965e-05, + "loss": 0.796333909034729, + "step": 453 + }, + { + "epoch": 0.16, + "grad_norm": 1.243828269503452, + "learning_rate": 1.9951561301102348e-05, + "loss": 0.7171634435653687, + "step": 454 + }, + { + "epoch": 0.160352422907489, + "grad_norm": 1.429835470120866, + "learning_rate": 1.9950986836203374e-05, + "loss": 0.8312792778015137, + "step": 455 + }, + { + "epoch": 0.160704845814978, + "grad_norm": 1.4333167021702193, + "learning_rate": 1.995040899325406e-05, + "loss": 0.7496857643127441, + "step": 456 + }, + { + "epoch": 0.16105726872246695, + 
"grad_norm": 1.2513531381670333, + "learning_rate": 1.9949827772450555e-05, + "loss": 0.89504075050354, + "step": 457 + }, + { + "epoch": 0.16140969162995594, + "grad_norm": 1.5536951579594835, + "learning_rate": 1.9949243173990172e-05, + "loss": 0.7580761313438416, + "step": 458 + }, + { + "epoch": 0.16176211453744493, + "grad_norm": 1.6782383396512721, + "learning_rate": 1.9948655198071365e-05, + "loss": 0.7826676368713379, + "step": 459 + }, + { + "epoch": 0.16211453744493393, + "grad_norm": 1.5979456835427475, + "learning_rate": 1.9948063844893733e-05, + "loss": 0.7591372728347778, + "step": 460 + }, + { + "epoch": 0.16246696035242292, + "grad_norm": 1.394749193132719, + "learning_rate": 1.994746911465802e-05, + "loss": 0.7366905808448792, + "step": 461 + }, + { + "epoch": 0.16281938325991188, + "grad_norm": 1.2449236570155473, + "learning_rate": 1.9946871007566116e-05, + "loss": 0.7152266502380371, + "step": 462 + }, + { + "epoch": 0.16317180616740087, + "grad_norm": 1.475247855733958, + "learning_rate": 1.994626952382107e-05, + "loss": 0.8411930799484253, + "step": 463 + }, + { + "epoch": 0.16352422907488987, + "grad_norm": 1.1709525471997975, + "learning_rate": 1.9945664663627054e-05, + "loss": 0.6689857244491577, + "step": 464 + }, + { + "epoch": 0.16387665198237886, + "grad_norm": 1.3007920668059838, + "learning_rate": 1.9945056427189408e-05, + "loss": 0.6474499106407166, + "step": 465 + }, + { + "epoch": 0.16422907488986785, + "grad_norm": 1.397646475804827, + "learning_rate": 1.9944444814714604e-05, + "loss": 0.7861372232437134, + "step": 466 + }, + { + "epoch": 0.16458149779735684, + "grad_norm": 1.4072541980161448, + "learning_rate": 1.9943829826410273e-05, + "loss": 0.8301665186882019, + "step": 467 + }, + { + "epoch": 0.1649339207048458, + "grad_norm": 1.1473159016242473, + "learning_rate": 1.9943211462485176e-05, + "loss": 0.661811888217926, + "step": 468 + }, + { + "epoch": 0.1652863436123348, + "grad_norm": 1.4009911983471504, + "learning_rate": 
1.9942589723149233e-05, + "loss": 0.7768537402153015, + "step": 469 + }, + { + "epoch": 0.1656387665198238, + "grad_norm": 1.209922489625636, + "learning_rate": 1.9941964608613503e-05, + "loss": 0.6139112710952759, + "step": 470 + }, + { + "epoch": 0.16599118942731278, + "grad_norm": 1.3814257371396368, + "learning_rate": 1.9941336119090193e-05, + "loss": 0.8284693956375122, + "step": 471 + }, + { + "epoch": 0.16634361233480177, + "grad_norm": 1.2594577624707568, + "learning_rate": 1.9940704254792655e-05, + "loss": 0.7281739711761475, + "step": 472 + }, + { + "epoch": 0.16669603524229074, + "grad_norm": 1.4773463672265492, + "learning_rate": 1.994006901593539e-05, + "loss": 0.687767744064331, + "step": 473 + }, + { + "epoch": 0.16704845814977973, + "grad_norm": 1.3067539084660165, + "learning_rate": 1.9939430402734046e-05, + "loss": 0.7553595304489136, + "step": 474 + }, + { + "epoch": 0.16740088105726872, + "grad_norm": 1.5537103296420662, + "learning_rate": 1.99387884154054e-05, + "loss": 0.9263294339179993, + "step": 475 + }, + { + "epoch": 0.1677533039647577, + "grad_norm": 1.5514792381885942, + "learning_rate": 1.9938143054167397e-05, + "loss": 0.7014337182044983, + "step": 476 + }, + { + "epoch": 0.1681057268722467, + "grad_norm": 1.1598559513797833, + "learning_rate": 1.9937494319239112e-05, + "loss": 0.6454538106918335, + "step": 477 + }, + { + "epoch": 0.1684581497797357, + "grad_norm": 1.3402764899565285, + "learning_rate": 1.9936842210840775e-05, + "loss": 0.7792352437973022, + "step": 478 + }, + { + "epoch": 0.16881057268722466, + "grad_norm": 1.481603380133959, + "learning_rate": 1.9936186729193753e-05, + "loss": 0.8773127794265747, + "step": 479 + }, + { + "epoch": 0.16916299559471365, + "grad_norm": 1.3472965431143242, + "learning_rate": 1.993552787452056e-05, + "loss": 0.892439603805542, + "step": 480 + }, + { + "epoch": 0.16951541850220264, + "grad_norm": 1.5839752051025837, + "learning_rate": 1.993486564704486e-05, + "loss": 0.89835524559021, + 
"step": 481 + }, + { + "epoch": 0.16986784140969163, + "grad_norm": 1.4593777249036533, + "learning_rate": 1.9934200046991453e-05, + "loss": 0.8013701438903809, + "step": 482 + }, + { + "epoch": 0.17022026431718063, + "grad_norm": 1.5168797838116639, + "learning_rate": 1.9933531074586296e-05, + "loss": 0.8086763620376587, + "step": 483 + }, + { + "epoch": 0.17057268722466962, + "grad_norm": 1.4399310447978144, + "learning_rate": 1.9932858730056486e-05, + "loss": 0.7736518383026123, + "step": 484 + }, + { + "epoch": 0.17092511013215858, + "grad_norm": 1.2982542574143365, + "learning_rate": 1.9932183013630257e-05, + "loss": 0.6247539520263672, + "step": 485 + }, + { + "epoch": 0.17127753303964757, + "grad_norm": 1.519445958865324, + "learning_rate": 1.9931503925536996e-05, + "loss": 0.7172006368637085, + "step": 486 + }, + { + "epoch": 0.17162995594713656, + "grad_norm": 1.3043787656359138, + "learning_rate": 1.993082146600723e-05, + "loss": 0.7854465246200562, + "step": 487 + }, + { + "epoch": 0.17198237885462556, + "grad_norm": 1.2038371426907561, + "learning_rate": 1.9930135635272637e-05, + "loss": 0.7018419504165649, + "step": 488 + }, + { + "epoch": 0.17233480176211455, + "grad_norm": 1.2578522146284077, + "learning_rate": 1.9929446433566033e-05, + "loss": 0.783660352230072, + "step": 489 + }, + { + "epoch": 0.1726872246696035, + "grad_norm": 1.4288043068768257, + "learning_rate": 1.992875386112138e-05, + "loss": 1.0166207551956177, + "step": 490 + }, + { + "epoch": 0.1730396475770925, + "grad_norm": 1.5208280960226344, + "learning_rate": 1.9928057918173786e-05, + "loss": 0.7692895531654358, + "step": 491 + }, + { + "epoch": 0.1733920704845815, + "grad_norm": 1.3733404774184526, + "learning_rate": 1.9927358604959503e-05, + "loss": 0.8005259037017822, + "step": 492 + }, + { + "epoch": 0.1737444933920705, + "grad_norm": 1.3189354109245792, + "learning_rate": 1.9926655921715924e-05, + "loss": 0.6780292987823486, + "step": 493 + }, + { + "epoch": 
0.17409691629955948, + "grad_norm": 1.2272422506889333, + "learning_rate": 1.9925949868681587e-05, + "loss": 0.6501175165176392, + "step": 494 + }, + { + "epoch": 0.17444933920704847, + "grad_norm": 1.3095934443108421, + "learning_rate": 1.9925240446096176e-05, + "loss": 0.781839907169342, + "step": 495 + }, + { + "epoch": 0.17480176211453743, + "grad_norm": 1.4508599784840917, + "learning_rate": 1.992452765420052e-05, + "loss": 0.7617994546890259, + "step": 496 + }, + { + "epoch": 0.17515418502202643, + "grad_norm": 1.2324738440312524, + "learning_rate": 1.992381149323659e-05, + "loss": 0.8019097447395325, + "step": 497 + }, + { + "epoch": 0.17550660792951542, + "grad_norm": 1.3071824216187324, + "learning_rate": 1.9923091963447496e-05, + "loss": 0.7526847124099731, + "step": 498 + }, + { + "epoch": 0.1758590308370044, + "grad_norm": 1.340463358272731, + "learning_rate": 1.9922369065077497e-05, + "loss": 0.7101150751113892, + "step": 499 + }, + { + "epoch": 0.1762114537444934, + "grad_norm": 1.396850141714641, + "learning_rate": 1.9921642798372e-05, + "loss": 0.8519806861877441, + "step": 500 + }, + { + "epoch": 0.17656387665198237, + "grad_norm": 1.5427241760761283, + "learning_rate": 1.9920913163577542e-05, + "loss": 0.774759829044342, + "step": 501 + }, + { + "epoch": 0.17691629955947136, + "grad_norm": 1.4501760642130928, + "learning_rate": 1.992018016094182e-05, + "loss": 0.8597595691680908, + "step": 502 + }, + { + "epoch": 0.17726872246696035, + "grad_norm": 1.6336800938277667, + "learning_rate": 1.9919443790713658e-05, + "loss": 0.7023826241493225, + "step": 503 + }, + { + "epoch": 0.17762114537444934, + "grad_norm": 1.8758125980343456, + "learning_rate": 1.991870405314303e-05, + "loss": 0.8290892839431763, + "step": 504 + }, + { + "epoch": 0.17797356828193833, + "grad_norm": 1.368620384992611, + "learning_rate": 1.9917960948481062e-05, + "loss": 0.9240517020225525, + "step": 505 + }, + { + "epoch": 0.17832599118942732, + "grad_norm": 1.4203507781601712, + 
"learning_rate": 1.9917214476980012e-05, + "loss": 0.8247153759002686, + "step": 506 + }, + { + "epoch": 0.1786784140969163, + "grad_norm": 1.5364946844029868, + "learning_rate": 1.991646463889328e-05, + "loss": 0.9101368188858032, + "step": 507 + }, + { + "epoch": 0.17903083700440528, + "grad_norm": 1.3883082747026767, + "learning_rate": 1.9915711434475416e-05, + "loss": 0.7688114643096924, + "step": 508 + }, + { + "epoch": 0.17938325991189427, + "grad_norm": 1.41173691792053, + "learning_rate": 1.9914954863982106e-05, + "loss": 0.820112943649292, + "step": 509 + }, + { + "epoch": 0.17973568281938326, + "grad_norm": 1.2372115494246672, + "learning_rate": 1.9914194927670186e-05, + "loss": 0.6393542289733887, + "step": 510 + }, + { + "epoch": 0.18008810572687226, + "grad_norm": 1.5514274082803117, + "learning_rate": 1.991343162579763e-05, + "loss": 0.9463154673576355, + "step": 511 + }, + { + "epoch": 0.18044052863436125, + "grad_norm": 1.2818287593652882, + "learning_rate": 1.9912664958623556e-05, + "loss": 0.9498215913772583, + "step": 512 + }, + { + "epoch": 0.1807929515418502, + "grad_norm": 1.3538150363158374, + "learning_rate": 1.991189492640822e-05, + "loss": 0.7659052014350891, + "step": 513 + }, + { + "epoch": 0.1811453744493392, + "grad_norm": 1.3014303918670855, + "learning_rate": 1.9911121529413028e-05, + "loss": 0.9946317672729492, + "step": 514 + }, + { + "epoch": 0.1814977973568282, + "grad_norm": 1.2888096801517381, + "learning_rate": 1.991034476790052e-05, + "loss": 0.762086033821106, + "step": 515 + }, + { + "epoch": 0.18185022026431719, + "grad_norm": 1.2685969775930512, + "learning_rate": 1.990956464213438e-05, + "loss": 0.7507720589637756, + "step": 516 + }, + { + "epoch": 0.18220264317180618, + "grad_norm": 1.2567492686992259, + "learning_rate": 1.990878115237945e-05, + "loss": 0.7859716415405273, + "step": 517 + }, + { + "epoch": 0.18255506607929514, + "grad_norm": 1.3199744761398897, + "learning_rate": 1.9907994298901688e-05, + "loss": 
0.8585234880447388, + "step": 518 + }, + { + "epoch": 0.18290748898678413, + "grad_norm": 1.2014345702103446, + "learning_rate": 1.990720408196821e-05, + "loss": 0.8569823503494263, + "step": 519 + }, + { + "epoch": 0.18325991189427313, + "grad_norm": 1.4066812868889107, + "learning_rate": 1.990641050184727e-05, + "loss": 0.8297367095947266, + "step": 520 + }, + { + "epoch": 0.18361233480176212, + "grad_norm": 1.4158335601181062, + "learning_rate": 1.9905613558808262e-05, + "loss": 0.7918041348457336, + "step": 521 + }, + { + "epoch": 0.1839647577092511, + "grad_norm": 1.3066639133280875, + "learning_rate": 1.9904813253121727e-05, + "loss": 0.8322931528091431, + "step": 522 + }, + { + "epoch": 0.1843171806167401, + "grad_norm": 1.600997340162295, + "learning_rate": 1.990400958505934e-05, + "loss": 0.6822292804718018, + "step": 523 + }, + { + "epoch": 0.18466960352422906, + "grad_norm": 1.344951810567012, + "learning_rate": 1.9903202554893925e-05, + "loss": 0.8989835977554321, + "step": 524 + }, + { + "epoch": 0.18502202643171806, + "grad_norm": 1.4644963211452282, + "learning_rate": 1.990239216289944e-05, + "loss": 0.671294093132019, + "step": 525 + }, + { + "epoch": 0.18537444933920705, + "grad_norm": 1.3104672306859468, + "learning_rate": 1.990157840935099e-05, + "loss": 0.9045379161834717, + "step": 526 + }, + { + "epoch": 0.18572687224669604, + "grad_norm": 1.2000125993399395, + "learning_rate": 1.990076129452482e-05, + "loss": 0.7117471694946289, + "step": 527 + }, + { + "epoch": 0.18607929515418503, + "grad_norm": 1.406356072194557, + "learning_rate": 1.9899940818698315e-05, + "loss": 0.890752911567688, + "step": 528 + }, + { + "epoch": 0.186431718061674, + "grad_norm": 1.3199977159633904, + "learning_rate": 1.9899116982149994e-05, + "loss": 0.7209222316741943, + "step": 529 + }, + { + "epoch": 0.186784140969163, + "grad_norm": 1.4346812218183875, + "learning_rate": 1.9898289785159534e-05, + "loss": 0.6912863254547119, + "step": 530 + }, + { + "epoch": 
0.18713656387665198, + "grad_norm": 1.4271479463954384, + "learning_rate": 1.9897459228007736e-05, + "loss": 0.7060319185256958, + "step": 531 + }, + { + "epoch": 0.18748898678414097, + "grad_norm": 1.2685161281492263, + "learning_rate": 1.9896625310976553e-05, + "loss": 0.6975364685058594, + "step": 532 + }, + { + "epoch": 0.18784140969162996, + "grad_norm": 1.513572022269192, + "learning_rate": 1.989578803434907e-05, + "loss": 0.8576006293296814, + "step": 533 + }, + { + "epoch": 0.18819383259911895, + "grad_norm": 1.3324082350150075, + "learning_rate": 1.9894947398409516e-05, + "loss": 0.7182095646858215, + "step": 534 + }, + { + "epoch": 0.18854625550660792, + "grad_norm": 1.4808328503550712, + "learning_rate": 1.9894103403443265e-05, + "loss": 0.7546031475067139, + "step": 535 + }, + { + "epoch": 0.1888986784140969, + "grad_norm": 1.4412494852286755, + "learning_rate": 1.9893256049736824e-05, + "loss": 0.7083312273025513, + "step": 536 + }, + { + "epoch": 0.1892511013215859, + "grad_norm": 1.178231291011438, + "learning_rate": 1.9892405337577846e-05, + "loss": 0.5614915490150452, + "step": 537 + }, + { + "epoch": 0.1896035242290749, + "grad_norm": 1.440292679191453, + "learning_rate": 1.9891551267255114e-05, + "loss": 0.7647485733032227, + "step": 538 + }, + { + "epoch": 0.18995594713656389, + "grad_norm": 1.2459192275692494, + "learning_rate": 1.9890693839058566e-05, + "loss": 0.776042103767395, + "step": 539 + }, + { + "epoch": 0.19030837004405288, + "grad_norm": 1.4553672353845373, + "learning_rate": 1.9889833053279268e-05, + "loss": 0.7694810628890991, + "step": 540 + }, + { + "epoch": 0.19066079295154184, + "grad_norm": 1.3521577159056863, + "learning_rate": 1.9888968910209433e-05, + "loss": 0.6935995817184448, + "step": 541 + }, + { + "epoch": 0.19101321585903083, + "grad_norm": 1.7084028073476007, + "learning_rate": 1.988810141014241e-05, + "loss": 0.7538039088249207, + "step": 542 + }, + { + "epoch": 0.19136563876651982, + "grad_norm": 
1.305324041919721, + "learning_rate": 1.9887230553372686e-05, + "loss": 0.8149158954620361, + "step": 543 + }, + { + "epoch": 0.19171806167400882, + "grad_norm": 1.6378712299065388, + "learning_rate": 1.988635634019589e-05, + "loss": 0.7776780128479004, + "step": 544 + }, + { + "epoch": 0.1920704845814978, + "grad_norm": 1.48919568324374, + "learning_rate": 1.9885478770908793e-05, + "loss": 0.8527307510375977, + "step": 545 + }, + { + "epoch": 0.19242290748898677, + "grad_norm": 1.1804269388923583, + "learning_rate": 1.98845978458093e-05, + "loss": 0.7239484190940857, + "step": 546 + }, + { + "epoch": 0.19277533039647576, + "grad_norm": 1.4096556213691402, + "learning_rate": 1.9883713565196462e-05, + "loss": 0.6937836408615112, + "step": 547 + }, + { + "epoch": 0.19312775330396476, + "grad_norm": 1.3705456771921078, + "learning_rate": 1.9882825929370456e-05, + "loss": 0.8567923903465271, + "step": 548 + }, + { + "epoch": 0.19348017621145375, + "grad_norm": 1.2851787163283013, + "learning_rate": 1.9881934938632615e-05, + "loss": 0.7948861122131348, + "step": 549 + }, + { + "epoch": 0.19383259911894274, + "grad_norm": 1.2883171824741761, + "learning_rate": 1.9881040593285398e-05, + "loss": 0.6808983087539673, + "step": 550 + }, + { + "epoch": 0.19418502202643173, + "grad_norm": 1.349988333670182, + "learning_rate": 1.9880142893632412e-05, + "loss": 0.9089908599853516, + "step": 551 + }, + { + "epoch": 0.1945374449339207, + "grad_norm": 1.373721278775904, + "learning_rate": 1.9879241839978393e-05, + "loss": 0.7947918176651001, + "step": 552 + }, + { + "epoch": 0.1948898678414097, + "grad_norm": 1.1679807856929723, + "learning_rate": 1.9878337432629224e-05, + "loss": 0.880418598651886, + "step": 553 + }, + { + "epoch": 0.19524229074889868, + "grad_norm": 1.4208435552970164, + "learning_rate": 1.9877429671891917e-05, + "loss": 0.8845832347869873, + "step": 554 + }, + { + "epoch": 0.19559471365638767, + "grad_norm": 1.4905396338040395, + "learning_rate": 
1.9876518558074638e-05, + "loss": 0.7635341286659241, + "step": 555 + }, + { + "epoch": 0.19594713656387666, + "grad_norm": 1.3799865187677636, + "learning_rate": 1.9875604091486678e-05, + "loss": 0.9301069974899292, + "step": 556 + }, + { + "epoch": 0.19629955947136563, + "grad_norm": 1.255573262915276, + "learning_rate": 1.9874686272438467e-05, + "loss": 0.8788589239120483, + "step": 557 + }, + { + "epoch": 0.19665198237885462, + "grad_norm": 1.2418539833380446, + "learning_rate": 1.987376510124158e-05, + "loss": 0.7452565431594849, + "step": 558 + }, + { + "epoch": 0.1970044052863436, + "grad_norm": 1.5278788646328887, + "learning_rate": 1.9872840578208722e-05, + "loss": 0.819628119468689, + "step": 559 + }, + { + "epoch": 0.1973568281938326, + "grad_norm": 1.4844496784402743, + "learning_rate": 1.9871912703653744e-05, + "loss": 0.7807571291923523, + "step": 560 + }, + { + "epoch": 0.1977092511013216, + "grad_norm": 1.2611555469759475, + "learning_rate": 1.9870981477891626e-05, + "loss": 0.7091392278671265, + "step": 561 + }, + { + "epoch": 0.19806167400881058, + "grad_norm": 1.2433638561435678, + "learning_rate": 1.9870046901238496e-05, + "loss": 0.8174105882644653, + "step": 562 + }, + { + "epoch": 0.19841409691629955, + "grad_norm": 1.2352337461151273, + "learning_rate": 1.9869108974011607e-05, + "loss": 0.696865439414978, + "step": 563 + }, + { + "epoch": 0.19876651982378854, + "grad_norm": 1.4794543945089762, + "learning_rate": 1.986816769652936e-05, + "loss": 0.914303183555603, + "step": 564 + }, + { + "epoch": 0.19911894273127753, + "grad_norm": 1.2869835497381619, + "learning_rate": 1.986722306911129e-05, + "loss": 0.8397856950759888, + "step": 565 + }, + { + "epoch": 0.19947136563876652, + "grad_norm": 1.216082157504287, + "learning_rate": 1.9866275092078066e-05, + "loss": 0.7206380367279053, + "step": 566 + }, + { + "epoch": 0.19982378854625552, + "grad_norm": 1.3259146958291776, + "learning_rate": 1.98653237657515e-05, + "loss": 0.7017316818237305, + 
"step": 567 + }, + { + "epoch": 0.2001762114537445, + "grad_norm": 1.6365100661152858, + "learning_rate": 1.9864369090454538e-05, + "loss": 0.8797772526741028, + "step": 568 + }, + { + "epoch": 0.20052863436123347, + "grad_norm": 1.3948984288943356, + "learning_rate": 1.9863411066511257e-05, + "loss": 0.6643391847610474, + "step": 569 + }, + { + "epoch": 0.20088105726872246, + "grad_norm": 1.4631143705399865, + "learning_rate": 1.9862449694246878e-05, + "loss": 0.8662393093109131, + "step": 570 + }, + { + "epoch": 0.20123348017621145, + "grad_norm": 1.4103722629610054, + "learning_rate": 1.9861484973987762e-05, + "loss": 0.7766140699386597, + "step": 571 + }, + { + "epoch": 0.20158590308370045, + "grad_norm": 1.4422501075340284, + "learning_rate": 1.9860516906061397e-05, + "loss": 0.8582239151000977, + "step": 572 + }, + { + "epoch": 0.20193832599118944, + "grad_norm": 1.2359229208879663, + "learning_rate": 1.9859545490796414e-05, + "loss": 0.5838385820388794, + "step": 573 + }, + { + "epoch": 0.2022907488986784, + "grad_norm": 1.4256083108556754, + "learning_rate": 1.9858570728522573e-05, + "loss": 0.6715164184570312, + "step": 574 + }, + { + "epoch": 0.2026431718061674, + "grad_norm": 1.604413564730453, + "learning_rate": 1.9857592619570783e-05, + "loss": 0.7665218114852905, + "step": 575 + }, + { + "epoch": 0.20299559471365639, + "grad_norm": 1.3992633216102752, + "learning_rate": 1.985661116427308e-05, + "loss": 0.8060458898544312, + "step": 576 + }, + { + "epoch": 0.20334801762114538, + "grad_norm": 1.3647027340900928, + "learning_rate": 1.985562636296264e-05, + "loss": 0.8354060649871826, + "step": 577 + }, + { + "epoch": 0.20370044052863437, + "grad_norm": 1.61178503454425, + "learning_rate": 1.985463821597376e-05, + "loss": 0.8814351558685303, + "step": 578 + }, + { + "epoch": 0.20405286343612336, + "grad_norm": 1.3581614903846795, + "learning_rate": 1.9853646723641895e-05, + "loss": 0.9068918228149414, + "step": 579 + }, + { + "epoch": 0.20440528634361232, 
+ "grad_norm": 1.4217775001953692, + "learning_rate": 1.9852651886303624e-05, + "loss": 0.7671997547149658, + "step": 580 + }, + { + "epoch": 0.20475770925110132, + "grad_norm": 1.2987191699893856, + "learning_rate": 1.9851653704296664e-05, + "loss": 0.7906886339187622, + "step": 581 + }, + { + "epoch": 0.2051101321585903, + "grad_norm": 1.4550942850887114, + "learning_rate": 1.985065217795987e-05, + "loss": 0.8424232006072998, + "step": 582 + }, + { + "epoch": 0.2054625550660793, + "grad_norm": 1.2767538498679667, + "learning_rate": 1.984964730763322e-05, + "loss": 0.8335819244384766, + "step": 583 + }, + { + "epoch": 0.2058149779735683, + "grad_norm": 1.2913652769028938, + "learning_rate": 1.9848639093657844e-05, + "loss": 0.8340694308280945, + "step": 584 + }, + { + "epoch": 0.20616740088105726, + "grad_norm": 1.3161255240413319, + "learning_rate": 1.9847627536376e-05, + "loss": 0.9228274822235107, + "step": 585 + }, + { + "epoch": 0.20651982378854625, + "grad_norm": 1.548405161064148, + "learning_rate": 1.984661263613107e-05, + "loss": 0.7843449115753174, + "step": 586 + }, + { + "epoch": 0.20687224669603524, + "grad_norm": 1.3039537503613003, + "learning_rate": 1.9845594393267594e-05, + "loss": 0.7411990165710449, + "step": 587 + }, + { + "epoch": 0.20722466960352423, + "grad_norm": 1.3644443695047568, + "learning_rate": 1.9844572808131228e-05, + "loss": 0.7520540356636047, + "step": 588 + }, + { + "epoch": 0.20757709251101322, + "grad_norm": 1.2894133104841217, + "learning_rate": 1.9843547881068763e-05, + "loss": 0.795365571975708, + "step": 589 + }, + { + "epoch": 0.20792951541850221, + "grad_norm": 1.280356655308606, + "learning_rate": 1.984251961242814e-05, + "loss": 0.8415528535842896, + "step": 590 + }, + { + "epoch": 0.20828193832599118, + "grad_norm": 1.4654647998731167, + "learning_rate": 1.9841488002558416e-05, + "loss": 0.8555570244789124, + "step": 591 + }, + { + "epoch": 0.20863436123348017, + "grad_norm": 1.314593410908928, + "learning_rate": 
1.9840453051809792e-05, + "loss": 0.8214600086212158, + "step": 592 + }, + { + "epoch": 0.20898678414096916, + "grad_norm": 1.2598900623176714, + "learning_rate": 1.9839414760533607e-05, + "loss": 0.7746415138244629, + "step": 593 + }, + { + "epoch": 0.20933920704845815, + "grad_norm": 1.6285440778435663, + "learning_rate": 1.9838373129082325e-05, + "loss": 1.0861419439315796, + "step": 594 + }, + { + "epoch": 0.20969162995594715, + "grad_norm": 1.327372383451943, + "learning_rate": 1.9837328157809547e-05, + "loss": 0.7530953884124756, + "step": 595 + }, + { + "epoch": 0.21004405286343614, + "grad_norm": 1.420023169388647, + "learning_rate": 1.9836279847070004e-05, + "loss": 0.8811959624290466, + "step": 596 + }, + { + "epoch": 0.2103964757709251, + "grad_norm": 1.2274254083036087, + "learning_rate": 1.9835228197219573e-05, + "loss": 0.7956523299217224, + "step": 597 + }, + { + "epoch": 0.2107488986784141, + "grad_norm": 1.306015861681406, + "learning_rate": 1.9834173208615253e-05, + "loss": 0.8710414171218872, + "step": 598 + }, + { + "epoch": 0.21110132158590308, + "grad_norm": 1.303850147164254, + "learning_rate": 1.983311488161518e-05, + "loss": 0.9057297706604004, + "step": 599 + }, + { + "epoch": 0.21145374449339208, + "grad_norm": 1.2517049783711822, + "learning_rate": 1.983205321657862e-05, + "loss": 0.7531988024711609, + "step": 600 + }, + { + "epoch": 0.21180616740088107, + "grad_norm": 1.392455719061042, + "learning_rate": 1.983098821386598e-05, + "loss": 0.6508063077926636, + "step": 601 + }, + { + "epoch": 0.21215859030837003, + "grad_norm": 1.238668234857589, + "learning_rate": 1.9829919873838796e-05, + "loss": 0.7267025709152222, + "step": 602 + }, + { + "epoch": 0.21251101321585902, + "grad_norm": 1.2232739877442529, + "learning_rate": 1.9828848196859727e-05, + "loss": 0.6930510997772217, + "step": 603 + }, + { + "epoch": 0.21286343612334802, + "grad_norm": 1.4104259448916805, + "learning_rate": 1.9827773183292583e-05, + "loss": 0.7613120079040527, 
+ "step": 604 + }, + { + "epoch": 0.213215859030837, + "grad_norm": 1.2586328753898472, + "learning_rate": 1.9826694833502295e-05, + "loss": 0.763299822807312, + "step": 605 + }, + { + "epoch": 0.213568281938326, + "grad_norm": 1.4431352363644856, + "learning_rate": 1.9825613147854928e-05, + "loss": 0.7599194049835205, + "step": 606 + }, + { + "epoch": 0.213920704845815, + "grad_norm": 1.3487971590690426, + "learning_rate": 1.9824528126717687e-05, + "loss": 0.869399905204773, + "step": 607 + }, + { + "epoch": 0.21427312775330395, + "grad_norm": 1.3853231700631432, + "learning_rate": 1.9823439770458893e-05, + "loss": 0.733409583568573, + "step": 608 + }, + { + "epoch": 0.21462555066079295, + "grad_norm": 1.2766333009964275, + "learning_rate": 1.9822348079448014e-05, + "loss": 0.8302386999130249, + "step": 609 + }, + { + "epoch": 0.21497797356828194, + "grad_norm": 1.1872454682531661, + "learning_rate": 1.9821253054055645e-05, + "loss": 0.8234561681747437, + "step": 610 + }, + { + "epoch": 0.21533039647577093, + "grad_norm": 1.336729476582052, + "learning_rate": 1.9820154694653514e-05, + "loss": 0.81988525390625, + "step": 611 + }, + { + "epoch": 0.21568281938325992, + "grad_norm": 1.1619766622665528, + "learning_rate": 1.9819053001614478e-05, + "loss": 0.6437678933143616, + "step": 612 + }, + { + "epoch": 0.21603524229074889, + "grad_norm": 1.398835884660331, + "learning_rate": 1.9817947975312527e-05, + "loss": 0.8256562948226929, + "step": 613 + }, + { + "epoch": 0.21638766519823788, + "grad_norm": 1.4423824320045469, + "learning_rate": 1.9816839616122787e-05, + "loss": 0.8204725980758667, + "step": 614 + }, + { + "epoch": 0.21674008810572687, + "grad_norm": 1.4648639859051293, + "learning_rate": 1.9815727924421507e-05, + "loss": 0.7492775917053223, + "step": 615 + }, + { + "epoch": 0.21709251101321586, + "grad_norm": 1.4585481343848268, + "learning_rate": 1.9814612900586075e-05, + "loss": 0.629736065864563, + "step": 616 + }, + { + "epoch": 0.21744493392070485, + 
"grad_norm": 1.3908853161597456, + "learning_rate": 1.9813494544995e-05, + "loss": 0.7974159717559814, + "step": 617 + }, + { + "epoch": 0.21779735682819384, + "grad_norm": 1.4158223772493663, + "learning_rate": 1.981237285802794e-05, + "loss": 0.8367668390274048, + "step": 618 + }, + { + "epoch": 0.2181497797356828, + "grad_norm": 1.2051753792883582, + "learning_rate": 1.9811247840065667e-05, + "loss": 0.7942521572113037, + "step": 619 + }, + { + "epoch": 0.2185022026431718, + "grad_norm": 1.3827277102573685, + "learning_rate": 1.981011949149009e-05, + "loss": 0.7863545417785645, + "step": 620 + }, + { + "epoch": 0.2188546255506608, + "grad_norm": 1.3809343727942922, + "learning_rate": 1.9808987812684247e-05, + "loss": 0.8667019605636597, + "step": 621 + }, + { + "epoch": 0.21920704845814978, + "grad_norm": 1.5738475739563456, + "learning_rate": 1.9807852804032306e-05, + "loss": 0.8555353283882141, + "step": 622 + }, + { + "epoch": 0.21955947136563878, + "grad_norm": 1.244926951925701, + "learning_rate": 1.9806714465919573e-05, + "loss": 0.8170013427734375, + "step": 623 + }, + { + "epoch": 0.21991189427312777, + "grad_norm": 1.331256668600172, + "learning_rate": 1.9805572798732475e-05, + "loss": 0.9277342557907104, + "step": 624 + }, + { + "epoch": 0.22026431718061673, + "grad_norm": 1.4090219105247375, + "learning_rate": 1.980442780285857e-05, + "loss": 0.6536964178085327, + "step": 625 + }, + { + "epoch": 0.22061674008810572, + "grad_norm": 1.4088256669280743, + "learning_rate": 1.980327947868655e-05, + "loss": 0.7197799682617188, + "step": 626 + }, + { + "epoch": 0.22096916299559471, + "grad_norm": 1.1381025512945977, + "learning_rate": 1.980212782660624e-05, + "loss": 0.7558401823043823, + "step": 627 + }, + { + "epoch": 0.2213215859030837, + "grad_norm": 1.4031284519802554, + "learning_rate": 1.9800972847008586e-05, + "loss": 0.7918291091918945, + "step": 628 + }, + { + "epoch": 0.2216740088105727, + "grad_norm": 1.4810910878326864, + "learning_rate": 
1.979981454028567e-05, + "loss": 0.7159492373466492, + "step": 629 + }, + { + "epoch": 0.22202643171806166, + "grad_norm": 1.5418605472416471, + "learning_rate": 1.9798652906830694e-05, + "loss": 0.854686439037323, + "step": 630 + }, + { + "epoch": 0.22237885462555065, + "grad_norm": 1.6329149097762432, + "learning_rate": 1.9797487947038007e-05, + "loss": 0.736785888671875, + "step": 631 + }, + { + "epoch": 0.22273127753303965, + "grad_norm": 1.2749674694710476, + "learning_rate": 1.9796319661303065e-05, + "loss": 0.7092996835708618, + "step": 632 + }, + { + "epoch": 0.22308370044052864, + "grad_norm": 1.4592836621170417, + "learning_rate": 1.9795148050022477e-05, + "loss": 0.8890455961227417, + "step": 633 + }, + { + "epoch": 0.22343612334801763, + "grad_norm": 1.2618947600836363, + "learning_rate": 1.979397311359396e-05, + "loss": 0.7476855516433716, + "step": 634 + }, + { + "epoch": 0.22378854625550662, + "grad_norm": 1.4307363207113668, + "learning_rate": 1.979279485241637e-05, + "loss": 0.7810029983520508, + "step": 635 + }, + { + "epoch": 0.22414096916299558, + "grad_norm": 1.2070666788938549, + "learning_rate": 1.9791613266889688e-05, + "loss": 0.6679891347885132, + "step": 636 + }, + { + "epoch": 0.22449339207048458, + "grad_norm": 1.6320710320094325, + "learning_rate": 1.979042835741503e-05, + "loss": 0.809790849685669, + "step": 637 + }, + { + "epoch": 0.22484581497797357, + "grad_norm": 1.6737967848633384, + "learning_rate": 1.9789240124394638e-05, + "loss": 0.8347213268280029, + "step": 638 + }, + { + "epoch": 0.22519823788546256, + "grad_norm": 1.1935958187808327, + "learning_rate": 1.9788048568231875e-05, + "loss": 0.6620997190475464, + "step": 639 + }, + { + "epoch": 0.22555066079295155, + "grad_norm": 1.2898316066784317, + "learning_rate": 1.9786853689331235e-05, + "loss": 0.7727694511413574, + "step": 640 + }, + { + "epoch": 0.22590308370044052, + "grad_norm": 1.2854878709867101, + "learning_rate": 1.9785655488098348e-05, + "loss": 
0.7433278560638428, + "step": 641 + }, + { + "epoch": 0.2262555066079295, + "grad_norm": 1.3523753090224933, + "learning_rate": 1.9784453964939966e-05, + "loss": 0.7375571727752686, + "step": 642 + }, + { + "epoch": 0.2266079295154185, + "grad_norm": 1.3285668366741343, + "learning_rate": 1.9783249120263962e-05, + "loss": 0.5838407874107361, + "step": 643 + }, + { + "epoch": 0.2269603524229075, + "grad_norm": 1.3906475095958148, + "learning_rate": 1.978204095447935e-05, + "loss": 0.7120088934898376, + "step": 644 + }, + { + "epoch": 0.22731277533039648, + "grad_norm": 1.5058740006044322, + "learning_rate": 1.9780829467996262e-05, + "loss": 0.7668102383613586, + "step": 645 + }, + { + "epoch": 0.22766519823788547, + "grad_norm": 1.2993959173766831, + "learning_rate": 1.977961466122596e-05, + "loss": 0.748942494392395, + "step": 646 + }, + { + "epoch": 0.22801762114537444, + "grad_norm": 1.476253609353715, + "learning_rate": 1.9778396534580836e-05, + "loss": 0.7569374442100525, + "step": 647 + }, + { + "epoch": 0.22837004405286343, + "grad_norm": 1.352884217242173, + "learning_rate": 1.97771750884744e-05, + "loss": 0.7981363534927368, + "step": 648 + }, + { + "epoch": 0.22872246696035242, + "grad_norm": 1.5069792289976334, + "learning_rate": 1.97759503233213e-05, + "loss": 0.7501301765441895, + "step": 649 + }, + { + "epoch": 0.2290748898678414, + "grad_norm": 1.4079968546467614, + "learning_rate": 1.9774722239537305e-05, + "loss": 0.7880003452301025, + "step": 650 + }, + { + "epoch": 0.2294273127753304, + "grad_norm": 1.3141024886679253, + "learning_rate": 1.977349083753931e-05, + "loss": 0.9007930755615234, + "step": 651 + }, + { + "epoch": 0.2297797356828194, + "grad_norm": 1.1634171776911992, + "learning_rate": 1.9772256117745335e-05, + "loss": 0.6291126012802124, + "step": 652 + }, + { + "epoch": 0.23013215859030836, + "grad_norm": 1.1487631323898542, + "learning_rate": 1.9771018080574534e-05, + "loss": 0.8155031204223633, + "step": 653 + }, + { + "epoch": 
0.23048458149779735, + "grad_norm": 1.2941785819245946, + "learning_rate": 1.976977672644718e-05, + "loss": 0.7103240489959717, + "step": 654 + }, + { + "epoch": 0.23083700440528634, + "grad_norm": 1.4170836267106273, + "learning_rate": 1.9768532055784678e-05, + "loss": 0.8590278625488281, + "step": 655 + }, + { + "epoch": 0.23118942731277534, + "grad_norm": 1.6156852038452685, + "learning_rate": 1.9767284069009545e-05, + "loss": 0.7729001641273499, + "step": 656 + }, + { + "epoch": 0.23154185022026433, + "grad_norm": 1.543950265697803, + "learning_rate": 1.9766032766545445e-05, + "loss": 0.8287409543991089, + "step": 657 + }, + { + "epoch": 0.2318942731277533, + "grad_norm": 1.327581925526745, + "learning_rate": 1.9764778148817147e-05, + "loss": 0.8651477098464966, + "step": 658 + }, + { + "epoch": 0.23224669603524228, + "grad_norm": 1.3954780395501065, + "learning_rate": 1.976352021625056e-05, + "loss": 0.7582576274871826, + "step": 659 + }, + { + "epoch": 0.23259911894273128, + "grad_norm": 1.2510605377459358, + "learning_rate": 1.976225896927271e-05, + "loss": 0.6579675078392029, + "step": 660 + }, + { + "epoch": 0.23295154185022027, + "grad_norm": 1.4130234326235036, + "learning_rate": 1.9760994408311757e-05, + "loss": 0.8817700147628784, + "step": 661 + }, + { + "epoch": 0.23330396475770926, + "grad_norm": 1.3799441341137708, + "learning_rate": 1.9759726533796976e-05, + "loss": 0.7241606712341309, + "step": 662 + }, + { + "epoch": 0.23365638766519825, + "grad_norm": 1.2880787484904483, + "learning_rate": 1.9758455346158768e-05, + "loss": 0.7434183359146118, + "step": 663 + }, + { + "epoch": 0.23400881057268721, + "grad_norm": 1.3406860649308125, + "learning_rate": 1.9757180845828663e-05, + "loss": 0.632422685623169, + "step": 664 + }, + { + "epoch": 0.2343612334801762, + "grad_norm": 1.394213400542702, + "learning_rate": 1.9755903033239318e-05, + "loss": 0.7276040315628052, + "step": 665 + }, + { + "epoch": 0.2347136563876652, + "grad_norm": 
1.4191729622512466, + "learning_rate": 1.975462190882451e-05, + "loss": 0.8070325255393982, + "step": 666 + }, + { + "epoch": 0.2350660792951542, + "grad_norm": 1.505939347053283, + "learning_rate": 1.9753337473019133e-05, + "loss": 0.867915689945221, + "step": 667 + }, + { + "epoch": 0.23541850220264318, + "grad_norm": 1.2080841146883634, + "learning_rate": 1.9752049726259223e-05, + "loss": 0.7905307412147522, + "step": 668 + }, + { + "epoch": 0.23577092511013215, + "grad_norm": 1.3166867899458456, + "learning_rate": 1.9750758668981925e-05, + "loss": 0.7721420526504517, + "step": 669 + }, + { + "epoch": 0.23612334801762114, + "grad_norm": 1.3746426458674128, + "learning_rate": 1.9749464301625515e-05, + "loss": 0.7926005125045776, + "step": 670 + }, + { + "epoch": 0.23647577092511013, + "grad_norm": 1.387001164209418, + "learning_rate": 1.974816662462939e-05, + "loss": 0.7651785612106323, + "step": 671 + }, + { + "epoch": 0.23682819383259912, + "grad_norm": 1.3285492717471519, + "learning_rate": 1.974686563843407e-05, + "loss": 0.7548795938491821, + "step": 672 + }, + { + "epoch": 0.2371806167400881, + "grad_norm": 1.256836928643264, + "learning_rate": 1.9745561343481197e-05, + "loss": 0.5405399799346924, + "step": 673 + }, + { + "epoch": 0.2375330396475771, + "grad_norm": 1.429166434081011, + "learning_rate": 1.9744253740213542e-05, + "loss": 0.7561137080192566, + "step": 674 + }, + { + "epoch": 0.23788546255506607, + "grad_norm": 1.2880562459402407, + "learning_rate": 1.9742942829074993e-05, + "loss": 0.8809534907341003, + "step": 675 + }, + { + "epoch": 0.23823788546255506, + "grad_norm": 1.4170174919214424, + "learning_rate": 1.974162861051057e-05, + "loss": 0.750350832939148, + "step": 676 + }, + { + "epoch": 0.23859030837004405, + "grad_norm": 1.629083058939835, + "learning_rate": 1.9740311084966398e-05, + "loss": 0.89476478099823, + "step": 677 + }, + { + "epoch": 0.23894273127753304, + "grad_norm": 1.2576348651951754, + "learning_rate": 
1.9738990252889748e-05, + "loss": 0.8647176027297974, + "step": 678 + }, + { + "epoch": 0.23929515418502204, + "grad_norm": 1.4086313229573832, + "learning_rate": 1.9737666114728996e-05, + "loss": 0.7331727743148804, + "step": 679 + }, + { + "epoch": 0.239647577092511, + "grad_norm": 1.471872239566745, + "learning_rate": 1.9736338670933642e-05, + "loss": 0.7714364528656006, + "step": 680 + }, + { + "epoch": 0.24, + "grad_norm": 1.2246586432486557, + "learning_rate": 1.973500792195432e-05, + "loss": 0.7840908765792847, + "step": 681 + }, + { + "epoch": 0.24035242290748898, + "grad_norm": 1.5714154435783916, + "learning_rate": 1.9733673868242767e-05, + "loss": 0.8723878860473633, + "step": 682 + }, + { + "epoch": 0.24070484581497797, + "grad_norm": 1.3325473695906174, + "learning_rate": 1.9732336510251864e-05, + "loss": 0.782090425491333, + "step": 683 + }, + { + "epoch": 0.24105726872246697, + "grad_norm": 1.4114017797446734, + "learning_rate": 1.9730995848435594e-05, + "loss": 0.8000990152359009, + "step": 684 + }, + { + "epoch": 0.24140969162995596, + "grad_norm": 1.2098442989857856, + "learning_rate": 1.9729651883249075e-05, + "loss": 0.7499237060546875, + "step": 685 + }, + { + "epoch": 0.24176211453744492, + "grad_norm": 1.376086425817015, + "learning_rate": 1.972830461514854e-05, + "loss": 0.8786858916282654, + "step": 686 + }, + { + "epoch": 0.2421145374449339, + "grad_norm": 1.2058295584451697, + "learning_rate": 1.972695404459134e-05, + "loss": 0.7039557695388794, + "step": 687 + }, + { + "epoch": 0.2424669603524229, + "grad_norm": 1.2391412724176054, + "learning_rate": 1.9725600172035962e-05, + "loss": 0.6699448823928833, + "step": 688 + }, + { + "epoch": 0.2428193832599119, + "grad_norm": 1.4984585662906706, + "learning_rate": 1.9724242997941995e-05, + "loss": 0.6753977537155151, + "step": 689 + }, + { + "epoch": 0.2431718061674009, + "grad_norm": 1.465232022987203, + "learning_rate": 1.9722882522770163e-05, + "loss": 0.7139854431152344, + "step": 690 + 
}, + { + "epoch": 0.24352422907488988, + "grad_norm": 1.2814158831499989, + "learning_rate": 1.9721518746982296e-05, + "loss": 0.7894896864891052, + "step": 691 + }, + { + "epoch": 0.24387665198237884, + "grad_norm": 1.2615077213285395, + "learning_rate": 1.972015167104136e-05, + "loss": 0.5663755536079407, + "step": 692 + }, + { + "epoch": 0.24422907488986784, + "grad_norm": 2.058599574246893, + "learning_rate": 1.971878129541144e-05, + "loss": 0.8607856035232544, + "step": 693 + }, + { + "epoch": 0.24458149779735683, + "grad_norm": 1.351791839280567, + "learning_rate": 1.9717407620557724e-05, + "loss": 0.7384383678436279, + "step": 694 + }, + { + "epoch": 0.24493392070484582, + "grad_norm": 1.3580988060863546, + "learning_rate": 1.971603064694654e-05, + "loss": 0.6145502328872681, + "step": 695 + }, + { + "epoch": 0.2452863436123348, + "grad_norm": 1.216736398001555, + "learning_rate": 1.9714650375045328e-05, + "loss": 0.6758620738983154, + "step": 696 + }, + { + "epoch": 0.24563876651982378, + "grad_norm": 1.4471588548341505, + "learning_rate": 1.9713266805322643e-05, + "loss": 0.7416598200798035, + "step": 697 + }, + { + "epoch": 0.24599118942731277, + "grad_norm": 1.5476710427855191, + "learning_rate": 1.9711879938248163e-05, + "loss": 0.7603555917739868, + "step": 698 + }, + { + "epoch": 0.24634361233480176, + "grad_norm": 1.442293220466076, + "learning_rate": 1.9710489774292692e-05, + "loss": 0.9119949340820312, + "step": 699 + }, + { + "epoch": 0.24669603524229075, + "grad_norm": 1.3843099449438452, + "learning_rate": 1.9709096313928144e-05, + "loss": 0.6884537935256958, + "step": 700 + }, + { + "epoch": 0.24704845814977974, + "grad_norm": 1.618333940643818, + "learning_rate": 1.9707699557627554e-05, + "loss": 0.7928721904754639, + "step": 701 + }, + { + "epoch": 0.24740088105726873, + "grad_norm": 1.593414442103489, + "learning_rate": 1.970629950586508e-05, + "loss": 0.888218104839325, + "step": 702 + }, + { + "epoch": 0.2477533039647577, + "grad_norm": 
1.484965940613647, + "learning_rate": 1.9704896159115997e-05, + "loss": 0.7949875593185425, + "step": 703 + }, + { + "epoch": 0.2481057268722467, + "grad_norm": 1.5094809465076762, + "learning_rate": 1.970348951785669e-05, + "loss": 0.9031823873519897, + "step": 704 + }, + { + "epoch": 0.24845814977973568, + "grad_norm": 1.4099687182713576, + "learning_rate": 1.9702079582564682e-05, + "loss": 0.636865496635437, + "step": 705 + }, + { + "epoch": 0.24881057268722467, + "grad_norm": 1.5392719282626255, + "learning_rate": 1.9700666353718593e-05, + "loss": 0.731717586517334, + "step": 706 + }, + { + "epoch": 0.24916299559471367, + "grad_norm": 1.5878589631749256, + "learning_rate": 1.9699249831798172e-05, + "loss": 0.7571220397949219, + "step": 707 + }, + { + "epoch": 0.24951541850220263, + "grad_norm": 1.5180992539956903, + "learning_rate": 1.969783001728429e-05, + "loss": 0.6112762689590454, + "step": 708 + }, + { + "epoch": 0.24986784140969162, + "grad_norm": 1.3651864060041954, + "learning_rate": 1.9696406910658918e-05, + "loss": 0.6737902164459229, + "step": 709 + }, + { + "epoch": 0.25022026431718064, + "grad_norm": 1.328645038543607, + "learning_rate": 1.9694980512405167e-05, + "loss": 0.6525848507881165, + "step": 710 + }, + { + "epoch": 0.2505726872246696, + "grad_norm": 1.302186292631501, + "learning_rate": 1.9693550823007248e-05, + "loss": 0.9107403755187988, + "step": 711 + }, + { + "epoch": 0.25092511013215857, + "grad_norm": 1.5423262639437814, + "learning_rate": 1.96921178429505e-05, + "loss": 0.7373934984207153, + "step": 712 + }, + { + "epoch": 0.25127753303964756, + "grad_norm": 1.4043304459804222, + "learning_rate": 1.9690681572721377e-05, + "loss": 0.6383399963378906, + "step": 713 + }, + { + "epoch": 0.25162995594713655, + "grad_norm": 1.3203935888344693, + "learning_rate": 1.9689242012807442e-05, + "loss": 0.6600236296653748, + "step": 714 + }, + { + "epoch": 0.25198237885462554, + "grad_norm": 1.6489156261044324, + "learning_rate": 
1.9687799163697386e-05, + "loss": 0.9195891618728638, + "step": 715 + }, + { + "epoch": 0.25233480176211454, + "grad_norm": 1.300868905936819, + "learning_rate": 1.968635302588101e-05, + "loss": 0.7122433185577393, + "step": 716 + }, + { + "epoch": 0.2526872246696035, + "grad_norm": 1.467731789065586, + "learning_rate": 1.968490359984923e-05, + "loss": 0.7601606845855713, + "step": 717 + }, + { + "epoch": 0.2530396475770925, + "grad_norm": 1.2967441771844141, + "learning_rate": 1.9683450886094087e-05, + "loss": 0.8216352462768555, + "step": 718 + }, + { + "epoch": 0.2533920704845815, + "grad_norm": 1.4134852768930402, + "learning_rate": 1.9681994885108727e-05, + "loss": 0.8783165216445923, + "step": 719 + }, + { + "epoch": 0.2537444933920705, + "grad_norm": 1.5566095938184208, + "learning_rate": 1.9680535597387416e-05, + "loss": 0.7323269844055176, + "step": 720 + }, + { + "epoch": 0.2540969162995595, + "grad_norm": 1.6250423495927373, + "learning_rate": 1.9679073023425542e-05, + "loss": 0.93906170129776, + "step": 721 + }, + { + "epoch": 0.25444933920704843, + "grad_norm": 1.3857164700730882, + "learning_rate": 1.96776071637196e-05, + "loss": 0.774397611618042, + "step": 722 + }, + { + "epoch": 0.2548017621145374, + "grad_norm": 1.3653604324598565, + "learning_rate": 1.9676138018767204e-05, + "loss": 0.6634535789489746, + "step": 723 + }, + { + "epoch": 0.2551541850220264, + "grad_norm": 1.3364894441034205, + "learning_rate": 1.9674665589067082e-05, + "loss": 0.7705625295639038, + "step": 724 + }, + { + "epoch": 0.2555066079295154, + "grad_norm": 1.5708708799323368, + "learning_rate": 1.9673189875119082e-05, + "loss": 0.706364631652832, + "step": 725 + }, + { + "epoch": 0.2558590308370044, + "grad_norm": 1.2599963014034798, + "learning_rate": 1.9671710877424158e-05, + "loss": 0.7295894622802734, + "step": 726 + }, + { + "epoch": 0.2562114537444934, + "grad_norm": 1.6926806599843667, + "learning_rate": 1.9670228596484383e-05, + "loss": 0.8135089874267578, + "step": 
727 + }, + { + "epoch": 0.2565638766519824, + "grad_norm": 1.5978181657651334, + "learning_rate": 1.966874303280295e-05, + "loss": 0.801734209060669, + "step": 728 + }, + { + "epoch": 0.2569162995594714, + "grad_norm": 1.728546952239603, + "learning_rate": 1.9667254186884164e-05, + "loss": 0.8405104875564575, + "step": 729 + }, + { + "epoch": 0.25726872246696036, + "grad_norm": 1.2523029350782668, + "learning_rate": 1.9665762059233434e-05, + "loss": 0.8320014476776123, + "step": 730 + }, + { + "epoch": 0.25762114537444936, + "grad_norm": 1.2667340666882572, + "learning_rate": 1.96642666503573e-05, + "loss": 0.8701308965682983, + "step": 731 + }, + { + "epoch": 0.25797356828193835, + "grad_norm": 1.1982399130470203, + "learning_rate": 1.9662767960763394e-05, + "loss": 0.7980693578720093, + "step": 732 + }, + { + "epoch": 0.25832599118942734, + "grad_norm": 1.3765503313855298, + "learning_rate": 1.9661265990960486e-05, + "loss": 0.7258214950561523, + "step": 733 + }, + { + "epoch": 0.2586784140969163, + "grad_norm": 1.1683887680739682, + "learning_rate": 1.9659760741458444e-05, + "loss": 0.6860172748565674, + "step": 734 + }, + { + "epoch": 0.25903083700440527, + "grad_norm": 1.4034749748766104, + "learning_rate": 1.9658252212768252e-05, + "loss": 0.7438071370124817, + "step": 735 + }, + { + "epoch": 0.25938325991189426, + "grad_norm": 1.6140837506314978, + "learning_rate": 1.9656740405402007e-05, + "loss": 0.8680309057235718, + "step": 736 + }, + { + "epoch": 0.25973568281938325, + "grad_norm": 1.5365221656010954, + "learning_rate": 1.9655225319872925e-05, + "loss": 0.933163046836853, + "step": 737 + }, + { + "epoch": 0.26008810572687224, + "grad_norm": 1.3636194628802456, + "learning_rate": 1.9653706956695333e-05, + "loss": 0.8746597170829773, + "step": 738 + }, + { + "epoch": 0.26044052863436123, + "grad_norm": 1.31799671460777, + "learning_rate": 1.965218531638466e-05, + "loss": 0.857211709022522, + "step": 739 + }, + { + "epoch": 0.2607929515418502, + 
"grad_norm": 1.313241643085953, + "learning_rate": 1.965066039945746e-05, + "loss": 0.7837733030319214, + "step": 740 + }, + { + "epoch": 0.2611453744493392, + "grad_norm": 1.3527479757495662, + "learning_rate": 1.9649132206431395e-05, + "loss": 0.8401491641998291, + "step": 741 + }, + { + "epoch": 0.2614977973568282, + "grad_norm": 1.14302378839197, + "learning_rate": 1.9647600737825235e-05, + "loss": 0.7070307731628418, + "step": 742 + }, + { + "epoch": 0.2618502202643172, + "grad_norm": 1.756317003631787, + "learning_rate": 1.9646065994158873e-05, + "loss": 0.7649509310722351, + "step": 743 + }, + { + "epoch": 0.2622026431718062, + "grad_norm": 1.5152987231460182, + "learning_rate": 1.9644527975953302e-05, + "loss": 0.7759182453155518, + "step": 744 + }, + { + "epoch": 0.26255506607929513, + "grad_norm": 1.5151017458848213, + "learning_rate": 1.9642986683730626e-05, + "loss": 0.8176295757293701, + "step": 745 + }, + { + "epoch": 0.2629074889867841, + "grad_norm": 1.2974538182792636, + "learning_rate": 1.9641442118014078e-05, + "loss": 0.8406162261962891, + "step": 746 + }, + { + "epoch": 0.2632599118942731, + "grad_norm": 1.3410871141615202, + "learning_rate": 1.9639894279327985e-05, + "loss": 0.8064795732498169, + "step": 747 + }, + { + "epoch": 0.2636123348017621, + "grad_norm": 1.2769637989850176, + "learning_rate": 1.9638343168197784e-05, + "loss": 0.6662956476211548, + "step": 748 + }, + { + "epoch": 0.2639647577092511, + "grad_norm": 1.5105008685571195, + "learning_rate": 1.9636788785150037e-05, + "loss": 0.8747783899307251, + "step": 749 + }, + { + "epoch": 0.2643171806167401, + "grad_norm": 1.4261291763421449, + "learning_rate": 1.9635231130712406e-05, + "loss": 0.7893349528312683, + "step": 750 + }, + { + "epoch": 0.2646696035242291, + "grad_norm": 1.2907133964100823, + "learning_rate": 1.9633670205413665e-05, + "loss": 0.7380903959274292, + "step": 751 + }, + { + "epoch": 0.26502202643171807, + "grad_norm": 1.5293000163357584, + "learning_rate": 
1.96321060097837e-05, + "loss": 0.9164873957633972, + "step": 752 + }, + { + "epoch": 0.26537444933920706, + "grad_norm": 1.5448314355627197, + "learning_rate": 1.9630538544353505e-05, + "loss": 0.7664264440536499, + "step": 753 + }, + { + "epoch": 0.26572687224669606, + "grad_norm": 1.4037068281656377, + "learning_rate": 1.9628967809655187e-05, + "loss": 0.8117275238037109, + "step": 754 + }, + { + "epoch": 0.26607929515418505, + "grad_norm": 1.3044642797371147, + "learning_rate": 1.9627393806221967e-05, + "loss": 0.6203808784484863, + "step": 755 + }, + { + "epoch": 0.266431718061674, + "grad_norm": 1.5462507455011187, + "learning_rate": 1.9625816534588163e-05, + "loss": 0.8777878284454346, + "step": 756 + }, + { + "epoch": 0.266784140969163, + "grad_norm": 1.2883365910622429, + "learning_rate": 1.9624235995289212e-05, + "loss": 0.6984438300132751, + "step": 757 + }, + { + "epoch": 0.26713656387665197, + "grad_norm": 1.5746997664717406, + "learning_rate": 1.962265218886166e-05, + "loss": 0.7806228399276733, + "step": 758 + }, + { + "epoch": 0.26748898678414096, + "grad_norm": 1.318579751564355, + "learning_rate": 1.9621065115843155e-05, + "loss": 0.6924373507499695, + "step": 759 + }, + { + "epoch": 0.26784140969162995, + "grad_norm": 1.2867883287922122, + "learning_rate": 1.9619474776772462e-05, + "loss": 0.6809841394424438, + "step": 760 + }, + { + "epoch": 0.26819383259911894, + "grad_norm": 1.3766475304418688, + "learning_rate": 1.961788117218945e-05, + "loss": 0.8346723318099976, + "step": 761 + }, + { + "epoch": 0.26854625550660793, + "grad_norm": 1.3717126814625271, + "learning_rate": 1.96162843026351e-05, + "loss": 0.8000205755233765, + "step": 762 + }, + { + "epoch": 0.2688986784140969, + "grad_norm": 1.256040752163899, + "learning_rate": 1.9614684168651504e-05, + "loss": 0.8026692271232605, + "step": 763 + }, + { + "epoch": 0.2692511013215859, + "grad_norm": 1.4850412299335856, + "learning_rate": 1.961308077078185e-05, + "loss": 0.921292781829834, + 
"step": 764 + }, + { + "epoch": 0.2696035242290749, + "grad_norm": 1.6577133714061814, + "learning_rate": 1.9611474109570446e-05, + "loss": 0.8018487095832825, + "step": 765 + }, + { + "epoch": 0.2699559471365639, + "grad_norm": 1.2440268554728864, + "learning_rate": 1.9609864185562698e-05, + "loss": 0.7400588989257812, + "step": 766 + }, + { + "epoch": 0.27030837004405284, + "grad_norm": 1.2094824954459686, + "learning_rate": 1.960825099930513e-05, + "loss": 0.6243399977684021, + "step": 767 + }, + { + "epoch": 0.27066079295154183, + "grad_norm": 1.415024134390762, + "learning_rate": 1.9606634551345373e-05, + "loss": 0.7680903673171997, + "step": 768 + }, + { + "epoch": 0.2710132158590308, + "grad_norm": 1.3126349106428246, + "learning_rate": 1.960501484223215e-05, + "loss": 0.8783930540084839, + "step": 769 + }, + { + "epoch": 0.2713656387665198, + "grad_norm": 1.4964756858010921, + "learning_rate": 1.9603391872515308e-05, + "loss": 0.7910561561584473, + "step": 770 + }, + { + "epoch": 0.2717180616740088, + "grad_norm": 1.4400527227532898, + "learning_rate": 1.9601765642745795e-05, + "loss": 0.7325295209884644, + "step": 771 + }, + { + "epoch": 0.2720704845814978, + "grad_norm": 1.3018158119605838, + "learning_rate": 1.9600136153475666e-05, + "loss": 0.7017170190811157, + "step": 772 + }, + { + "epoch": 0.2724229074889868, + "grad_norm": 1.5395904311410002, + "learning_rate": 1.959850340525808e-05, + "loss": 0.9281908273696899, + "step": 773 + }, + { + "epoch": 0.2727753303964758, + "grad_norm": 1.256408104414643, + "learning_rate": 1.95968673986473e-05, + "loss": 0.7421029806137085, + "step": 774 + }, + { + "epoch": 0.27312775330396477, + "grad_norm": 1.3171523536350294, + "learning_rate": 1.9595228134198708e-05, + "loss": 0.7474848031997681, + "step": 775 + }, + { + "epoch": 0.27348017621145376, + "grad_norm": 1.3683438241049553, + "learning_rate": 1.9593585612468776e-05, + "loss": 0.7267760038375854, + "step": 776 + }, + { + "epoch": 0.27383259911894275, + 
"grad_norm": 1.4883233103137832, + "learning_rate": 1.9591939834015096e-05, + "loss": 0.739683985710144, + "step": 777 + }, + { + "epoch": 0.2741850220264317, + "grad_norm": 1.2437408403604437, + "learning_rate": 1.9590290799396353e-05, + "loss": 0.6615399122238159, + "step": 778 + }, + { + "epoch": 0.2745374449339207, + "grad_norm": 1.5863201035209105, + "learning_rate": 1.9588638509172343e-05, + "loss": 0.8045977354049683, + "step": 779 + }, + { + "epoch": 0.2748898678414097, + "grad_norm": 1.5522608295626732, + "learning_rate": 1.958698296390397e-05, + "loss": 0.8760169744491577, + "step": 780 + }, + { + "epoch": 0.27524229074889867, + "grad_norm": 1.5297761597873432, + "learning_rate": 1.9585324164153236e-05, + "loss": 0.6676662564277649, + "step": 781 + }, + { + "epoch": 0.27559471365638766, + "grad_norm": 1.1706549585314092, + "learning_rate": 1.958366211048326e-05, + "loss": 0.6650630235671997, + "step": 782 + }, + { + "epoch": 0.27594713656387665, + "grad_norm": 1.157826702613003, + "learning_rate": 1.9581996803458248e-05, + "loss": 0.7399466037750244, + "step": 783 + }, + { + "epoch": 0.27629955947136564, + "grad_norm": 1.316772401506473, + "learning_rate": 1.9580328243643528e-05, + "loss": 0.6121753454208374, + "step": 784 + }, + { + "epoch": 0.27665198237885463, + "grad_norm": 1.334268754223845, + "learning_rate": 1.9578656431605515e-05, + "loss": 0.8562870025634766, + "step": 785 + }, + { + "epoch": 0.2770044052863436, + "grad_norm": 1.441421130314338, + "learning_rate": 1.9576981367911746e-05, + "loss": 0.717842161655426, + "step": 786 + }, + { + "epoch": 0.2773568281938326, + "grad_norm": 1.582720247126145, + "learning_rate": 1.9575303053130847e-05, + "loss": 0.802294135093689, + "step": 787 + }, + { + "epoch": 0.2777092511013216, + "grad_norm": 1.2234390313515955, + "learning_rate": 1.957362148783256e-05, + "loss": 0.6636664867401123, + "step": 788 + }, + { + "epoch": 0.2780616740088106, + "grad_norm": 1.3850288575091645, + "learning_rate": 
1.9571936672587718e-05, + "loss": 0.7177780866622925, + "step": 789 + }, + { + "epoch": 0.27841409691629954, + "grad_norm": 1.6181532263095582, + "learning_rate": 1.957024860796826e-05, + "loss": 0.8263623714447021, + "step": 790 + }, + { + "epoch": 0.27876651982378853, + "grad_norm": 1.4470113515398348, + "learning_rate": 1.9568557294547244e-05, + "loss": 0.7620534896850586, + "step": 791 + }, + { + "epoch": 0.2791189427312775, + "grad_norm": 1.5101791429302596, + "learning_rate": 1.956686273289881e-05, + "loss": 0.812814474105835, + "step": 792 + }, + { + "epoch": 0.2794713656387665, + "grad_norm": 1.2812553609430264, + "learning_rate": 1.956516492359821e-05, + "loss": 0.6494747400283813, + "step": 793 + }, + { + "epoch": 0.2798237885462555, + "grad_norm": 1.3413487769011592, + "learning_rate": 1.9563463867221793e-05, + "loss": 0.7152044773101807, + "step": 794 + }, + { + "epoch": 0.2801762114537445, + "grad_norm": 1.5718962936709213, + "learning_rate": 1.956175956434702e-05, + "loss": 0.7607219815254211, + "step": 795 + }, + { + "epoch": 0.2805286343612335, + "grad_norm": 1.4483911078118432, + "learning_rate": 1.9560052015552455e-05, + "loss": 0.8793845176696777, + "step": 796 + }, + { + "epoch": 0.2808810572687225, + "grad_norm": 1.509282266928049, + "learning_rate": 1.9558341221417744e-05, + "loss": 0.8314816951751709, + "step": 797 + }, + { + "epoch": 0.28123348017621147, + "grad_norm": 1.2634078342185056, + "learning_rate": 1.9556627182523656e-05, + "loss": 0.8195264339447021, + "step": 798 + }, + { + "epoch": 0.28158590308370046, + "grad_norm": 1.2808718319688324, + "learning_rate": 1.9554909899452055e-05, + "loss": 0.8079999685287476, + "step": 799 + }, + { + "epoch": 0.28193832599118945, + "grad_norm": 1.6867283155032318, + "learning_rate": 1.9553189372785903e-05, + "loss": 0.7614034414291382, + "step": 800 + }, + { + "epoch": 0.2822907488986784, + "grad_norm": 1.13179227916607, + "learning_rate": 1.9551465603109263e-05, + "loss": 0.6271458268165588, + 
"step": 801 + }, + { + "epoch": 0.2826431718061674, + "grad_norm": 1.5872328004173855, + "learning_rate": 1.9549738591007302e-05, + "loss": 0.8061915040016174, + "step": 802 + }, + { + "epoch": 0.2829955947136564, + "grad_norm": 1.3494758196376249, + "learning_rate": 1.9548008337066294e-05, + "loss": 0.663912296295166, + "step": 803 + }, + { + "epoch": 0.28334801762114536, + "grad_norm": 1.503624867364233, + "learning_rate": 1.9546274841873597e-05, + "loss": 0.7582170963287354, + "step": 804 + }, + { + "epoch": 0.28370044052863436, + "grad_norm": 1.3181032025931252, + "learning_rate": 1.9544538106017682e-05, + "loss": 0.7855465412139893, + "step": 805 + }, + { + "epoch": 0.28405286343612335, + "grad_norm": 1.4462567272335825, + "learning_rate": 1.9542798130088116e-05, + "loss": 0.6976481676101685, + "step": 806 + }, + { + "epoch": 0.28440528634361234, + "grad_norm": 1.8291330426153005, + "learning_rate": 1.954105491467557e-05, + "loss": 0.7678342461585999, + "step": 807 + }, + { + "epoch": 0.28475770925110133, + "grad_norm": 1.2407582020259869, + "learning_rate": 1.9539308460371812e-05, + "loss": 0.6238858699798584, + "step": 808 + }, + { + "epoch": 0.2851101321585903, + "grad_norm": 1.239785831064125, + "learning_rate": 1.95375587677697e-05, + "loss": 0.7756681442260742, + "step": 809 + }, + { + "epoch": 0.2854625550660793, + "grad_norm": 1.462836813794646, + "learning_rate": 1.953580583746321e-05, + "loss": 0.8908202648162842, + "step": 810 + }, + { + "epoch": 0.2858149779735683, + "grad_norm": 1.1795831445984086, + "learning_rate": 1.9534049670047402e-05, + "loss": 0.6769838929176331, + "step": 811 + }, + { + "epoch": 0.28616740088105724, + "grad_norm": 1.2674126564024601, + "learning_rate": 1.953229026611844e-05, + "loss": 0.8452527523040771, + "step": 812 + }, + { + "epoch": 0.28651982378854624, + "grad_norm": 1.1830287106246784, + "learning_rate": 1.9530527626273592e-05, + "loss": 0.7494348287582397, + "step": 813 + }, + { + "epoch": 0.2868722466960352, + 
"grad_norm": 1.399665317775642, + "learning_rate": 1.9528761751111215e-05, + "loss": 0.7691028714179993, + "step": 814 + }, + { + "epoch": 0.2872246696035242, + "grad_norm": 1.2077153417445161, + "learning_rate": 1.9526992641230768e-05, + "loss": 0.6854703426361084, + "step": 815 + }, + { + "epoch": 0.2875770925110132, + "grad_norm": 1.4832887577463363, + "learning_rate": 1.9525220297232815e-05, + "loss": 0.7520424127578735, + "step": 816 + }, + { + "epoch": 0.2879295154185022, + "grad_norm": 1.499896401145914, + "learning_rate": 1.9523444719719003e-05, + "loss": 0.7894444465637207, + "step": 817 + }, + { + "epoch": 0.2882819383259912, + "grad_norm": 1.4246285223246848, + "learning_rate": 1.952166590929209e-05, + "loss": 0.7835032939910889, + "step": 818 + }, + { + "epoch": 0.2886343612334802, + "grad_norm": 1.4284322886298129, + "learning_rate": 1.9519883866555928e-05, + "loss": 0.7932062149047852, + "step": 819 + }, + { + "epoch": 0.2889867841409692, + "grad_norm": 1.3689283839888975, + "learning_rate": 1.951809859211546e-05, + "loss": 0.7917006015777588, + "step": 820 + }, + { + "epoch": 0.28933920704845817, + "grad_norm": 1.1579342690806749, + "learning_rate": 1.9516310086576734e-05, + "loss": 0.5330606698989868, + "step": 821 + }, + { + "epoch": 0.28969162995594716, + "grad_norm": 1.3284680872988386, + "learning_rate": 1.9514518350546893e-05, + "loss": 0.7243788242340088, + "step": 822 + }, + { + "epoch": 0.2900440528634361, + "grad_norm": 1.5494348453743318, + "learning_rate": 1.9512723384634175e-05, + "loss": 0.7692278623580933, + "step": 823 + }, + { + "epoch": 0.2903964757709251, + "grad_norm": 1.4784711521599085, + "learning_rate": 1.9510925189447916e-05, + "loss": 0.7537804841995239, + "step": 824 + }, + { + "epoch": 0.2907488986784141, + "grad_norm": 1.6724318756605505, + "learning_rate": 1.9509123765598545e-05, + "loss": 0.9168751239776611, + "step": 825 + }, + { + "epoch": 0.2911013215859031, + "grad_norm": 1.3269710873120673, + "learning_rate": 
1.9507319113697592e-05, + "loss": 0.7863682508468628, + "step": 826 + }, + { + "epoch": 0.29145374449339206, + "grad_norm": 1.3629585622585614, + "learning_rate": 1.9505511234357677e-05, + "loss": 0.7119239568710327, + "step": 827 + }, + { + "epoch": 0.29180616740088106, + "grad_norm": 1.3362093903256012, + "learning_rate": 1.950370012819252e-05, + "loss": 0.6071019172668457, + "step": 828 + }, + { + "epoch": 0.29215859030837005, + "grad_norm": 1.479432309492857, + "learning_rate": 1.9501885795816937e-05, + "loss": 0.9750580787658691, + "step": 829 + }, + { + "epoch": 0.29251101321585904, + "grad_norm": 1.3797663030734688, + "learning_rate": 1.9500068237846837e-05, + "loss": 0.7465370297431946, + "step": 830 + }, + { + "epoch": 0.29286343612334803, + "grad_norm": 1.3385246660479724, + "learning_rate": 1.949824745489922e-05, + "loss": 0.7821183204650879, + "step": 831 + }, + { + "epoch": 0.293215859030837, + "grad_norm": 1.4671979426722186, + "learning_rate": 1.949642344759219e-05, + "loss": 0.7555009126663208, + "step": 832 + }, + { + "epoch": 0.293568281938326, + "grad_norm": 1.4661056896012497, + "learning_rate": 1.9494596216544942e-05, + "loss": 0.841058075428009, + "step": 833 + }, + { + "epoch": 0.29392070484581495, + "grad_norm": 1.4108089015351581, + "learning_rate": 1.9492765762377762e-05, + "loss": 0.737910270690918, + "step": 834 + }, + { + "epoch": 0.29427312775330394, + "grad_norm": 1.430769526790491, + "learning_rate": 1.9490932085712027e-05, + "loss": 0.6817367076873779, + "step": 835 + }, + { + "epoch": 0.29462555066079293, + "grad_norm": 1.4513808156166068, + "learning_rate": 1.9489095187170218e-05, + "loss": 0.6739218235015869, + "step": 836 + }, + { + "epoch": 0.2949779735682819, + "grad_norm": 1.3318980758968664, + "learning_rate": 1.9487255067375907e-05, + "loss": 0.8632504940032959, + "step": 837 + }, + { + "epoch": 0.2953303964757709, + "grad_norm": 1.4421404890889282, + "learning_rate": 1.9485411726953753e-05, + "loss": 0.6615850925445557, + 
"step": 838 + }, + { + "epoch": 0.2956828193832599, + "grad_norm": 1.5159755088266125, + "learning_rate": 1.9483565166529515e-05, + "loss": 0.8647087812423706, + "step": 839 + }, + { + "epoch": 0.2960352422907489, + "grad_norm": 1.0418453596772383, + "learning_rate": 1.9481715386730044e-05, + "loss": 0.5152087807655334, + "step": 840 + }, + { + "epoch": 0.2963876651982379, + "grad_norm": 1.265898832717726, + "learning_rate": 1.9479862388183283e-05, + "loss": 0.7942806482315063, + "step": 841 + }, + { + "epoch": 0.2967400881057269, + "grad_norm": 1.4728087242398047, + "learning_rate": 1.947800617151826e-05, + "loss": 0.6364283561706543, + "step": 842 + }, + { + "epoch": 0.2970925110132159, + "grad_norm": 1.322764194763318, + "learning_rate": 1.9476146737365112e-05, + "loss": 0.8278179168701172, + "step": 843 + }, + { + "epoch": 0.29744493392070487, + "grad_norm": 1.4629329991948483, + "learning_rate": 1.9474284086355057e-05, + "loss": 0.7369956970214844, + "step": 844 + }, + { + "epoch": 0.29779735682819386, + "grad_norm": 1.314820640789224, + "learning_rate": 1.9472418219120403e-05, + "loss": 0.6879928112030029, + "step": 845 + }, + { + "epoch": 0.2981497797356828, + "grad_norm": 1.357319714737328, + "learning_rate": 1.9470549136294554e-05, + "loss": 0.8312973976135254, + "step": 846 + }, + { + "epoch": 0.2985022026431718, + "grad_norm": 1.3920317025034097, + "learning_rate": 1.946867683851201e-05, + "loss": 0.8102964162826538, + "step": 847 + }, + { + "epoch": 0.2988546255506608, + "grad_norm": 1.502648854525568, + "learning_rate": 1.9466801326408355e-05, + "loss": 0.6136792898178101, + "step": 848 + }, + { + "epoch": 0.29920704845814977, + "grad_norm": 1.4028028409959845, + "learning_rate": 1.946492260062027e-05, + "loss": 0.6388760805130005, + "step": 849 + }, + { + "epoch": 0.29955947136563876, + "grad_norm": 1.2376129930975477, + "learning_rate": 1.9463040661785516e-05, + "loss": 0.6443628072738647, + "step": 850 + }, + { + "epoch": 0.29991189427312775, + 
"grad_norm": 1.4811436209967876, + "learning_rate": 1.9461155510542962e-05, + "loss": 0.7763667702674866, + "step": 851 + }, + { + "epoch": 0.30026431718061675, + "grad_norm": 1.3770267697185403, + "learning_rate": 1.9459267147532555e-05, + "loss": 0.8040921688079834, + "step": 852 + }, + { + "epoch": 0.30061674008810574, + "grad_norm": 1.5848801035694915, + "learning_rate": 1.9457375573395334e-05, + "loss": 0.6271079778671265, + "step": 853 + }, + { + "epoch": 0.30096916299559473, + "grad_norm": 1.4490523944014555, + "learning_rate": 1.945548078877343e-05, + "loss": 0.6970022916793823, + "step": 854 + }, + { + "epoch": 0.3013215859030837, + "grad_norm": 1.5989299247861681, + "learning_rate": 1.9453582794310063e-05, + "loss": 0.8283002972602844, + "step": 855 + }, + { + "epoch": 0.3016740088105727, + "grad_norm": 1.3183590658260465, + "learning_rate": 1.9451681590649545e-05, + "loss": 0.7989551424980164, + "step": 856 + }, + { + "epoch": 0.30202643171806165, + "grad_norm": 1.6791390781024222, + "learning_rate": 1.9449777178437274e-05, + "loss": 0.7000687122344971, + "step": 857 + }, + { + "epoch": 0.30237885462555064, + "grad_norm": 1.8022925697135672, + "learning_rate": 1.944786955831974e-05, + "loss": 0.8005126714706421, + "step": 858 + }, + { + "epoch": 0.30273127753303963, + "grad_norm": 1.4999207234472591, + "learning_rate": 1.9445958730944515e-05, + "loss": 0.7060712575912476, + "step": 859 + }, + { + "epoch": 0.3030837004405286, + "grad_norm": 1.4072429406012825, + "learning_rate": 1.9444044696960277e-05, + "loss": 0.6979726552963257, + "step": 860 + }, + { + "epoch": 0.3034361233480176, + "grad_norm": 1.4515999764557612, + "learning_rate": 1.9442127457016768e-05, + "loss": 0.7916465401649475, + "step": 861 + }, + { + "epoch": 0.3037885462555066, + "grad_norm": 1.1660322947657744, + "learning_rate": 1.944020701176484e-05, + "loss": 0.6980502009391785, + "step": 862 + }, + { + "epoch": 0.3041409691629956, + "grad_norm": 1.7481448087127538, + "learning_rate": 
1.943828336185642e-05, + "loss": 0.8479218482971191, + "step": 863 + }, + { + "epoch": 0.3044933920704846, + "grad_norm": 1.6916771358958562, + "learning_rate": 1.9436356507944532e-05, + "loss": 0.8374297022819519, + "step": 864 + }, + { + "epoch": 0.3048458149779736, + "grad_norm": 1.3059238044039985, + "learning_rate": 1.943442645068328e-05, + "loss": 0.6871248483657837, + "step": 865 + }, + { + "epoch": 0.3051982378854626, + "grad_norm": 1.4668202087885096, + "learning_rate": 1.9432493190727854e-05, + "loss": 0.92267906665802, + "step": 866 + }, + { + "epoch": 0.30555066079295157, + "grad_norm": 1.7147503992363287, + "learning_rate": 1.9430556728734543e-05, + "loss": 0.7068654298782349, + "step": 867 + }, + { + "epoch": 0.3059030837004405, + "grad_norm": 1.354783765213683, + "learning_rate": 1.942861706536071e-05, + "loss": 0.830272912979126, + "step": 868 + }, + { + "epoch": 0.3062555066079295, + "grad_norm": 1.5223972366721212, + "learning_rate": 1.9426674201264814e-05, + "loss": 0.7996113300323486, + "step": 869 + }, + { + "epoch": 0.3066079295154185, + "grad_norm": 1.7576483396811688, + "learning_rate": 1.9424728137106398e-05, + "loss": 0.7519441843032837, + "step": 870 + }, + { + "epoch": 0.3069603524229075, + "grad_norm": 1.92300313533063, + "learning_rate": 1.9422778873546084e-05, + "loss": 0.5812790393829346, + "step": 871 + }, + { + "epoch": 0.30731277533039647, + "grad_norm": 1.058437605318741, + "learning_rate": 1.9420826411245595e-05, + "loss": 0.5953323841094971, + "step": 872 + }, + { + "epoch": 0.30766519823788546, + "grad_norm": 1.3954802825469448, + "learning_rate": 1.941887075086772e-05, + "loss": 0.8307937979698181, + "step": 873 + }, + { + "epoch": 0.30801762114537445, + "grad_norm": 1.5122839417773277, + "learning_rate": 1.9416911893076358e-05, + "loss": 0.7753443121910095, + "step": 874 + }, + { + "epoch": 0.30837004405286345, + "grad_norm": 1.3848386830658772, + "learning_rate": 1.9414949838536468e-05, + "loss": 0.8803520798683167, + 
"step": 875 + }, + { + "epoch": 0.30872246696035244, + "grad_norm": 1.3111930018969615, + "learning_rate": 1.9412984587914115e-05, + "loss": 0.6811587810516357, + "step": 876 + }, + { + "epoch": 0.30907488986784143, + "grad_norm": 1.3880932208512609, + "learning_rate": 1.9411016141876438e-05, + "loss": 0.802099347114563, + "step": 877 + }, + { + "epoch": 0.3094273127753304, + "grad_norm": 1.560285458084049, + "learning_rate": 1.940904450109166e-05, + "loss": 0.7325229644775391, + "step": 878 + }, + { + "epoch": 0.30977973568281936, + "grad_norm": 1.5126812875374416, + "learning_rate": 1.9407069666229097e-05, + "loss": 0.6515973210334778, + "step": 879 + }, + { + "epoch": 0.31013215859030835, + "grad_norm": 1.2990709527675965, + "learning_rate": 1.9405091637959138e-05, + "loss": 0.7314589619636536, + "step": 880 + }, + { + "epoch": 0.31048458149779734, + "grad_norm": 1.2146229290292494, + "learning_rate": 1.9403110416953267e-05, + "loss": 0.6668078303337097, + "step": 881 + }, + { + "epoch": 0.31083700440528633, + "grad_norm": 1.4214853985415763, + "learning_rate": 1.9401126003884047e-05, + "loss": 0.693236231803894, + "step": 882 + }, + { + "epoch": 0.3111894273127753, + "grad_norm": 2.210010730425174, + "learning_rate": 1.939913839942512e-05, + "loss": 0.8242754936218262, + "step": 883 + }, + { + "epoch": 0.3115418502202643, + "grad_norm": 1.4121001226290237, + "learning_rate": 1.939714760425122e-05, + "loss": 0.7776592373847961, + "step": 884 + }, + { + "epoch": 0.3118942731277533, + "grad_norm": 1.6297557283357365, + "learning_rate": 1.9395153619038158e-05, + "loss": 0.7023555636405945, + "step": 885 + }, + { + "epoch": 0.3122466960352423, + "grad_norm": 1.333512905730993, + "learning_rate": 1.939315644446283e-05, + "loss": 0.690382182598114, + "step": 886 + }, + { + "epoch": 0.3125991189427313, + "grad_norm": 1.4632969046362096, + "learning_rate": 1.9391156081203214e-05, + "loss": 0.7590082287788391, + "step": 887 + }, + { + "epoch": 0.3129515418502203, + 
"grad_norm": 1.3672878296080273, + "learning_rate": 1.9389152529938377e-05, + "loss": 0.7378168702125549, + "step": 888 + }, + { + "epoch": 0.3133039647577093, + "grad_norm": 1.3616414763479574, + "learning_rate": 1.938714579134845e-05, + "loss": 0.7036890983581543, + "step": 889 + }, + { + "epoch": 0.3136563876651982, + "grad_norm": 1.4808362954559244, + "learning_rate": 1.938513586611467e-05, + "loss": 0.8881829977035522, + "step": 890 + }, + { + "epoch": 0.3140088105726872, + "grad_norm": 1.5370313355999317, + "learning_rate": 1.9383122754919342e-05, + "loss": 0.7467600107192993, + "step": 891 + }, + { + "epoch": 0.3143612334801762, + "grad_norm": 1.6168044285318155, + "learning_rate": 1.938110645844585e-05, + "loss": 0.9358077049255371, + "step": 892 + }, + { + "epoch": 0.3147136563876652, + "grad_norm": 1.3982581442164577, + "learning_rate": 1.9379086977378664e-05, + "loss": 0.7751256227493286, + "step": 893 + }, + { + "epoch": 0.3150660792951542, + "grad_norm": 1.3071717433837386, + "learning_rate": 1.9377064312403338e-05, + "loss": 0.8020666837692261, + "step": 894 + }, + { + "epoch": 0.31541850220264317, + "grad_norm": 1.2076526617304193, + "learning_rate": 1.9375038464206507e-05, + "loss": 0.7251513004302979, + "step": 895 + }, + { + "epoch": 0.31577092511013216, + "grad_norm": 1.3323484110232422, + "learning_rate": 1.9373009433475874e-05, + "loss": 0.7163990139961243, + "step": 896 + }, + { + "epoch": 0.31612334801762115, + "grad_norm": 1.7368098259899396, + "learning_rate": 1.937097722090024e-05, + "loss": 0.7208842039108276, + "step": 897 + }, + { + "epoch": 0.31647577092511014, + "grad_norm": 1.3890083085574685, + "learning_rate": 1.9368941827169475e-05, + "loss": 0.7660849690437317, + "step": 898 + }, + { + "epoch": 0.31682819383259914, + "grad_norm": 1.4598849060474621, + "learning_rate": 1.9366903252974532e-05, + "loss": 0.7017598152160645, + "step": 899 + }, + { + "epoch": 0.31718061674008813, + "grad_norm": 1.1578269588811556, + "learning_rate": 
1.9364861499007443e-05, + "loss": 0.6831692457199097, + "step": 900 + }, + { + "epoch": 0.3175330396475771, + "grad_norm": 1.5110843884258551, + "learning_rate": 1.936281656596132e-05, + "loss": 0.6555520296096802, + "step": 901 + }, + { + "epoch": 0.31788546255506606, + "grad_norm": 1.5455350998398028, + "learning_rate": 1.9360768454530356e-05, + "loss": 0.7401334047317505, + "step": 902 + }, + { + "epoch": 0.31823788546255505, + "grad_norm": 1.445337217541868, + "learning_rate": 1.935871716540982e-05, + "loss": 0.7415893077850342, + "step": 903 + }, + { + "epoch": 0.31859030837004404, + "grad_norm": 1.280838808592201, + "learning_rate": 1.935666269929606e-05, + "loss": 0.8254752159118652, + "step": 904 + }, + { + "epoch": 0.31894273127753303, + "grad_norm": 1.4164353369528349, + "learning_rate": 1.9354605056886505e-05, + "loss": 0.708149254322052, + "step": 905 + }, + { + "epoch": 0.319295154185022, + "grad_norm": 5.868993531178127, + "learning_rate": 1.9352544238879654e-05, + "loss": 0.8084006905555725, + "step": 906 + }, + { + "epoch": 0.319647577092511, + "grad_norm": 1.264327413823813, + "learning_rate": 1.93504802459751e-05, + "loss": 0.8039542436599731, + "step": 907 + }, + { + "epoch": 0.32, + "grad_norm": 1.3552380315038073, + "learning_rate": 1.93484130788735e-05, + "loss": 0.7563241720199585, + "step": 908 + }, + { + "epoch": 0.320352422907489, + "grad_norm": 1.4802258000623036, + "learning_rate": 1.9346342738276593e-05, + "loss": 0.7972971200942993, + "step": 909 + }, + { + "epoch": 0.320704845814978, + "grad_norm": 1.2978401429696003, + "learning_rate": 1.93442692248872e-05, + "loss": 0.6693121790885925, + "step": 910 + }, + { + "epoch": 0.321057268722467, + "grad_norm": 1.567978048588056, + "learning_rate": 1.9342192539409203e-05, + "loss": 0.6597858667373657, + "step": 911 + }, + { + "epoch": 0.321409691629956, + "grad_norm": 1.368700143265877, + "learning_rate": 1.934011268254758e-05, + "loss": 0.6771499514579773, + "step": 912 + }, + { + "epoch": 
0.3217621145374449, + "grad_norm": 1.2365987861589656, + "learning_rate": 1.9338029655008375e-05, + "loss": 0.6903397440910339, + "step": 913 + }, + { + "epoch": 0.3221145374449339, + "grad_norm": 1.1408319382533163, + "learning_rate": 1.9335943457498717e-05, + "loss": 0.6287999153137207, + "step": 914 + }, + { + "epoch": 0.3224669603524229, + "grad_norm": 1.6382789883498257, + "learning_rate": 1.93338540907268e-05, + "loss": 0.7199264764785767, + "step": 915 + }, + { + "epoch": 0.3228193832599119, + "grad_norm": 1.3951711845041654, + "learning_rate": 1.9331761555401896e-05, + "loss": 0.6960160732269287, + "step": 916 + }, + { + "epoch": 0.3231718061674009, + "grad_norm": 1.4692108732272398, + "learning_rate": 1.932966585223436e-05, + "loss": 0.8981958627700806, + "step": 917 + }, + { + "epoch": 0.32352422907488987, + "grad_norm": 1.5685980092664367, + "learning_rate": 1.932756698193562e-05, + "loss": 0.786432147026062, + "step": 918 + }, + { + "epoch": 0.32387665198237886, + "grad_norm": 1.5208274085752962, + "learning_rate": 1.9325464945218172e-05, + "loss": 0.7260904312133789, + "step": 919 + }, + { + "epoch": 0.32422907488986785, + "grad_norm": 1.5076861367086136, + "learning_rate": 1.9323359742795595e-05, + "loss": 0.715835452079773, + "step": 920 + }, + { + "epoch": 0.32458149779735684, + "grad_norm": 1.5022880591009429, + "learning_rate": 1.932125137538254e-05, + "loss": 0.6312157511711121, + "step": 921 + }, + { + "epoch": 0.32493392070484584, + "grad_norm": 1.3825485581433186, + "learning_rate": 1.931913984369473e-05, + "loss": 0.7565821409225464, + "step": 922 + }, + { + "epoch": 0.3252863436123348, + "grad_norm": 1.3787375139479208, + "learning_rate": 1.931702514844896e-05, + "loss": 0.6866531372070312, + "step": 923 + }, + { + "epoch": 0.32563876651982376, + "grad_norm": 2.06933203374066, + "learning_rate": 1.9314907290363117e-05, + "loss": 0.879021167755127, + "step": 924 + }, + { + "epoch": 0.32599118942731276, + "grad_norm": 1.4876230584538193, + 
"learning_rate": 1.9312786270156135e-05, + "loss": 0.6972150802612305, + "step": 925 + }, + { + "epoch": 0.32634361233480175, + "grad_norm": 1.5939077112190465, + "learning_rate": 1.9310662088548042e-05, + "loss": 0.8735189437866211, + "step": 926 + }, + { + "epoch": 0.32669603524229074, + "grad_norm": 1.4550040646675775, + "learning_rate": 1.930853474625993e-05, + "loss": 0.6114254593849182, + "step": 927 + }, + { + "epoch": 0.32704845814977973, + "grad_norm": 1.5850836788259668, + "learning_rate": 1.930640424401396e-05, + "loss": 0.8032322525978088, + "step": 928 + }, + { + "epoch": 0.3274008810572687, + "grad_norm": 1.3666090686487828, + "learning_rate": 1.9304270582533376e-05, + "loss": 0.7391160726547241, + "step": 929 + }, + { + "epoch": 0.3277533039647577, + "grad_norm": 1.3744182830455962, + "learning_rate": 1.930213376254249e-05, + "loss": 0.7055366039276123, + "step": 930 + }, + { + "epoch": 0.3281057268722467, + "grad_norm": 1.3717314993069374, + "learning_rate": 1.9299993784766684e-05, + "loss": 0.671670138835907, + "step": 931 + }, + { + "epoch": 0.3284581497797357, + "grad_norm": 1.4961694507376992, + "learning_rate": 1.9297850649932416e-05, + "loss": 0.7486976385116577, + "step": 932 + }, + { + "epoch": 0.3288105726872247, + "grad_norm": 1.3777653583239398, + "learning_rate": 1.929570435876721e-05, + "loss": 0.8767625093460083, + "step": 933 + }, + { + "epoch": 0.3291629955947137, + "grad_norm": 1.5767252427705674, + "learning_rate": 1.929355491199967e-05, + "loss": 0.6841862797737122, + "step": 934 + }, + { + "epoch": 0.3295154185022026, + "grad_norm": 1.4985001262879563, + "learning_rate": 1.929140231035946e-05, + "loss": 0.7745054960250854, + "step": 935 + }, + { + "epoch": 0.3298678414096916, + "grad_norm": 1.4538548583561628, + "learning_rate": 1.928924655457733e-05, + "loss": 0.5879434943199158, + "step": 936 + }, + { + "epoch": 0.3302202643171806, + "grad_norm": 1.4292680321712006, + "learning_rate": 1.9287087645385084e-05, + "loss": 
0.8484170436859131, + "step": 937 + }, + { + "epoch": 0.3305726872246696, + "grad_norm": 1.3383126778675687, + "learning_rate": 1.9284925583515604e-05, + "loss": 0.6518877148628235, + "step": 938 + }, + { + "epoch": 0.3309251101321586, + "grad_norm": 1.3496744406534642, + "learning_rate": 1.928276036970285e-05, + "loss": 0.7694787383079529, + "step": 939 + }, + { + "epoch": 0.3312775330396476, + "grad_norm": 1.475669634065235, + "learning_rate": 1.928059200468184e-05, + "loss": 0.6893239617347717, + "step": 940 + }, + { + "epoch": 0.33162995594713657, + "grad_norm": 1.9386710613485005, + "learning_rate": 1.927842048918867e-05, + "loss": 0.7731181383132935, + "step": 941 + }, + { + "epoch": 0.33198237885462556, + "grad_norm": 1.2730945433300995, + "learning_rate": 1.9276245823960495e-05, + "loss": 0.652579665184021, + "step": 942 + }, + { + "epoch": 0.33233480176211455, + "grad_norm": 1.4590802585162193, + "learning_rate": 1.927406800973555e-05, + "loss": 0.7504575252532959, + "step": 943 + }, + { + "epoch": 0.33268722466960354, + "grad_norm": 1.2636242756085148, + "learning_rate": 1.927188704725314e-05, + "loss": 0.6199444532394409, + "step": 944 + }, + { + "epoch": 0.33303964757709253, + "grad_norm": 1.3381297141173314, + "learning_rate": 1.9269702937253623e-05, + "loss": 0.7452073693275452, + "step": 945 + }, + { + "epoch": 0.33339207048458147, + "grad_norm": 1.6220831494484687, + "learning_rate": 1.926751568047845e-05, + "loss": 0.7538012266159058, + "step": 946 + }, + { + "epoch": 0.33374449339207046, + "grad_norm": 1.298282312930767, + "learning_rate": 1.9265325277670114e-05, + "loss": 0.6670408248901367, + "step": 947 + }, + { + "epoch": 0.33409691629955945, + "grad_norm": 1.3861711594873305, + "learning_rate": 1.926313172957219e-05, + "loss": 0.8060495853424072, + "step": 948 + }, + { + "epoch": 0.33444933920704845, + "grad_norm": 1.497135036962013, + "learning_rate": 1.926093503692933e-05, + "loss": 0.7494044303894043, + "step": 949 + }, + { + "epoch": 
0.33480176211453744, + "grad_norm": 1.4954420855155135, + "learning_rate": 1.9258735200487235e-05, + "loss": 0.5751914978027344, + "step": 950 + }, + { + "epoch": 0.33515418502202643, + "grad_norm": 1.3135496972020755, + "learning_rate": 1.9256532220992683e-05, + "loss": 0.7234281301498413, + "step": 951 + }, + { + "epoch": 0.3355066079295154, + "grad_norm": 1.648299384166419, + "learning_rate": 1.9254326099193515e-05, + "loss": 0.7721251249313354, + "step": 952 + }, + { + "epoch": 0.3358590308370044, + "grad_norm": 1.5273494870998061, + "learning_rate": 1.925211683583864e-05, + "loss": 0.7240835428237915, + "step": 953 + }, + { + "epoch": 0.3362114537444934, + "grad_norm": 1.5101195617398009, + "learning_rate": 1.9249904431678037e-05, + "loss": 0.6622776985168457, + "step": 954 + }, + { + "epoch": 0.3365638766519824, + "grad_norm": 1.7484785330432984, + "learning_rate": 1.9247688887462747e-05, + "loss": 0.9682766199111938, + "step": 955 + }, + { + "epoch": 0.3369162995594714, + "grad_norm": 1.5743447413941896, + "learning_rate": 1.9245470203944878e-05, + "loss": 0.8363134860992432, + "step": 956 + }, + { + "epoch": 0.3372687224669604, + "grad_norm": 1.4500608043156524, + "learning_rate": 1.9243248381877605e-05, + "loss": 0.6530857086181641, + "step": 957 + }, + { + "epoch": 0.3376211453744493, + "grad_norm": 1.2035108561422267, + "learning_rate": 1.924102342201517e-05, + "loss": 0.5186585187911987, + "step": 958 + }, + { + "epoch": 0.3379735682819383, + "grad_norm": 1.3827408215949344, + "learning_rate": 1.9238795325112867e-05, + "loss": 0.6729516983032227, + "step": 959 + }, + { + "epoch": 0.3383259911894273, + "grad_norm": 4.587971824519282, + "learning_rate": 1.9236564091927083e-05, + "loss": 0.6991842985153198, + "step": 960 + }, + { + "epoch": 0.3386784140969163, + "grad_norm": 1.516889979226708, + "learning_rate": 1.9234329723215235e-05, + "loss": 0.7738245725631714, + "step": 961 + }, + { + "epoch": 0.3390308370044053, + "grad_norm": 1.4574207335379696, + 
"learning_rate": 1.923209221973583e-05, + "loss": 0.7027466893196106, + "step": 962 + }, + { + "epoch": 0.3393832599118943, + "grad_norm": 1.401098486802875, + "learning_rate": 1.922985158224843e-05, + "loss": 0.7868508696556091, + "step": 963 + }, + { + "epoch": 0.33973568281938327, + "grad_norm": 1.3325223534105368, + "learning_rate": 1.9227607811513662e-05, + "loss": 0.7499512434005737, + "step": 964 + }, + { + "epoch": 0.34008810572687226, + "grad_norm": 1.3198116129339372, + "learning_rate": 1.9225360908293217e-05, + "loss": 0.6662228107452393, + "step": 965 + }, + { + "epoch": 0.34044052863436125, + "grad_norm": 1.4854956624988247, + "learning_rate": 1.9223110873349847e-05, + "loss": 0.8570939302444458, + "step": 966 + }, + { + "epoch": 0.34079295154185024, + "grad_norm": 1.3385040645698225, + "learning_rate": 1.9220857707447372e-05, + "loss": 0.7497669458389282, + "step": 967 + }, + { + "epoch": 0.34114537444933923, + "grad_norm": 1.2753268813313299, + "learning_rate": 1.9218601411350663e-05, + "loss": 0.7356737852096558, + "step": 968 + }, + { + "epoch": 0.34149779735682817, + "grad_norm": 2.3286924006274896, + "learning_rate": 1.9216341985825672e-05, + "loss": 0.7880491018295288, + "step": 969 + }, + { + "epoch": 0.34185022026431716, + "grad_norm": 1.4677269303314853, + "learning_rate": 1.92140794316394e-05, + "loss": 0.734922468662262, + "step": 970 + }, + { + "epoch": 0.34220264317180615, + "grad_norm": 1.5501144518696521, + "learning_rate": 1.9211813749559916e-05, + "loss": 0.6710363626480103, + "step": 971 + }, + { + "epoch": 0.34255506607929515, + "grad_norm": 1.256856073477316, + "learning_rate": 1.920954494035634e-05, + "loss": 0.7300584316253662, + "step": 972 + }, + { + "epoch": 0.34290748898678414, + "grad_norm": 1.5351698758546528, + "learning_rate": 1.9207273004798873e-05, + "loss": 0.8584152460098267, + "step": 973 + }, + { + "epoch": 0.34325991189427313, + "grad_norm": 1.4115351274616093, + "learning_rate": 1.9204997943658764e-05, + "loss": 
0.7307419776916504, + "step": 974 + }, + { + "epoch": 0.3436123348017621, + "grad_norm": 1.434441373942747, + "learning_rate": 1.920271975770832e-05, + "loss": 0.6004960536956787, + "step": 975 + }, + { + "epoch": 0.3439647577092511, + "grad_norm": 1.4015679334218965, + "learning_rate": 1.920043844772092e-05, + "loss": 0.7951763868331909, + "step": 976 + }, + { + "epoch": 0.3443171806167401, + "grad_norm": 1.6575061104845086, + "learning_rate": 1.919815401447099e-05, + "loss": 0.6835082769393921, + "step": 977 + }, + { + "epoch": 0.3446696035242291, + "grad_norm": 1.3790503468449504, + "learning_rate": 1.9195866458734034e-05, + "loss": 0.7556526064872742, + "step": 978 + }, + { + "epoch": 0.3450220264317181, + "grad_norm": 1.2642410643718298, + "learning_rate": 1.91935757812866e-05, + "loss": 0.6918114423751831, + "step": 979 + }, + { + "epoch": 0.345374449339207, + "grad_norm": 1.6331863015088222, + "learning_rate": 1.9191281982906304e-05, + "loss": 0.8197037577629089, + "step": 980 + }, + { + "epoch": 0.345726872246696, + "grad_norm": 1.9820857497043596, + "learning_rate": 1.9188985064371818e-05, + "loss": 0.833138644695282, + "step": 981 + }, + { + "epoch": 0.346079295154185, + "grad_norm": 1.3877421520016489, + "learning_rate": 1.9186685026462874e-05, + "loss": 0.6593397855758667, + "step": 982 + }, + { + "epoch": 0.346431718061674, + "grad_norm": 1.750501089720619, + "learning_rate": 1.918438186996026e-05, + "loss": 0.7535643577575684, + "step": 983 + }, + { + "epoch": 0.346784140969163, + "grad_norm": 1.5295833510904033, + "learning_rate": 1.9182075595645836e-05, + "loss": 0.6959745287895203, + "step": 984 + }, + { + "epoch": 0.347136563876652, + "grad_norm": 1.4045200992789866, + "learning_rate": 1.91797662043025e-05, + "loss": 0.7349518537521362, + "step": 985 + }, + { + "epoch": 0.347488986784141, + "grad_norm": 1.3769518272852244, + "learning_rate": 1.9177453696714224e-05, + "loss": 0.7677974700927734, + "step": 986 + }, + { + "epoch": 
0.34784140969162997, + "grad_norm": 1.4486626509256493, + "learning_rate": 1.917513807366603e-05, + "loss": 0.7302255630493164, + "step": 987 + }, + { + "epoch": 0.34819383259911896, + "grad_norm": 1.477891236612788, + "learning_rate": 1.9172819335944003e-05, + "loss": 0.838138222694397, + "step": 988 + }, + { + "epoch": 0.34854625550660795, + "grad_norm": 1.555345260078333, + "learning_rate": 1.9170497484335276e-05, + "loss": 0.8018180131912231, + "step": 989 + }, + { + "epoch": 0.34889867841409694, + "grad_norm": 1.4299439839627417, + "learning_rate": 1.9168172519628056e-05, + "loss": 0.8085787296295166, + "step": 990 + }, + { + "epoch": 0.3492511013215859, + "grad_norm": 1.407734167007011, + "learning_rate": 1.9165844442611584e-05, + "loss": 0.8419004082679749, + "step": 991 + }, + { + "epoch": 0.34960352422907487, + "grad_norm": 1.485093259368171, + "learning_rate": 1.916351325407618e-05, + "loss": 0.8255139589309692, + "step": 992 + }, + { + "epoch": 0.34995594713656386, + "grad_norm": 1.3581016847128187, + "learning_rate": 1.9161178954813203e-05, + "loss": 0.7588528990745544, + "step": 993 + }, + { + "epoch": 0.35030837004405285, + "grad_norm": 1.3722258517458088, + "learning_rate": 1.9158841545615076e-05, + "loss": 0.7057096362113953, + "step": 994 + }, + { + "epoch": 0.35066079295154184, + "grad_norm": 1.3264479954648483, + "learning_rate": 1.915650102727528e-05, + "loss": 0.6913125514984131, + "step": 995 + }, + { + "epoch": 0.35101321585903084, + "grad_norm": 1.4277288783882767, + "learning_rate": 1.9154157400588348e-05, + "loss": 0.7622898817062378, + "step": 996 + }, + { + "epoch": 0.3513656387665198, + "grad_norm": 1.3345359637809249, + "learning_rate": 1.915181066634986e-05, + "loss": 0.6918702125549316, + "step": 997 + }, + { + "epoch": 0.3517180616740088, + "grad_norm": 1.4330955991310976, + "learning_rate": 1.914946082535647e-05, + "loss": 0.8801462650299072, + "step": 998 + }, + { + "epoch": 0.3520704845814978, + "grad_norm": 1.6364104196010791, + 
"learning_rate": 1.9147107878405873e-05, + "loss": 0.7901172637939453, + "step": 999 + }, + { + "epoch": 0.3524229074889868, + "grad_norm": 1.3202428944557627, + "learning_rate": 1.9144751826296818e-05, + "loss": 0.7308447360992432, + "step": 1000 + }, + { + "epoch": 0.3527753303964758, + "grad_norm": 1.3152547105893029, + "learning_rate": 1.9142392669829114e-05, + "loss": 0.5733275413513184, + "step": 1001 + }, + { + "epoch": 0.35312775330396473, + "grad_norm": 1.4327185784306546, + "learning_rate": 1.9140030409803622e-05, + "loss": 0.7251306772232056, + "step": 1002 + }, + { + "epoch": 0.3534801762114537, + "grad_norm": 1.3492122584167072, + "learning_rate": 1.913766504702225e-05, + "loss": 0.7983027696609497, + "step": 1003 + }, + { + "epoch": 0.3538325991189427, + "grad_norm": 1.5284478719025472, + "learning_rate": 1.9135296582287973e-05, + "loss": 0.7464017868041992, + "step": 1004 + }, + { + "epoch": 0.3541850220264317, + "grad_norm": 1.3377291300677683, + "learning_rate": 1.9132925016404805e-05, + "loss": 0.7333002686500549, + "step": 1005 + }, + { + "epoch": 0.3545374449339207, + "grad_norm": 1.4170618275882645, + "learning_rate": 1.9130550350177823e-05, + "loss": 0.729085385799408, + "step": 1006 + }, + { + "epoch": 0.3548898678414097, + "grad_norm": 1.1531700234964573, + "learning_rate": 1.9128172584413148e-05, + "loss": 0.7599227428436279, + "step": 1007 + }, + { + "epoch": 0.3552422907488987, + "grad_norm": 1.3499603875621307, + "learning_rate": 1.9125791719917962e-05, + "loss": 0.8110464811325073, + "step": 1008 + }, + { + "epoch": 0.3555947136563877, + "grad_norm": 1.443391069493257, + "learning_rate": 1.912340775750049e-05, + "loss": 0.7431697845458984, + "step": 1009 + }, + { + "epoch": 0.35594713656387666, + "grad_norm": 1.3353700802371913, + "learning_rate": 1.9121020697970016e-05, + "loss": 0.7833640575408936, + "step": 1010 + }, + { + "epoch": 0.35629955947136566, + "grad_norm": 1.2927496434698726, + "learning_rate": 1.9118630542136874e-05, + 
"loss": 0.7693058252334595, + "step": 1011 + }, + { + "epoch": 0.35665198237885465, + "grad_norm": 1.3593779388270224, + "learning_rate": 1.9116237290812445e-05, + "loss": 0.7724676132202148, + "step": 1012 + }, + { + "epoch": 0.3570044052863436, + "grad_norm": 1.3849928303091037, + "learning_rate": 1.911384094480916e-05, + "loss": 0.6024055480957031, + "step": 1013 + }, + { + "epoch": 0.3573568281938326, + "grad_norm": 1.254237630036734, + "learning_rate": 1.9111441504940514e-05, + "loss": 0.7710703611373901, + "step": 1014 + }, + { + "epoch": 0.35770925110132157, + "grad_norm": 1.3917926832468532, + "learning_rate": 1.910903897202103e-05, + "loss": 0.7591651678085327, + "step": 1015 + }, + { + "epoch": 0.35806167400881056, + "grad_norm": 1.3250641662724636, + "learning_rate": 1.9106633346866302e-05, + "loss": 0.7721874713897705, + "step": 1016 + }, + { + "epoch": 0.35841409691629955, + "grad_norm": 1.3837097156983347, + "learning_rate": 1.910422463029296e-05, + "loss": 0.6767420172691345, + "step": 1017 + }, + { + "epoch": 0.35876651982378854, + "grad_norm": 1.5808312779065312, + "learning_rate": 1.910181282311869e-05, + "loss": 0.6704902648925781, + "step": 1018 + }, + { + "epoch": 0.35911894273127754, + "grad_norm": 1.3288966146848866, + "learning_rate": 1.9099397926162227e-05, + "loss": 0.8871079683303833, + "step": 1019 + }, + { + "epoch": 0.3594713656387665, + "grad_norm": 1.5716465127646195, + "learning_rate": 1.909697994024335e-05, + "loss": 0.7222549319267273, + "step": 1020 + }, + { + "epoch": 0.3598237885462555, + "grad_norm": 1.4050103839828958, + "learning_rate": 1.9094558866182892e-05, + "loss": 0.7443021535873413, + "step": 1021 + }, + { + "epoch": 0.3601762114537445, + "grad_norm": 1.3877313570980134, + "learning_rate": 1.9092134704802735e-05, + "loss": 0.7698349952697754, + "step": 1022 + }, + { + "epoch": 0.3605286343612335, + "grad_norm": 1.9010750041325926, + "learning_rate": 1.9089707456925798e-05, + "loss": 0.863248348236084, + "step": 1023 + 
}, + { + "epoch": 0.3608810572687225, + "grad_norm": 1.1572981545597187, + "learning_rate": 1.9087277123376068e-05, + "loss": 0.7036338448524475, + "step": 1024 + }, + { + "epoch": 0.36123348017621143, + "grad_norm": 1.5140044810060398, + "learning_rate": 1.9084843704978558e-05, + "loss": 0.7427274584770203, + "step": 1025 + }, + { + "epoch": 0.3615859030837004, + "grad_norm": 1.5903685422277276, + "learning_rate": 1.908240720255934e-05, + "loss": 0.6548313498497009, + "step": 1026 + }, + { + "epoch": 0.3619383259911894, + "grad_norm": 1.3326463394362358, + "learning_rate": 1.9079967616945534e-05, + "loss": 0.7586454749107361, + "step": 1027 + }, + { + "epoch": 0.3622907488986784, + "grad_norm": 1.45389698507953, + "learning_rate": 1.90775249489653e-05, + "loss": 0.6954889297485352, + "step": 1028 + }, + { + "epoch": 0.3626431718061674, + "grad_norm": 1.6543950271160617, + "learning_rate": 1.907507919944785e-05, + "loss": 0.8798770904541016, + "step": 1029 + }, + { + "epoch": 0.3629955947136564, + "grad_norm": 1.3815054682339305, + "learning_rate": 1.9072630369223433e-05, + "loss": 0.6600694060325623, + "step": 1030 + }, + { + "epoch": 0.3633480176211454, + "grad_norm": 1.5776995405913148, + "learning_rate": 1.9070178459123366e-05, + "loss": 0.6830897927284241, + "step": 1031 + }, + { + "epoch": 0.36370044052863437, + "grad_norm": 1.1973844620945089, + "learning_rate": 1.906772346997998e-05, + "loss": 0.6283613443374634, + "step": 1032 + }, + { + "epoch": 0.36405286343612336, + "grad_norm": 1.2892968799675324, + "learning_rate": 1.9065265402626676e-05, + "loss": 0.6451754570007324, + "step": 1033 + }, + { + "epoch": 0.36440528634361236, + "grad_norm": 1.4387559441313162, + "learning_rate": 1.9062804257897887e-05, + "loss": 0.7949883937835693, + "step": 1034 + }, + { + "epoch": 0.36475770925110135, + "grad_norm": 1.4366893391590683, + "learning_rate": 1.90603400366291e-05, + "loss": 0.625343918800354, + "step": 1035 + }, + { + "epoch": 0.3651101321585903, + 
"grad_norm": 1.5716897663583798, + "learning_rate": 1.9057872739656843e-05, + "loss": 0.8398839235305786, + "step": 1036 + }, + { + "epoch": 0.3654625550660793, + "grad_norm": 1.6515297053174456, + "learning_rate": 1.9055402367818673e-05, + "loss": 0.8628166913986206, + "step": 1037 + }, + { + "epoch": 0.36581497797356827, + "grad_norm": 1.6000244306696312, + "learning_rate": 1.905292892195322e-05, + "loss": 0.7494110465049744, + "step": 1038 + }, + { + "epoch": 0.36616740088105726, + "grad_norm": 1.6358981860019415, + "learning_rate": 1.9050452402900134e-05, + "loss": 0.7695099115371704, + "step": 1039 + }, + { + "epoch": 0.36651982378854625, + "grad_norm": 1.3948395289772064, + "learning_rate": 1.904797281150012e-05, + "loss": 0.8067067861557007, + "step": 1040 + }, + { + "epoch": 0.36687224669603524, + "grad_norm": 1.5430196098026252, + "learning_rate": 1.9045490148594917e-05, + "loss": 0.7542074918746948, + "step": 1041 + }, + { + "epoch": 0.36722466960352423, + "grad_norm": 1.4232871422135234, + "learning_rate": 1.9043004415027314e-05, + "loss": 0.7027335166931152, + "step": 1042 + }, + { + "epoch": 0.3675770925110132, + "grad_norm": 1.2842638834648272, + "learning_rate": 1.9040515611641142e-05, + "loss": 0.7779253721237183, + "step": 1043 + }, + { + "epoch": 0.3679295154185022, + "grad_norm": 1.4713589430159515, + "learning_rate": 1.9038023739281275e-05, + "loss": 0.6840049028396606, + "step": 1044 + }, + { + "epoch": 0.3682819383259912, + "grad_norm": 1.2252786450532585, + "learning_rate": 1.903552879879362e-05, + "loss": 0.6183794736862183, + "step": 1045 + }, + { + "epoch": 0.3686343612334802, + "grad_norm": 1.3239395642180716, + "learning_rate": 1.9033030791025127e-05, + "loss": 0.7770168781280518, + "step": 1046 + }, + { + "epoch": 0.36898678414096914, + "grad_norm": 1.5646813675557831, + "learning_rate": 1.9030529716823806e-05, + "loss": 0.7192036509513855, + "step": 1047 + }, + { + "epoch": 0.36933920704845813, + "grad_norm": 1.3179369082607764, + 
"learning_rate": 1.9028025577038688e-05, + "loss": 0.6604419946670532, + "step": 1048 + }, + { + "epoch": 0.3696916299559471, + "grad_norm": 1.7088212085954357, + "learning_rate": 1.9025518372519847e-05, + "loss": 0.7999060153961182, + "step": 1049 + }, + { + "epoch": 0.3700440528634361, + "grad_norm": 1.6369356635778263, + "learning_rate": 1.9023008104118404e-05, + "loss": 0.7487536668777466, + "step": 1050 + }, + { + "epoch": 0.3703964757709251, + "grad_norm": 1.4534592079598474, + "learning_rate": 1.9020494772686513e-05, + "loss": 0.7786455154418945, + "step": 1051 + }, + { + "epoch": 0.3707488986784141, + "grad_norm": 1.5556124976221868, + "learning_rate": 1.9017978379077378e-05, + "loss": 0.7592626214027405, + "step": 1052 + }, + { + "epoch": 0.3711013215859031, + "grad_norm": 1.3193440168525459, + "learning_rate": 1.901545892414523e-05, + "loss": 0.774850606918335, + "step": 1053 + }, + { + "epoch": 0.3714537444933921, + "grad_norm": 1.4859587321900767, + "learning_rate": 1.901293640874535e-05, + "loss": 0.5430009365081787, + "step": 1054 + }, + { + "epoch": 0.37180616740088107, + "grad_norm": 1.4541817899150224, + "learning_rate": 1.9010410833734053e-05, + "loss": 0.7459923624992371, + "step": 1055 + }, + { + "epoch": 0.37215859030837006, + "grad_norm": 1.6269332982530442, + "learning_rate": 1.9007882199968692e-05, + "loss": 0.6372017860412598, + "step": 1056 + }, + { + "epoch": 0.37251101321585905, + "grad_norm": 1.6522112420188226, + "learning_rate": 1.900535050830766e-05, + "loss": 0.6773583292961121, + "step": 1057 + }, + { + "epoch": 0.372863436123348, + "grad_norm": 1.7342256392022233, + "learning_rate": 1.900281575961039e-05, + "loss": 0.8431004285812378, + "step": 1058 + }, + { + "epoch": 0.373215859030837, + "grad_norm": 1.4085085883480681, + "learning_rate": 1.9000277954737342e-05, + "loss": 0.6361340284347534, + "step": 1059 + }, + { + "epoch": 0.373568281938326, + "grad_norm": 1.3793359019510345, + "learning_rate": 1.8997737094550033e-05, + 
"loss": 0.7153787612915039, + "step": 1060 + }, + { + "epoch": 0.37392070484581497, + "grad_norm": 1.4220392348844544, + "learning_rate": 1.8995193179911e-05, + "loss": 0.7244935631752014, + "step": 1061 + }, + { + "epoch": 0.37427312775330396, + "grad_norm": 1.4061330426818142, + "learning_rate": 1.8992646211683817e-05, + "loss": 0.6648202538490295, + "step": 1062 + }, + { + "epoch": 0.37462555066079295, + "grad_norm": 1.4217807346058315, + "learning_rate": 1.8990096190733113e-05, + "loss": 0.6528836488723755, + "step": 1063 + }, + { + "epoch": 0.37497797356828194, + "grad_norm": 1.4695679092519263, + "learning_rate": 1.8987543117924532e-05, + "loss": 0.6749341487884521, + "step": 1064 + }, + { + "epoch": 0.37533039647577093, + "grad_norm": 1.3287092803608218, + "learning_rate": 1.8984986994124766e-05, + "loss": 0.7402256727218628, + "step": 1065 + }, + { + "epoch": 0.3756828193832599, + "grad_norm": 1.2181513754192281, + "learning_rate": 1.898242782020154e-05, + "loss": 0.5638695955276489, + "step": 1066 + }, + { + "epoch": 0.3760352422907489, + "grad_norm": 1.5457056768133204, + "learning_rate": 1.897986559702361e-05, + "loss": 0.829822838306427, + "step": 1067 + }, + { + "epoch": 0.3763876651982379, + "grad_norm": 1.3351440834834858, + "learning_rate": 1.8977300325460774e-05, + "loss": 0.6796025037765503, + "step": 1068 + }, + { + "epoch": 0.37674008810572684, + "grad_norm": 1.3611135527247238, + "learning_rate": 1.897473200638386e-05, + "loss": 0.8584038615226746, + "step": 1069 + }, + { + "epoch": 0.37709251101321584, + "grad_norm": 1.4622377307020165, + "learning_rate": 1.897216064066474e-05, + "loss": 0.8069149255752563, + "step": 1070 + }, + { + "epoch": 0.37744493392070483, + "grad_norm": 1.2194173424769332, + "learning_rate": 1.89695862291763e-05, + "loss": 0.5762223601341248, + "step": 1071 + }, + { + "epoch": 0.3777973568281938, + "grad_norm": 1.3827918624348656, + "learning_rate": 1.8967008772792483e-05, + "loss": 0.6626466512680054, + "step": 1072 + 
}, + { + "epoch": 0.3781497797356828, + "grad_norm": 1.15359758590964, + "learning_rate": 1.896442827238825e-05, + "loss": 0.6260244250297546, + "step": 1073 + }, + { + "epoch": 0.3785022026431718, + "grad_norm": 1.8994686915407593, + "learning_rate": 1.8961844728839602e-05, + "loss": 0.8090343475341797, + "step": 1074 + }, + { + "epoch": 0.3788546255506608, + "grad_norm": 1.4116056126096472, + "learning_rate": 1.8959258143023575e-05, + "loss": 0.66957026720047, + "step": 1075 + }, + { + "epoch": 0.3792070484581498, + "grad_norm": 1.308974606662818, + "learning_rate": 1.8956668515818223e-05, + "loss": 0.7103087306022644, + "step": 1076 + }, + { + "epoch": 0.3795594713656388, + "grad_norm": 1.468914156940793, + "learning_rate": 1.895407584810266e-05, + "loss": 0.7469112277030945, + "step": 1077 + }, + { + "epoch": 0.37991189427312777, + "grad_norm": 1.624950928787921, + "learning_rate": 1.8951480140757003e-05, + "loss": 0.8252213001251221, + "step": 1078 + }, + { + "epoch": 0.38026431718061676, + "grad_norm": 1.4238044077341658, + "learning_rate": 1.8948881394662417e-05, + "loss": 0.7204562425613403, + "step": 1079 + }, + { + "epoch": 0.38061674008810575, + "grad_norm": 1.5659608304591812, + "learning_rate": 1.89462796107011e-05, + "loss": 0.7325669527053833, + "step": 1080 + }, + { + "epoch": 0.3809691629955947, + "grad_norm": 1.2964480504204927, + "learning_rate": 1.8943674789756276e-05, + "loss": 0.738972008228302, + "step": 1081 + }, + { + "epoch": 0.3813215859030837, + "grad_norm": 1.5892566433984823, + "learning_rate": 1.8941066932712194e-05, + "loss": 0.7468631267547607, + "step": 1082 + }, + { + "epoch": 0.3816740088105727, + "grad_norm": 1.6145182365902104, + "learning_rate": 1.893845604045415e-05, + "loss": 0.6479831337928772, + "step": 1083 + }, + { + "epoch": 0.38202643171806167, + "grad_norm": 1.3615750017210906, + "learning_rate": 1.893584211386845e-05, + "loss": 0.7615871429443359, + "step": 1084 + }, + { + "epoch": 0.38237885462555066, + "grad_norm": 
1.8901071385329251, + "learning_rate": 1.8933225153842446e-05, + "loss": 0.6934449076652527, + "step": 1085 + }, + { + "epoch": 0.38273127753303965, + "grad_norm": 1.2384833194245852, + "learning_rate": 1.8930605161264517e-05, + "loss": 0.5267079472541809, + "step": 1086 + }, + { + "epoch": 0.38308370044052864, + "grad_norm": 1.524832028509735, + "learning_rate": 1.892798213702407e-05, + "loss": 0.7309125661849976, + "step": 1087 + }, + { + "epoch": 0.38343612334801763, + "grad_norm": 1.3743253361073855, + "learning_rate": 1.892535608201153e-05, + "loss": 0.8133678436279297, + "step": 1088 + }, + { + "epoch": 0.3837885462555066, + "grad_norm": 1.3915725940468886, + "learning_rate": 1.892272699711837e-05, + "loss": 0.6097027063369751, + "step": 1089 + }, + { + "epoch": 0.3841409691629956, + "grad_norm": 1.548287022579551, + "learning_rate": 1.8920094883237082e-05, + "loss": 0.70456862449646, + "step": 1090 + }, + { + "epoch": 0.3844933920704846, + "grad_norm": 1.2952569165029428, + "learning_rate": 1.8917459741261183e-05, + "loss": 0.7236523628234863, + "step": 1091 + }, + { + "epoch": 0.38484581497797354, + "grad_norm": 1.5039785189114319, + "learning_rate": 1.8914821572085224e-05, + "loss": 0.7251272201538086, + "step": 1092 + }, + { + "epoch": 0.38519823788546254, + "grad_norm": 1.271767676796452, + "learning_rate": 1.8912180376604777e-05, + "loss": 0.7381070852279663, + "step": 1093 + }, + { + "epoch": 0.3855506607929515, + "grad_norm": 1.6023999081974447, + "learning_rate": 1.8909536155716458e-05, + "loss": 0.6654129028320312, + "step": 1094 + }, + { + "epoch": 0.3859030837004405, + "grad_norm": 1.4351957388528893, + "learning_rate": 1.8906888910317883e-05, + "loss": 0.7823128700256348, + "step": 1095 + }, + { + "epoch": 0.3862555066079295, + "grad_norm": 1.2302320218391962, + "learning_rate": 1.8904238641307718e-05, + "loss": 0.5988126993179321, + "step": 1096 + }, + { + "epoch": 0.3866079295154185, + "grad_norm": 1.6745614533481283, + "learning_rate": 
1.8901585349585643e-05, + "loss": 0.7671465873718262, + "step": 1097 + }, + { + "epoch": 0.3869603524229075, + "grad_norm": 1.4027982600434907, + "learning_rate": 1.889892903605237e-05, + "loss": 0.7878838777542114, + "step": 1098 + }, + { + "epoch": 0.3873127753303965, + "grad_norm": 1.2802181437962392, + "learning_rate": 1.8896269701609634e-05, + "loss": 0.72254878282547, + "step": 1099 + }, + { + "epoch": 0.3876651982378855, + "grad_norm": 1.4183908379879375, + "learning_rate": 1.8893607347160198e-05, + "loss": 0.6796868443489075, + "step": 1100 + }, + { + "epoch": 0.38801762114537447, + "grad_norm": 1.510469064523606, + "learning_rate": 1.8890941973607843e-05, + "loss": 0.6378471851348877, + "step": 1101 + }, + { + "epoch": 0.38837004405286346, + "grad_norm": 1.327169163711753, + "learning_rate": 1.888827358185739e-05, + "loss": 0.8473032712936401, + "step": 1102 + }, + { + "epoch": 0.3887224669603524, + "grad_norm": 1.4704779902492213, + "learning_rate": 1.8885602172814667e-05, + "loss": 0.8272742033004761, + "step": 1103 + }, + { + "epoch": 0.3890748898678414, + "grad_norm": 1.45593169268278, + "learning_rate": 1.8882927747386533e-05, + "loss": 0.7244507670402527, + "step": 1104 + }, + { + "epoch": 0.3894273127753304, + "grad_norm": 1.3081271484466186, + "learning_rate": 1.888025030648088e-05, + "loss": 0.5764014720916748, + "step": 1105 + }, + { + "epoch": 0.3897797356828194, + "grad_norm": 1.230279760550168, + "learning_rate": 1.887756985100661e-05, + "loss": 0.6944009065628052, + "step": 1106 + }, + { + "epoch": 0.39013215859030836, + "grad_norm": 1.381963017332696, + "learning_rate": 1.8874886381873657e-05, + "loss": 0.7096902132034302, + "step": 1107 + }, + { + "epoch": 0.39048458149779736, + "grad_norm": 1.6526795986169043, + "learning_rate": 1.8872199899992973e-05, + "loss": 0.6304805278778076, + "step": 1108 + }, + { + "epoch": 0.39083700440528635, + "grad_norm": 1.3081643743142675, + "learning_rate": 1.8869510406276538e-05, + "loss": 
0.7091327905654907, + "step": 1109 + }, + { + "epoch": 0.39118942731277534, + "grad_norm": 1.4257979117717376, + "learning_rate": 1.886681790163735e-05, + "loss": 0.6575565338134766, + "step": 1110 + }, + { + "epoch": 0.39154185022026433, + "grad_norm": 1.6155582257297172, + "learning_rate": 1.8864122386989426e-05, + "loss": 0.837468147277832, + "step": 1111 + }, + { + "epoch": 0.3918942731277533, + "grad_norm": 1.4395330206284223, + "learning_rate": 1.8861423863247816e-05, + "loss": 0.6861380338668823, + "step": 1112 + }, + { + "epoch": 0.3922466960352423, + "grad_norm": 1.3206140573248442, + "learning_rate": 1.8858722331328582e-05, + "loss": 0.7421156167984009, + "step": 1113 + }, + { + "epoch": 0.39259911894273125, + "grad_norm": 1.4106532753820455, + "learning_rate": 1.8856017792148807e-05, + "loss": 0.8037575483322144, + "step": 1114 + }, + { + "epoch": 0.39295154185022024, + "grad_norm": 1.34412494732323, + "learning_rate": 1.8853310246626608e-05, + "loss": 0.6530179381370544, + "step": 1115 + }, + { + "epoch": 0.39330396475770923, + "grad_norm": 1.7480111733406445, + "learning_rate": 1.88505996956811e-05, + "loss": 0.9039478302001953, + "step": 1116 + }, + { + "epoch": 0.3936563876651982, + "grad_norm": 1.2556675250098766, + "learning_rate": 1.8847886140232438e-05, + "loss": 0.7734917998313904, + "step": 1117 + }, + { + "epoch": 0.3940088105726872, + "grad_norm": 1.4809117769611548, + "learning_rate": 1.8845169581201786e-05, + "loss": 0.7146204113960266, + "step": 1118 + }, + { + "epoch": 0.3943612334801762, + "grad_norm": 1.4108388267740644, + "learning_rate": 1.8842450019511337e-05, + "loss": 0.6427414417266846, + "step": 1119 + }, + { + "epoch": 0.3947136563876652, + "grad_norm": 1.462443026711516, + "learning_rate": 1.883972745608429e-05, + "loss": 0.7241504192352295, + "step": 1120 + }, + { + "epoch": 0.3950660792951542, + "grad_norm": 1.5796197427651677, + "learning_rate": 1.8837001891844875e-05, + "loss": 0.7085466384887695, + "step": 1121 + }, + { + 
"epoch": 0.3954185022026432, + "grad_norm": 1.220037664049328, + "learning_rate": 1.8834273327718334e-05, + "loss": 0.6099711656570435, + "step": 1122 + }, + { + "epoch": 0.3957709251101322, + "grad_norm": 1.7637467057266936, + "learning_rate": 1.8831541764630936e-05, + "loss": 0.9153809547424316, + "step": 1123 + }, + { + "epoch": 0.39612334801762117, + "grad_norm": 1.432058114739846, + "learning_rate": 1.8828807203509953e-05, + "loss": 0.7025514841079712, + "step": 1124 + }, + { + "epoch": 0.3964757709251101, + "grad_norm": 1.3170228531933665, + "learning_rate": 1.882606964528369e-05, + "loss": 0.8254855275154114, + "step": 1125 + }, + { + "epoch": 0.3968281938325991, + "grad_norm": 1.3015643549096694, + "learning_rate": 1.8823329090881457e-05, + "loss": 0.6812278032302856, + "step": 1126 + }, + { + "epoch": 0.3971806167400881, + "grad_norm": 1.4379402990614556, + "learning_rate": 1.8820585541233592e-05, + "loss": 0.6570114493370056, + "step": 1127 + }, + { + "epoch": 0.3975330396475771, + "grad_norm": 1.4245448514304093, + "learning_rate": 1.881783899727144e-05, + "loss": 0.636163592338562, + "step": 1128 + }, + { + "epoch": 0.39788546255506607, + "grad_norm": 1.4535684365173425, + "learning_rate": 1.8815089459927373e-05, + "loss": 0.6744807958602905, + "step": 1129 + }, + { + "epoch": 0.39823788546255506, + "grad_norm": 1.2654983836452696, + "learning_rate": 1.8812336930134768e-05, + "loss": 0.6739502549171448, + "step": 1130 + }, + { + "epoch": 0.39859030837004406, + "grad_norm": 1.5274150360278067, + "learning_rate": 1.8809581408828026e-05, + "loss": 0.800058126449585, + "step": 1131 + }, + { + "epoch": 0.39894273127753305, + "grad_norm": 1.293199138820765, + "learning_rate": 1.880682289694256e-05, + "loss": 0.7158734798431396, + "step": 1132 + }, + { + "epoch": 0.39929515418502204, + "grad_norm": 1.426620948967722, + "learning_rate": 1.8804061395414795e-05, + "loss": 0.7142150402069092, + "step": 1133 + }, + { + "epoch": 0.39964757709251103, + "grad_norm": 
1.5712220679274596, + "learning_rate": 1.8801296905182184e-05, + "loss": 0.7830438613891602, + "step": 1134 + }, + { + "epoch": 0.4, + "grad_norm": 1.3789411964854812, + "learning_rate": 1.879852942718318e-05, + "loss": 0.7037091255187988, + "step": 1135 + }, + { + "epoch": 0.400352422907489, + "grad_norm": 1.5410576826642701, + "learning_rate": 1.8795758962357254e-05, + "loss": 0.7634316682815552, + "step": 1136 + }, + { + "epoch": 0.40070484581497795, + "grad_norm": 1.3380525485574057, + "learning_rate": 1.8792985511644895e-05, + "loss": 0.8569636344909668, + "step": 1137 + }, + { + "epoch": 0.40105726872246694, + "grad_norm": 1.4697640342217926, + "learning_rate": 1.8790209075987603e-05, + "loss": 0.8589881062507629, + "step": 1138 + }, + { + "epoch": 0.40140969162995593, + "grad_norm": 1.4119711578026037, + "learning_rate": 1.8787429656327892e-05, + "loss": 0.6667177677154541, + "step": 1139 + }, + { + "epoch": 0.4017621145374449, + "grad_norm": 1.5302691962759787, + "learning_rate": 1.8784647253609286e-05, + "loss": 0.8272922039031982, + "step": 1140 + }, + { + "epoch": 0.4021145374449339, + "grad_norm": 1.4934073596410382, + "learning_rate": 1.8781861868776328e-05, + "loss": 0.735906720161438, + "step": 1141 + }, + { + "epoch": 0.4024669603524229, + "grad_norm": 1.6214826290901958, + "learning_rate": 1.8779073502774567e-05, + "loss": 0.7496200799942017, + "step": 1142 + }, + { + "epoch": 0.4028193832599119, + "grad_norm": 1.5379634398249482, + "learning_rate": 1.8776282156550563e-05, + "loss": 0.741244912147522, + "step": 1143 + }, + { + "epoch": 0.4031718061674009, + "grad_norm": 1.6175484470841388, + "learning_rate": 1.87734878310519e-05, + "loss": 0.6074572205543518, + "step": 1144 + }, + { + "epoch": 0.4035242290748899, + "grad_norm": 1.5403137415943102, + "learning_rate": 1.8770690527227156e-05, + "loss": 0.7852963805198669, + "step": 1145 + }, + { + "epoch": 0.4038766519823789, + "grad_norm": 1.3167947695811832, + "learning_rate": 
1.8767890246025934e-05, + "loss": 0.8041664361953735, + "step": 1146 + }, + { + "epoch": 0.40422907488986787, + "grad_norm": 1.2847896666293108, + "learning_rate": 1.876508698839884e-05, + "loss": 0.6014564037322998, + "step": 1147 + }, + { + "epoch": 0.4045814977973568, + "grad_norm": 1.6737775020761936, + "learning_rate": 1.876228075529749e-05, + "loss": 0.7389206886291504, + "step": 1148 + }, + { + "epoch": 0.4049339207048458, + "grad_norm": 1.5291026740622409, + "learning_rate": 1.875947154767452e-05, + "loss": 0.7540062665939331, + "step": 1149 + }, + { + "epoch": 0.4052863436123348, + "grad_norm": 1.5780731113626183, + "learning_rate": 1.8756659366483564e-05, + "loss": 0.6953487396240234, + "step": 1150 + }, + { + "epoch": 0.4056387665198238, + "grad_norm": 1.8069469411894516, + "learning_rate": 1.875384421267927e-05, + "loss": 0.6715666055679321, + "step": 1151 + }, + { + "epoch": 0.40599118942731277, + "grad_norm": 1.6113428960633331, + "learning_rate": 1.8751026087217294e-05, + "loss": 0.7763206362724304, + "step": 1152 + }, + { + "epoch": 0.40634361233480176, + "grad_norm": 1.7227531605547286, + "learning_rate": 1.8748204991054304e-05, + "loss": 0.8445626497268677, + "step": 1153 + }, + { + "epoch": 0.40669603524229075, + "grad_norm": 1.4170830085508515, + "learning_rate": 1.8745380925147976e-05, + "loss": 0.6789584159851074, + "step": 1154 + }, + { + "epoch": 0.40704845814977975, + "grad_norm": 1.403092590323935, + "learning_rate": 1.8742553890456986e-05, + "loss": 0.6301349401473999, + "step": 1155 + }, + { + "epoch": 0.40740088105726874, + "grad_norm": 1.243923442253091, + "learning_rate": 1.873972388794103e-05, + "loss": 0.5968909859657288, + "step": 1156 + }, + { + "epoch": 0.40775330396475773, + "grad_norm": 1.489269903668207, + "learning_rate": 1.873689091856081e-05, + "loss": 0.759127676486969, + "step": 1157 + }, + { + "epoch": 0.4081057268722467, + "grad_norm": 1.7062525426103168, + "learning_rate": 1.873405498327802e-05, + "loss": 
0.8113895654678345, + "step": 1158 + }, + { + "epoch": 0.40845814977973566, + "grad_norm": 2.2841166697739266, + "learning_rate": 1.8731216083055373e-05, + "loss": 0.6294944286346436, + "step": 1159 + }, + { + "epoch": 0.40881057268722465, + "grad_norm": 1.7643300465666825, + "learning_rate": 1.87283742188566e-05, + "loss": 0.7024469375610352, + "step": 1160 + }, + { + "epoch": 0.40916299559471364, + "grad_norm": 1.6192702903054457, + "learning_rate": 1.8725529391646413e-05, + "loss": 0.6593793034553528, + "step": 1161 + }, + { + "epoch": 0.40951541850220263, + "grad_norm": 1.491465083071803, + "learning_rate": 1.8722681602390548e-05, + "loss": 0.72177654504776, + "step": 1162 + }, + { + "epoch": 0.4098678414096916, + "grad_norm": 1.5089448151062697, + "learning_rate": 1.8719830852055736e-05, + "loss": 0.7099393606185913, + "step": 1163 + }, + { + "epoch": 0.4102202643171806, + "grad_norm": 1.3870038981594819, + "learning_rate": 1.871697714160972e-05, + "loss": 0.6221687197685242, + "step": 1164 + }, + { + "epoch": 0.4105726872246696, + "grad_norm": 1.6034975452453926, + "learning_rate": 1.8714120472021252e-05, + "loss": 0.7236911058425903, + "step": 1165 + }, + { + "epoch": 0.4109251101321586, + "grad_norm": 1.6733335742616042, + "learning_rate": 1.8711260844260072e-05, + "loss": 0.6777583360671997, + "step": 1166 + }, + { + "epoch": 0.4112775330396476, + "grad_norm": 1.2685396486773262, + "learning_rate": 1.870839825929694e-05, + "loss": 0.6408713459968567, + "step": 1167 + }, + { + "epoch": 0.4116299559471366, + "grad_norm": 1.5501797457897155, + "learning_rate": 1.870553271810362e-05, + "loss": 0.6081968545913696, + "step": 1168 + }, + { + "epoch": 0.4119823788546256, + "grad_norm": 1.324315376857478, + "learning_rate": 1.8702664221652864e-05, + "loss": 0.7269757986068726, + "step": 1169 + }, + { + "epoch": 0.4123348017621145, + "grad_norm": 1.359571395974998, + "learning_rate": 1.8699792770918443e-05, + "loss": 0.6563149094581604, + "step": 1170 + }, + { + 
"epoch": 0.4126872246696035, + "grad_norm": 1.412304869808958, + "learning_rate": 1.8696918366875123e-05, + "loss": 0.6900039911270142, + "step": 1171 + }, + { + "epoch": 0.4130396475770925, + "grad_norm": 1.6666238046463622, + "learning_rate": 1.869404101049868e-05, + "loss": 0.6575014591217041, + "step": 1172 + }, + { + "epoch": 0.4133920704845815, + "grad_norm": 1.7453316480937289, + "learning_rate": 1.8691160702765878e-05, + "loss": 0.8178410530090332, + "step": 1173 + }, + { + "epoch": 0.4137444933920705, + "grad_norm": 1.2369225358107252, + "learning_rate": 1.8688277444654495e-05, + "loss": 0.6247331500053406, + "step": 1174 + }, + { + "epoch": 0.41409691629955947, + "grad_norm": 1.4809443864869283, + "learning_rate": 1.868539123714331e-05, + "loss": 0.7220792770385742, + "step": 1175 + }, + { + "epoch": 0.41444933920704846, + "grad_norm": 1.3133478143499064, + "learning_rate": 1.8682502081212104e-05, + "loss": 0.6279594302177429, + "step": 1176 + }, + { + "epoch": 0.41480176211453745, + "grad_norm": 1.9965951061666904, + "learning_rate": 1.8679609977841646e-05, + "loss": 0.8814467787742615, + "step": 1177 + }, + { + "epoch": 0.41515418502202645, + "grad_norm": 1.337413771448709, + "learning_rate": 1.867671492801372e-05, + "loss": 0.6601974368095398, + "step": 1178 + }, + { + "epoch": 0.41550660792951544, + "grad_norm": 1.5188708939818696, + "learning_rate": 1.8673816932711107e-05, + "loss": 0.7004785537719727, + "step": 1179 + }, + { + "epoch": 0.41585903083700443, + "grad_norm": 1.5057078901191085, + "learning_rate": 1.8670915992917586e-05, + "loss": 0.7409330606460571, + "step": 1180 + }, + { + "epoch": 0.41621145374449336, + "grad_norm": 1.4232223858260633, + "learning_rate": 1.8668012109617933e-05, + "loss": 0.6698065996170044, + "step": 1181 + }, + { + "epoch": 0.41656387665198236, + "grad_norm": 1.5925482634189316, + "learning_rate": 1.8665105283797927e-05, + "loss": 0.7420671582221985, + "step": 1182 + }, + { + "epoch": 0.41691629955947135, + 
"grad_norm": 1.5560634478711484, + "learning_rate": 1.8662195516444345e-05, + "loss": 0.7719774842262268, + "step": 1183 + }, + { + "epoch": 0.41726872246696034, + "grad_norm": 1.4792437797078573, + "learning_rate": 1.8659282808544966e-05, + "loss": 0.6206108331680298, + "step": 1184 + }, + { + "epoch": 0.41762114537444933, + "grad_norm": 1.3470893025550628, + "learning_rate": 1.865636716108856e-05, + "loss": 0.799741268157959, + "step": 1185 + }, + { + "epoch": 0.4179735682819383, + "grad_norm": 1.419455186886867, + "learning_rate": 1.8653448575064893e-05, + "loss": 0.6839771866798401, + "step": 1186 + }, + { + "epoch": 0.4183259911894273, + "grad_norm": 1.4763673797370565, + "learning_rate": 1.8650527051464744e-05, + "loss": 0.7937930822372437, + "step": 1187 + }, + { + "epoch": 0.4186784140969163, + "grad_norm": 2.8190993538517524, + "learning_rate": 1.8647602591279873e-05, + "loss": 0.6819020509719849, + "step": 1188 + }, + { + "epoch": 0.4190308370044053, + "grad_norm": 1.3567646132379503, + "learning_rate": 1.864467519550305e-05, + "loss": 0.75614994764328, + "step": 1189 + }, + { + "epoch": 0.4193832599118943, + "grad_norm": 1.567742841021855, + "learning_rate": 1.864174486512803e-05, + "loss": 0.6966177225112915, + "step": 1190 + }, + { + "epoch": 0.4197356828193833, + "grad_norm": 1.7710714107881367, + "learning_rate": 1.8638811601149568e-05, + "loss": 0.821509838104248, + "step": 1191 + }, + { + "epoch": 0.4200881057268723, + "grad_norm": 1.2328562386437087, + "learning_rate": 1.8635875404563414e-05, + "loss": 0.5905138254165649, + "step": 1192 + }, + { + "epoch": 0.4204405286343612, + "grad_norm": 1.4647056442197128, + "learning_rate": 1.8632936276366323e-05, + "loss": 0.6856247186660767, + "step": 1193 + }, + { + "epoch": 0.4207929515418502, + "grad_norm": 1.4886760353067057, + "learning_rate": 1.862999421755603e-05, + "loss": 0.745036244392395, + "step": 1194 + }, + { + "epoch": 0.4211453744493392, + "grad_norm": 1.1750279689329006, + "learning_rate": 
1.8627049229131276e-05, + "loss": 0.6503005027770996, + "step": 1195 + }, + { + "epoch": 0.4214977973568282, + "grad_norm": 1.5431880343600168, + "learning_rate": 1.86241013120918e-05, + "loss": 0.7498307228088379, + "step": 1196 + }, + { + "epoch": 0.4218502202643172, + "grad_norm": 1.3468463845976426, + "learning_rate": 1.862115046743831e-05, + "loss": 0.7928652763366699, + "step": 1197 + }, + { + "epoch": 0.42220264317180617, + "grad_norm": 1.2342083264732957, + "learning_rate": 1.861819669617254e-05, + "loss": 0.6854137182235718, + "step": 1198 + }, + { + "epoch": 0.42255506607929516, + "grad_norm": 1.2078818370142543, + "learning_rate": 1.86152399992972e-05, + "loss": 0.6196715831756592, + "step": 1199 + }, + { + "epoch": 0.42290748898678415, + "grad_norm": 1.3970249114344502, + "learning_rate": 1.8612280377816e-05, + "loss": 0.6937464475631714, + "step": 1200 + }, + { + "epoch": 0.42325991189427314, + "grad_norm": 1.68603514212184, + "learning_rate": 1.860931783273363e-05, + "loss": 0.7681070566177368, + "step": 1201 + }, + { + "epoch": 0.42361233480176214, + "grad_norm": 1.1472443629032707, + "learning_rate": 1.860635236505579e-05, + "loss": 0.676302969455719, + "step": 1202 + }, + { + "epoch": 0.4239647577092511, + "grad_norm": 1.3856112594345633, + "learning_rate": 1.8603383975789168e-05, + "loss": 0.6533253192901611, + "step": 1203 + }, + { + "epoch": 0.42431718061674006, + "grad_norm": 1.3469284337535972, + "learning_rate": 1.860041266594143e-05, + "loss": 0.689995288848877, + "step": 1204 + }, + { + "epoch": 0.42466960352422906, + "grad_norm": 1.5007772835228577, + "learning_rate": 1.859743843652124e-05, + "loss": 0.8129922747612, + "step": 1205 + }, + { + "epoch": 0.42502202643171805, + "grad_norm": 1.5410683437680426, + "learning_rate": 1.859446128853827e-05, + "loss": 0.8388077616691589, + "step": 1206 + }, + { + "epoch": 0.42537444933920704, + "grad_norm": 1.5558529097869003, + "learning_rate": 1.859148122300316e-05, + "loss": 0.8795225024223328, + 
"step": 1207 + }, + { + "epoch": 0.42572687224669603, + "grad_norm": 1.1213374735945745, + "learning_rate": 1.858849824092755e-05, + "loss": 0.7340251803398132, + "step": 1208 + }, + { + "epoch": 0.426079295154185, + "grad_norm": 1.4951423694810024, + "learning_rate": 1.8585512343324073e-05, + "loss": 0.8028355240821838, + "step": 1209 + }, + { + "epoch": 0.426431718061674, + "grad_norm": 1.4585659256901293, + "learning_rate": 1.8582523531206345e-05, + "loss": 0.8469998836517334, + "step": 1210 + }, + { + "epoch": 0.426784140969163, + "grad_norm": 1.5383443322846213, + "learning_rate": 1.857953180558898e-05, + "loss": 0.7562716007232666, + "step": 1211 + }, + { + "epoch": 0.427136563876652, + "grad_norm": 1.4113837543209433, + "learning_rate": 1.857653716748757e-05, + "loss": 0.7166177034378052, + "step": 1212 + }, + { + "epoch": 0.427488986784141, + "grad_norm": 1.5418199345701933, + "learning_rate": 1.85735396179187e-05, + "loss": 0.6946159601211548, + "step": 1213 + }, + { + "epoch": 0.42784140969163, + "grad_norm": 1.317478160039542, + "learning_rate": 1.8570539157899953e-05, + "loss": 0.5341482758522034, + "step": 1214 + }, + { + "epoch": 0.4281938325991189, + "grad_norm": 1.4287482623115888, + "learning_rate": 1.8567535788449886e-05, + "loss": 0.8128249645233154, + "step": 1215 + }, + { + "epoch": 0.4285462555066079, + "grad_norm": 1.34325298688053, + "learning_rate": 1.8564529510588046e-05, + "loss": 0.7136335372924805, + "step": 1216 + }, + { + "epoch": 0.4288986784140969, + "grad_norm": 1.358163949395023, + "learning_rate": 1.856152032533498e-05, + "loss": 0.6737562417984009, + "step": 1217 + }, + { + "epoch": 0.4292511013215859, + "grad_norm": 1.306172251281951, + "learning_rate": 1.855850823371221e-05, + "loss": 0.8102772235870361, + "step": 1218 + }, + { + "epoch": 0.4296035242290749, + "grad_norm": 1.4109010281873726, + "learning_rate": 1.855549323674224e-05, + "loss": 0.7389130592346191, + "step": 1219 + }, + { + "epoch": 0.4299559471365639, + 
"grad_norm": 1.6519920374913426, + "learning_rate": 1.8552475335448575e-05, + "loss": 0.9127305746078491, + "step": 1220 + }, + { + "epoch": 0.43030837004405287, + "grad_norm": 1.4401162301668198, + "learning_rate": 1.8549454530855697e-05, + "loss": 0.7599691152572632, + "step": 1221 + }, + { + "epoch": 0.43066079295154186, + "grad_norm": 1.59834239528244, + "learning_rate": 1.8546430823989075e-05, + "loss": 0.8343819379806519, + "step": 1222 + }, + { + "epoch": 0.43101321585903085, + "grad_norm": 1.7081796080725813, + "learning_rate": 1.8543404215875163e-05, + "loss": 0.7759256362915039, + "step": 1223 + }, + { + "epoch": 0.43136563876651984, + "grad_norm": 1.3364188660639875, + "learning_rate": 1.8540374707541398e-05, + "loss": 0.7803373336791992, + "step": 1224 + }, + { + "epoch": 0.43171806167400884, + "grad_norm": 1.4538494145578122, + "learning_rate": 1.8537342300016208e-05, + "loss": 0.6292921304702759, + "step": 1225 + }, + { + "epoch": 0.43207048458149777, + "grad_norm": 1.4521641959343445, + "learning_rate": 1.8534306994329e-05, + "loss": 0.8495175838470459, + "step": 1226 + }, + { + "epoch": 0.43242290748898676, + "grad_norm": 1.3062742481146943, + "learning_rate": 1.8531268791510167e-05, + "loss": 0.6141406297683716, + "step": 1227 + }, + { + "epoch": 0.43277533039647575, + "grad_norm": 1.576341879030456, + "learning_rate": 1.8528227692591076e-05, + "loss": 0.7087793350219727, + "step": 1228 + }, + { + "epoch": 0.43312775330396475, + "grad_norm": 1.5442094308389636, + "learning_rate": 1.8525183698604098e-05, + "loss": 0.7919498682022095, + "step": 1229 + }, + { + "epoch": 0.43348017621145374, + "grad_norm": 1.317139155945084, + "learning_rate": 1.8522136810582563e-05, + "loss": 0.7408226728439331, + "step": 1230 + }, + { + "epoch": 0.43383259911894273, + "grad_norm": 1.407715848952146, + "learning_rate": 1.85190870295608e-05, + "loss": 0.7140083312988281, + "step": 1231 + }, + { + "epoch": 0.4341850220264317, + "grad_norm": 1.4117801977693214, + 
"learning_rate": 1.8516034356574118e-05, + "loss": 0.7211521863937378, + "step": 1232 + }, + { + "epoch": 0.4345374449339207, + "grad_norm": 1.1753876244240768, + "learning_rate": 1.85129787926588e-05, + "loss": 0.7103208303451538, + "step": 1233 + }, + { + "epoch": 0.4348898678414097, + "grad_norm": 1.4479636604064312, + "learning_rate": 1.850992033885211e-05, + "loss": 0.816985011100769, + "step": 1234 + }, + { + "epoch": 0.4352422907488987, + "grad_norm": 1.4368000528699751, + "learning_rate": 1.850685899619231e-05, + "loss": 0.6678498983383179, + "step": 1235 + }, + { + "epoch": 0.4355947136563877, + "grad_norm": 1.4259303259837681, + "learning_rate": 1.8503794765718622e-05, + "loss": 0.7895394563674927, + "step": 1236 + }, + { + "epoch": 0.4359471365638766, + "grad_norm": 1.4256180200365283, + "learning_rate": 1.8500727648471258e-05, + "loss": 0.7295971512794495, + "step": 1237 + }, + { + "epoch": 0.4362995594713656, + "grad_norm": 1.552299015894991, + "learning_rate": 1.849765764549141e-05, + "loss": 0.7216300964355469, + "step": 1238 + }, + { + "epoch": 0.4366519823788546, + "grad_norm": 2.585430848560662, + "learning_rate": 1.8494584757821252e-05, + "loss": 0.8088986873626709, + "step": 1239 + }, + { + "epoch": 0.4370044052863436, + "grad_norm": 1.3100612400703413, + "learning_rate": 1.8491508986503928e-05, + "loss": 0.7380663156509399, + "step": 1240 + }, + { + "epoch": 0.4373568281938326, + "grad_norm": 1.6225248085666293, + "learning_rate": 1.8488430332583566e-05, + "loss": 0.8671622276306152, + "step": 1241 + }, + { + "epoch": 0.4377092511013216, + "grad_norm": 1.2548349586148027, + "learning_rate": 1.8485348797105277e-05, + "loss": 0.6649274826049805, + "step": 1242 + }, + { + "epoch": 0.4380616740088106, + "grad_norm": 1.3492988450242405, + "learning_rate": 1.848226438111515e-05, + "loss": 0.740972638130188, + "step": 1243 + }, + { + "epoch": 0.43841409691629957, + "grad_norm": 1.4062352938849376, + "learning_rate": 1.8479177085660237e-05, + "loss": 
0.6593915820121765, + "step": 1244 + }, + { + "epoch": 0.43876651982378856, + "grad_norm": 1.567811244473075, + "learning_rate": 1.8476086911788588e-05, + "loss": 0.792604923248291, + "step": 1245 + }, + { + "epoch": 0.43911894273127755, + "grad_norm": 1.583820790059346, + "learning_rate": 1.8472993860549216e-05, + "loss": 0.7521885633468628, + "step": 1246 + }, + { + "epoch": 0.43947136563876654, + "grad_norm": 1.4520072830804587, + "learning_rate": 1.846989793299212e-05, + "loss": 0.7242246270179749, + "step": 1247 + }, + { + "epoch": 0.43982378854625553, + "grad_norm": 1.2892821056189339, + "learning_rate": 1.846679913016827e-05, + "loss": 0.7343394160270691, + "step": 1248 + }, + { + "epoch": 0.44017621145374447, + "grad_norm": 1.2525729631593605, + "learning_rate": 1.846369745312961e-05, + "loss": 0.747876763343811, + "step": 1249 + }, + { + "epoch": 0.44052863436123346, + "grad_norm": 1.428983542355963, + "learning_rate": 1.8460592902929064e-05, + "loss": 0.7280946969985962, + "step": 1250 + }, + { + "epoch": 0.44088105726872245, + "grad_norm": 1.4254243168735732, + "learning_rate": 1.845748548062053e-05, + "loss": 0.7288519144058228, + "step": 1251 + }, + { + "epoch": 0.44123348017621145, + "grad_norm": 1.4847519735948493, + "learning_rate": 1.8454375187258885e-05, + "loss": 0.6269914507865906, + "step": 1252 + }, + { + "epoch": 0.44158590308370044, + "grad_norm": 1.5355271633317282, + "learning_rate": 1.8451262023899973e-05, + "loss": 0.7848949432373047, + "step": 1253 + }, + { + "epoch": 0.44193832599118943, + "grad_norm": 1.580356922946946, + "learning_rate": 1.8448145991600618e-05, + "loss": 0.7306517958641052, + "step": 1254 + }, + { + "epoch": 0.4422907488986784, + "grad_norm": 1.3971874565683924, + "learning_rate": 1.8445027091418614e-05, + "loss": 0.6933906078338623, + "step": 1255 + }, + { + "epoch": 0.4426431718061674, + "grad_norm": 1.2942221540854206, + "learning_rate": 1.8441905324412732e-05, + "loss": 0.8260579109191895, + "step": 1256 + }, + { 
+ "epoch": 0.4429955947136564, + "grad_norm": 1.4276139754434451, + "learning_rate": 1.8438780691642712e-05, + "loss": 0.6818344593048096, + "step": 1257 + }, + { + "epoch": 0.4433480176211454, + "grad_norm": 1.5571344695334373, + "learning_rate": 1.8435653194169274e-05, + "loss": 0.5980014801025391, + "step": 1258 + }, + { + "epoch": 0.4437004405286344, + "grad_norm": 1.6363647319534165, + "learning_rate": 1.8432522833054102e-05, + "loss": 0.7694655656814575, + "step": 1259 + }, + { + "epoch": 0.4440528634361233, + "grad_norm": 1.4888452953161495, + "learning_rate": 1.842938960935986e-05, + "loss": 0.6861646771430969, + "step": 1260 + }, + { + "epoch": 0.4444052863436123, + "grad_norm": 1.5245731543783476, + "learning_rate": 1.8426253524150176e-05, + "loss": 0.7346323728561401, + "step": 1261 + }, + { + "epoch": 0.4447577092511013, + "grad_norm": 1.5555183873270297, + "learning_rate": 1.8423114578489657e-05, + "loss": 0.7116265296936035, + "step": 1262 + }, + { + "epoch": 0.4451101321585903, + "grad_norm": 1.3587295641859045, + "learning_rate": 1.8419972773443877e-05, + "loss": 0.7148594856262207, + "step": 1263 + }, + { + "epoch": 0.4454625550660793, + "grad_norm": 1.4208610042885819, + "learning_rate": 1.8416828110079378e-05, + "loss": 0.6629737615585327, + "step": 1264 + }, + { + "epoch": 0.4458149779735683, + "grad_norm": 1.2215430932959532, + "learning_rate": 1.8413680589463677e-05, + "loss": 0.5734454393386841, + "step": 1265 + }, + { + "epoch": 0.4461674008810573, + "grad_norm": 1.4728067026699625, + "learning_rate": 1.8410530212665258e-05, + "loss": 0.8129212260246277, + "step": 1266 + }, + { + "epoch": 0.44651982378854627, + "grad_norm": 1.5823039225136746, + "learning_rate": 1.8407376980753578e-05, + "loss": 0.7408754825592041, + "step": 1267 + }, + { + "epoch": 0.44687224669603526, + "grad_norm": 2.9520848026313633, + "learning_rate": 1.840422089479906e-05, + "loss": 0.7315034866333008, + "step": 1268 + }, + { + "epoch": 0.44722466960352425, + 
"grad_norm": 1.453693040198655, + "learning_rate": 1.8401061955873102e-05, + "loss": 0.6774684190750122, + "step": 1269 + }, + { + "epoch": 0.44757709251101324, + "grad_norm": 1.4189733125983666, + "learning_rate": 1.8397900165048055e-05, + "loss": 0.6615294814109802, + "step": 1270 + }, + { + "epoch": 0.4479295154185022, + "grad_norm": 1.465563156151872, + "learning_rate": 1.8394735523397258e-05, + "loss": 0.6757136583328247, + "step": 1271 + }, + { + "epoch": 0.44828193832599117, + "grad_norm": 1.3581337883847424, + "learning_rate": 1.8391568031995004e-05, + "loss": 0.6395466327667236, + "step": 1272 + }, + { + "epoch": 0.44863436123348016, + "grad_norm": 1.3957918327614203, + "learning_rate": 1.8388397691916556e-05, + "loss": 0.6436404585838318, + "step": 1273 + }, + { + "epoch": 0.44898678414096915, + "grad_norm": 1.2217258095016672, + "learning_rate": 1.838522450423815e-05, + "loss": 0.6280484199523926, + "step": 1274 + }, + { + "epoch": 0.44933920704845814, + "grad_norm": 1.3831470857016404, + "learning_rate": 1.8382048470036983e-05, + "loss": 0.7485225200653076, + "step": 1275 + }, + { + "epoch": 0.44969162995594714, + "grad_norm": 1.5437699808102354, + "learning_rate": 1.8378869590391217e-05, + "loss": 0.745079517364502, + "step": 1276 + }, + { + "epoch": 0.45004405286343613, + "grad_norm": 1.5902187054867891, + "learning_rate": 1.8375687866379988e-05, + "loss": 0.656510591506958, + "step": 1277 + }, + { + "epoch": 0.4503964757709251, + "grad_norm": 1.542738255105748, + "learning_rate": 1.8372503299083392e-05, + "loss": 0.7122445106506348, + "step": 1278 + }, + { + "epoch": 0.4507488986784141, + "grad_norm": 1.5368544285826038, + "learning_rate": 1.8369315889582483e-05, + "loss": 0.7402621507644653, + "step": 1279 + }, + { + "epoch": 0.4511013215859031, + "grad_norm": 1.539047411882563, + "learning_rate": 1.8366125638959292e-05, + "loss": 0.79311203956604, + "step": 1280 + }, + { + "epoch": 0.4514537444933921, + "grad_norm": 1.5615114889746888, + 
"learning_rate": 1.8362932548296815e-05, + "loss": 0.7748456001281738, + "step": 1281 + }, + { + "epoch": 0.45180616740088103, + "grad_norm": 1.4203050333533118, + "learning_rate": 1.8359736618679e-05, + "loss": 0.8285728096961975, + "step": 1282 + }, + { + "epoch": 0.45215859030837, + "grad_norm": 1.5541412727714081, + "learning_rate": 1.835653785119076e-05, + "loss": 0.7874733209609985, + "step": 1283 + }, + { + "epoch": 0.452511013215859, + "grad_norm": 1.990742110424804, + "learning_rate": 1.8353336246917996e-05, + "loss": 0.8984566926956177, + "step": 1284 + }, + { + "epoch": 0.452863436123348, + "grad_norm": 1.5779572276747513, + "learning_rate": 1.8350131806947537e-05, + "loss": 0.7730413675308228, + "step": 1285 + }, + { + "epoch": 0.453215859030837, + "grad_norm": 1.7109096071986905, + "learning_rate": 1.8346924532367195e-05, + "loss": 0.6064612865447998, + "step": 1286 + }, + { + "epoch": 0.453568281938326, + "grad_norm": 1.2417304411100711, + "learning_rate": 1.8343714424265742e-05, + "loss": 0.6946402192115784, + "step": 1287 + }, + { + "epoch": 0.453920704845815, + "grad_norm": 1.4035686433407615, + "learning_rate": 1.8340501483732908e-05, + "loss": 0.6131751537322998, + "step": 1288 + }, + { + "epoch": 0.454273127753304, + "grad_norm": 1.5800587203565855, + "learning_rate": 1.833728571185938e-05, + "loss": 0.7251182794570923, + "step": 1289 + }, + { + "epoch": 0.45462555066079297, + "grad_norm": 1.4036983560957499, + "learning_rate": 1.8334067109736826e-05, + "loss": 0.6548069715499878, + "step": 1290 + }, + { + "epoch": 0.45497797356828196, + "grad_norm": 1.3998869795024185, + "learning_rate": 1.833084567845785e-05, + "loss": 0.7416098117828369, + "step": 1291 + }, + { + "epoch": 0.45533039647577095, + "grad_norm": 2.508404695128388, + "learning_rate": 1.8327621419116034e-05, + "loss": 0.7320964932441711, + "step": 1292 + }, + { + "epoch": 0.4556828193832599, + "grad_norm": 1.3052290617356537, + "learning_rate": 1.8324394332805913e-05, + "loss": 
0.5926196575164795, + "step": 1293 + }, + { + "epoch": 0.4560352422907489, + "grad_norm": 1.5674410721277312, + "learning_rate": 1.8321164420622977e-05, + "loss": 0.5294085741043091, + "step": 1294 + }, + { + "epoch": 0.45638766519823787, + "grad_norm": 1.2785938430138426, + "learning_rate": 1.8317931683663688e-05, + "loss": 0.6332723498344421, + "step": 1295 + }, + { + "epoch": 0.45674008810572686, + "grad_norm": 1.5962686180302166, + "learning_rate": 1.8314696123025456e-05, + "loss": 0.8361148834228516, + "step": 1296 + }, + { + "epoch": 0.45709251101321585, + "grad_norm": 1.4587382180744954, + "learning_rate": 1.8311457739806648e-05, + "loss": 0.8097354173660278, + "step": 1297 + }, + { + "epoch": 0.45744493392070484, + "grad_norm": 1.5247898400944095, + "learning_rate": 1.8308216535106606e-05, + "loss": 0.8619102239608765, + "step": 1298 + }, + { + "epoch": 0.45779735682819384, + "grad_norm": 1.7222438621078806, + "learning_rate": 1.8304972510025607e-05, + "loss": 0.8149014711380005, + "step": 1299 + }, + { + "epoch": 0.4581497797356828, + "grad_norm": 1.4821216839710079, + "learning_rate": 1.8301725665664904e-05, + "loss": 0.6217210292816162, + "step": 1300 + }, + { + "epoch": 0.4585022026431718, + "grad_norm": 1.3606031472973286, + "learning_rate": 1.8298476003126695e-05, + "loss": 0.7496612071990967, + "step": 1301 + }, + { + "epoch": 0.4588546255506608, + "grad_norm": 1.3221676149271377, + "learning_rate": 1.8295223523514144e-05, + "loss": 0.743242084980011, + "step": 1302 + }, + { + "epoch": 0.4592070484581498, + "grad_norm": 1.3745674408132749, + "learning_rate": 1.829196822793136e-05, + "loss": 0.6425061821937561, + "step": 1303 + }, + { + "epoch": 0.4595594713656388, + "grad_norm": 1.6216951689157317, + "learning_rate": 1.828871011748342e-05, + "loss": 0.8274835348129272, + "step": 1304 + }, + { + "epoch": 0.45991189427312773, + "grad_norm": 1.2722833909738493, + "learning_rate": 1.828544919327635e-05, + "loss": 0.6403865814208984, + "step": 1305 + }, + 
{ + "epoch": 0.4602643171806167, + "grad_norm": 1.234115960449283, + "learning_rate": 1.828218545641713e-05, + "loss": 0.6585257053375244, + "step": 1306 + }, + { + "epoch": 0.4606167400881057, + "grad_norm": 1.2325421263478973, + "learning_rate": 1.82789189080137e-05, + "loss": 0.6467862129211426, + "step": 1307 + }, + { + "epoch": 0.4609691629955947, + "grad_norm": 1.403654297681647, + "learning_rate": 1.827564954917495e-05, + "loss": 0.8656524419784546, + "step": 1308 + }, + { + "epoch": 0.4613215859030837, + "grad_norm": 1.449712147167455, + "learning_rate": 1.8272377381010726e-05, + "loss": 0.7298469543457031, + "step": 1309 + }, + { + "epoch": 0.4616740088105727, + "grad_norm": 1.575558340533703, + "learning_rate": 1.8269102404631826e-05, + "loss": 0.7342871427536011, + "step": 1310 + }, + { + "epoch": 0.4620264317180617, + "grad_norm": 1.4177026442874099, + "learning_rate": 1.8265824621150005e-05, + "loss": 0.7437269687652588, + "step": 1311 + }, + { + "epoch": 0.4623788546255507, + "grad_norm": 1.370008690924395, + "learning_rate": 1.8262544031677965e-05, + "loss": 0.6761496067047119, + "step": 1312 + }, + { + "epoch": 0.46273127753303966, + "grad_norm": 1.3488719018465838, + "learning_rate": 1.825926063732937e-05, + "loss": 0.6504565477371216, + "step": 1313 + }, + { + "epoch": 0.46308370044052866, + "grad_norm": 1.5002490307110308, + "learning_rate": 1.8255974439218826e-05, + "loss": 0.7058892250061035, + "step": 1314 + }, + { + "epoch": 0.46343612334801765, + "grad_norm": 1.37061056314256, + "learning_rate": 1.8252685438461893e-05, + "loss": 0.704500675201416, + "step": 1315 + }, + { + "epoch": 0.4637885462555066, + "grad_norm": 1.3921050444029468, + "learning_rate": 1.824939363617509e-05, + "loss": 0.7438445091247559, + "step": 1316 + }, + { + "epoch": 0.4641409691629956, + "grad_norm": 1.4372002500080507, + "learning_rate": 1.8246099033475872e-05, + "loss": 0.6610915660858154, + "step": 1317 + }, + { + "epoch": 0.46449339207048457, + "grad_norm": 
1.0745723869419856, + "learning_rate": 1.8242801631482666e-05, + "loss": 0.5868711471557617, + "step": 1318 + }, + { + "epoch": 0.46484581497797356, + "grad_norm": 1.192238188456442, + "learning_rate": 1.8239501431314828e-05, + "loss": 0.7403215765953064, + "step": 1319 + }, + { + "epoch": 0.46519823788546255, + "grad_norm": 1.2444894883495399, + "learning_rate": 1.823619843409268e-05, + "loss": 0.6836927533149719, + "step": 1320 + }, + { + "epoch": 0.46555066079295154, + "grad_norm": 1.4619703465719247, + "learning_rate": 1.8232892640937482e-05, + "loss": 0.744488537311554, + "step": 1321 + }, + { + "epoch": 0.46590308370044053, + "grad_norm": 1.6337099192848834, + "learning_rate": 1.822958405297145e-05, + "loss": 0.8203051090240479, + "step": 1322 + }, + { + "epoch": 0.4662555066079295, + "grad_norm": 1.184261838198034, + "learning_rate": 1.8226272671317747e-05, + "loss": 0.6452913284301758, + "step": 1323 + }, + { + "epoch": 0.4666079295154185, + "grad_norm": 1.6458345614686154, + "learning_rate": 1.8222958497100482e-05, + "loss": 0.7362639307975769, + "step": 1324 + }, + { + "epoch": 0.4669603524229075, + "grad_norm": 6.608293048647877, + "learning_rate": 1.8219641531444713e-05, + "loss": 0.8192600011825562, + "step": 1325 + }, + { + "epoch": 0.4673127753303965, + "grad_norm": 1.4257376230679313, + "learning_rate": 1.8216321775476452e-05, + "loss": 0.8391410112380981, + "step": 1326 + }, + { + "epoch": 0.46766519823788544, + "grad_norm": 1.3133795307817668, + "learning_rate": 1.8212999230322648e-05, + "loss": 0.8723593354225159, + "step": 1327 + }, + { + "epoch": 0.46801762114537443, + "grad_norm": 1.4218119484201381, + "learning_rate": 1.8209673897111208e-05, + "loss": 0.6891233921051025, + "step": 1328 + }, + { + "epoch": 0.4683700440528634, + "grad_norm": 1.414801660380672, + "learning_rate": 1.820634577697097e-05, + "loss": 0.6585180759429932, + "step": 1329 + }, + { + "epoch": 0.4687224669603524, + "grad_norm": 1.503205293925671, + "learning_rate": 
1.8203014871031732e-05, + "loss": 0.9556418657302856, + "step": 1330 + }, + { + "epoch": 0.4690748898678414, + "grad_norm": 1.491345239113851, + "learning_rate": 1.8199681180424234e-05, + "loss": 0.803380012512207, + "step": 1331 + }, + { + "epoch": 0.4694273127753304, + "grad_norm": 1.6217603270172032, + "learning_rate": 1.819634470628016e-05, + "loss": 0.7090115547180176, + "step": 1332 + }, + { + "epoch": 0.4697797356828194, + "grad_norm": 1.6705712009535991, + "learning_rate": 1.8193005449732134e-05, + "loss": 0.6314720511436462, + "step": 1333 + }, + { + "epoch": 0.4701321585903084, + "grad_norm": 1.4756439095691731, + "learning_rate": 1.8189663411913737e-05, + "loss": 0.72248375415802, + "step": 1334 + }, + { + "epoch": 0.47048458149779737, + "grad_norm": 1.2477075880097683, + "learning_rate": 1.818631859395948e-05, + "loss": 0.6192474961280823, + "step": 1335 + }, + { + "epoch": 0.47083700440528636, + "grad_norm": 1.4944381119847567, + "learning_rate": 1.818297099700483e-05, + "loss": 0.6354564428329468, + "step": 1336 + }, + { + "epoch": 0.47118942731277536, + "grad_norm": 1.3129251382794922, + "learning_rate": 1.817962062218619e-05, + "loss": 0.7577195167541504, + "step": 1337 + }, + { + "epoch": 0.4715418502202643, + "grad_norm": 1.5504293722974503, + "learning_rate": 1.8176267470640908e-05, + "loss": 0.8064994812011719, + "step": 1338 + }, + { + "epoch": 0.4718942731277533, + "grad_norm": 1.211182925950848, + "learning_rate": 1.8172911543507276e-05, + "loss": 0.5994154214859009, + "step": 1339 + }, + { + "epoch": 0.4722466960352423, + "grad_norm": 1.701641381957404, + "learning_rate": 1.8169552841924524e-05, + "loss": 0.7483634948730469, + "step": 1340 + }, + { + "epoch": 0.47259911894273127, + "grad_norm": 1.5346948984560977, + "learning_rate": 1.8166191367032828e-05, + "loss": 0.817699134349823, + "step": 1341 + }, + { + "epoch": 0.47295154185022026, + "grad_norm": 1.4634504483386954, + "learning_rate": 1.8162827119973305e-05, + "loss": 
0.7262923717498779, + "step": 1342 + }, + { + "epoch": 0.47330396475770925, + "grad_norm": 1.6796646988667925, + "learning_rate": 1.8159460101888013e-05, + "loss": 0.6097851991653442, + "step": 1343 + }, + { + "epoch": 0.47365638766519824, + "grad_norm": 1.3148094915971675, + "learning_rate": 1.8156090313919944e-05, + "loss": 0.7258971929550171, + "step": 1344 + }, + { + "epoch": 0.47400881057268723, + "grad_norm": 1.198607169385478, + "learning_rate": 1.8152717757213045e-05, + "loss": 0.6300361156463623, + "step": 1345 + }, + { + "epoch": 0.4743612334801762, + "grad_norm": 1.397827708634256, + "learning_rate": 1.8149342432912184e-05, + "loss": 0.7339942455291748, + "step": 1346 + }, + { + "epoch": 0.4747136563876652, + "grad_norm": 1.4524082687419129, + "learning_rate": 1.8145964342163188e-05, + "loss": 0.7520095109939575, + "step": 1347 + }, + { + "epoch": 0.4750660792951542, + "grad_norm": 1.6587168399408485, + "learning_rate": 1.814258348611281e-05, + "loss": 0.7276853322982788, + "step": 1348 + }, + { + "epoch": 0.47541850220264315, + "grad_norm": 1.4463166573664321, + "learning_rate": 1.8139199865908742e-05, + "loss": 0.8004029989242554, + "step": 1349 + }, + { + "epoch": 0.47577092511013214, + "grad_norm": 1.4508723815154267, + "learning_rate": 1.8135813482699623e-05, + "loss": 0.6932536363601685, + "step": 1350 + }, + { + "epoch": 0.47612334801762113, + "grad_norm": 1.8868515127553653, + "learning_rate": 1.8132424337635026e-05, + "loss": 0.7697082161903381, + "step": 1351 + }, + { + "epoch": 0.4764757709251101, + "grad_norm": 1.246718000700102, + "learning_rate": 1.8129032431865453e-05, + "loss": 0.6472513675689697, + "step": 1352 + }, + { + "epoch": 0.4768281938325991, + "grad_norm": 1.413046013449196, + "learning_rate": 1.8125637766542353e-05, + "loss": 0.6483110785484314, + "step": 1353 + }, + { + "epoch": 0.4771806167400881, + "grad_norm": 1.4854860856809686, + "learning_rate": 1.8122240342818113e-05, + "loss": 0.5495485067367554, + "step": 1354 + }, + 
{ + "epoch": 0.4775330396475771, + "grad_norm": 1.2801602602197804, + "learning_rate": 1.811884016184605e-05, + "loss": 0.5235577821731567, + "step": 1355 + }, + { + "epoch": 0.4778854625550661, + "grad_norm": 1.734412256759482, + "learning_rate": 1.811543722478042e-05, + "loss": 0.7852121591567993, + "step": 1356 + }, + { + "epoch": 0.4782378854625551, + "grad_norm": 1.3650060645350073, + "learning_rate": 1.811203153277641e-05, + "loss": 0.6704862713813782, + "step": 1357 + }, + { + "epoch": 0.47859030837004407, + "grad_norm": 1.6553040991032588, + "learning_rate": 1.8108623086990156e-05, + "loss": 0.5964453220367432, + "step": 1358 + }, + { + "epoch": 0.47894273127753306, + "grad_norm": 1.3936312619950861, + "learning_rate": 1.8105211888578708e-05, + "loss": 0.6697995066642761, + "step": 1359 + }, + { + "epoch": 0.479295154185022, + "grad_norm": 1.5031130965144783, + "learning_rate": 1.810179793870007e-05, + "loss": 0.6335821151733398, + "step": 1360 + }, + { + "epoch": 0.479647577092511, + "grad_norm": 1.5635708705560234, + "learning_rate": 1.8098381238513173e-05, + "loss": 0.7925145626068115, + "step": 1361 + }, + { + "epoch": 0.48, + "grad_norm": 1.3011545804458011, + "learning_rate": 1.809496178917787e-05, + "loss": 0.6567563414573669, + "step": 1362 + }, + { + "epoch": 0.480352422907489, + "grad_norm": 1.6816341182204335, + "learning_rate": 1.809153959185497e-05, + "loss": 0.6318811178207397, + "step": 1363 + }, + { + "epoch": 0.48070484581497797, + "grad_norm": 1.6781349693525882, + "learning_rate": 1.8088114647706195e-05, + "loss": 0.7309727668762207, + "step": 1364 + }, + { + "epoch": 0.48105726872246696, + "grad_norm": 1.689289351270497, + "learning_rate": 1.8084686957894207e-05, + "loss": 0.7109836339950562, + "step": 1365 + }, + { + "epoch": 0.48140969162995595, + "grad_norm": 1.5638040238741844, + "learning_rate": 1.8081256523582604e-05, + "loss": 0.7475707530975342, + "step": 1366 + }, + { + "epoch": 0.48176211453744494, + "grad_norm": 
1.492251829838995, + "learning_rate": 1.8077823345935904e-05, + "loss": 0.7149914503097534, + "step": 1367 + }, + { + "epoch": 0.48211453744493393, + "grad_norm": 1.5575297411632822, + "learning_rate": 1.8074387426119574e-05, + "loss": 0.7294478416442871, + "step": 1368 + }, + { + "epoch": 0.4824669603524229, + "grad_norm": 1.4689289799329066, + "learning_rate": 1.8070948765299995e-05, + "loss": 0.7115635871887207, + "step": 1369 + }, + { + "epoch": 0.4828193832599119, + "grad_norm": 1.5506146763507274, + "learning_rate": 1.806750736464449e-05, + "loss": 0.7046270966529846, + "step": 1370 + }, + { + "epoch": 0.4831718061674009, + "grad_norm": 1.42427078791196, + "learning_rate": 1.8064063225321305e-05, + "loss": 0.6206589937210083, + "step": 1371 + }, + { + "epoch": 0.48352422907488984, + "grad_norm": 1.2993997688945442, + "learning_rate": 1.8060616348499612e-05, + "loss": 0.7135940194129944, + "step": 1372 + }, + { + "epoch": 0.48387665198237884, + "grad_norm": 1.4643276104475023, + "learning_rate": 1.8057166735349533e-05, + "loss": 0.8360849618911743, + "step": 1373 + }, + { + "epoch": 0.4842290748898678, + "grad_norm": 1.541707883618089, + "learning_rate": 1.805371438704209e-05, + "loss": 0.6842360496520996, + "step": 1374 + }, + { + "epoch": 0.4845814977973568, + "grad_norm": 1.5840103891509227, + "learning_rate": 1.8050259304749254e-05, + "loss": 0.7615031003952026, + "step": 1375 + }, + { + "epoch": 0.4849339207048458, + "grad_norm": 1.655426139564667, + "learning_rate": 1.804680148964392e-05, + "loss": 0.8019323348999023, + "step": 1376 + }, + { + "epoch": 0.4852863436123348, + "grad_norm": 1.4674909380859245, + "learning_rate": 1.8043340942899906e-05, + "loss": 0.7882958054542542, + "step": 1377 + }, + { + "epoch": 0.4856387665198238, + "grad_norm": 1.2708485815687132, + "learning_rate": 1.8039877665691955e-05, + "loss": 0.7504314184188843, + "step": 1378 + }, + { + "epoch": 0.4859911894273128, + "grad_norm": 1.4258268452315883, + "learning_rate": 
1.803641165919575e-05, + "loss": 0.6634547710418701, + "step": 1379 + }, + { + "epoch": 0.4863436123348018, + "grad_norm": 1.5654330696713128, + "learning_rate": 1.803294292458789e-05, + "loss": 0.7744965553283691, + "step": 1380 + }, + { + "epoch": 0.48669603524229077, + "grad_norm": 1.4537322254817193, + "learning_rate": 1.8029471463045904e-05, + "loss": 0.6322098970413208, + "step": 1381 + }, + { + "epoch": 0.48704845814977976, + "grad_norm": 1.4410033770501562, + "learning_rate": 1.8025997275748237e-05, + "loss": 0.7675940990447998, + "step": 1382 + }, + { + "epoch": 0.4874008810572687, + "grad_norm": 1.4138967124963124, + "learning_rate": 1.8022520363874275e-05, + "loss": 0.805001974105835, + "step": 1383 + }, + { + "epoch": 0.4877533039647577, + "grad_norm": 1.4447058519334661, + "learning_rate": 1.8019040728604322e-05, + "loss": 0.7647902369499207, + "step": 1384 + }, + { + "epoch": 0.4881057268722467, + "grad_norm": 1.4676048165311881, + "learning_rate": 1.8015558371119604e-05, + "loss": 0.7267208099365234, + "step": 1385 + }, + { + "epoch": 0.4884581497797357, + "grad_norm": 1.698344162431053, + "learning_rate": 1.801207329260227e-05, + "loss": 0.9259899854660034, + "step": 1386 + }, + { + "epoch": 0.48881057268722466, + "grad_norm": 1.6007666753359713, + "learning_rate": 1.8008585494235398e-05, + "loss": 0.7127895951271057, + "step": 1387 + }, + { + "epoch": 0.48916299559471366, + "grad_norm": 1.3612199688450533, + "learning_rate": 1.8005094977202987e-05, + "loss": 0.5890867710113525, + "step": 1388 + }, + { + "epoch": 0.48951541850220265, + "grad_norm": 1.355680060820382, + "learning_rate": 1.800160174268996e-05, + "loss": 0.9388052225112915, + "step": 1389 + }, + { + "epoch": 0.48986784140969164, + "grad_norm": 1.3938222391852138, + "learning_rate": 1.799810579188216e-05, + "loss": 0.7282747626304626, + "step": 1390 + }, + { + "epoch": 0.49022026431718063, + "grad_norm": 1.3481077360000804, + "learning_rate": 1.7994607125966354e-05, + "loss": 
0.743558943271637, + "step": 1391 + }, + { + "epoch": 0.4905726872246696, + "grad_norm": 1.5830453320245632, + "learning_rate": 1.7991105746130234e-05, + "loss": 0.794719934463501, + "step": 1392 + }, + { + "epoch": 0.4909251101321586, + "grad_norm": 1.2758935421604947, + "learning_rate": 1.7987601653562402e-05, + "loss": 0.7320685982704163, + "step": 1393 + }, + { + "epoch": 0.49127753303964755, + "grad_norm": 1.7642547814838838, + "learning_rate": 1.798409484945239e-05, + "loss": 0.7376105785369873, + "step": 1394 + }, + { + "epoch": 0.49162995594713654, + "grad_norm": 1.2029848235346605, + "learning_rate": 1.7980585334990652e-05, + "loss": 0.7474706172943115, + "step": 1395 + }, + { + "epoch": 0.49198237885462553, + "grad_norm": 1.2018884579546327, + "learning_rate": 1.797707311136856e-05, + "loss": 0.5799805521965027, + "step": 1396 + }, + { + "epoch": 0.4923348017621145, + "grad_norm": 1.4260726798049534, + "learning_rate": 1.79735581797784e-05, + "loss": 0.7515959739685059, + "step": 1397 + }, + { + "epoch": 0.4926872246696035, + "grad_norm": 1.4843732287701579, + "learning_rate": 1.797004054141339e-05, + "loss": 0.6035799980163574, + "step": 1398 + }, + { + "epoch": 0.4930396475770925, + "grad_norm": 1.4699634461145672, + "learning_rate": 1.796652019746765e-05, + "loss": 0.7613668441772461, + "step": 1399 + }, + { + "epoch": 0.4933920704845815, + "grad_norm": 1.5395256627563776, + "learning_rate": 1.7962997149136226e-05, + "loss": 0.8780882954597473, + "step": 1400 + }, + { + "epoch": 0.4937444933920705, + "grad_norm": 1.4849311758521768, + "learning_rate": 1.795947139761509e-05, + "loss": 0.8661091327667236, + "step": 1401 + }, + { + "epoch": 0.4940969162995595, + "grad_norm": 1.2531714361223334, + "learning_rate": 1.7955942944101124e-05, + "loss": 0.6893571019172668, + "step": 1402 + }, + { + "epoch": 0.4944493392070485, + "grad_norm": 1.4079915487364913, + "learning_rate": 1.7952411789792125e-05, + "loss": 0.787032961845398, + "step": 1403 + }, + { + 
"epoch": 0.49480176211453747, + "grad_norm": 1.3474472991478739, + "learning_rate": 1.7948877935886812e-05, + "loss": 0.5346347689628601, + "step": 1404 + }, + { + "epoch": 0.4951541850220264, + "grad_norm": 1.5512557601329955, + "learning_rate": 1.7945341383584818e-05, + "loss": 0.8090060949325562, + "step": 1405 + }, + { + "epoch": 0.4955066079295154, + "grad_norm": 1.4268796756971738, + "learning_rate": 1.7941802134086695e-05, + "loss": 0.6321496963500977, + "step": 1406 + }, + { + "epoch": 0.4958590308370044, + "grad_norm": 1.4602718850691796, + "learning_rate": 1.7938260188593903e-05, + "loss": 0.6405632495880127, + "step": 1407 + }, + { + "epoch": 0.4962114537444934, + "grad_norm": 1.3838752085896924, + "learning_rate": 1.7934715548308825e-05, + "loss": 0.7665356397628784, + "step": 1408 + }, + { + "epoch": 0.49656387665198237, + "grad_norm": 1.6983169415711221, + "learning_rate": 1.7931168214434757e-05, + "loss": 0.7960416078567505, + "step": 1409 + }, + { + "epoch": 0.49691629955947136, + "grad_norm": 1.3842449461014021, + "learning_rate": 1.7927618188175908e-05, + "loss": 0.8080639839172363, + "step": 1410 + }, + { + "epoch": 0.49726872246696036, + "grad_norm": 1.3034648934851016, + "learning_rate": 1.79240654707374e-05, + "loss": 0.6503266096115112, + "step": 1411 + }, + { + "epoch": 0.49762114537444935, + "grad_norm": 1.3378534420648176, + "learning_rate": 1.792051006332527e-05, + "loss": 0.6063007116317749, + "step": 1412 + }, + { + "epoch": 0.49797356828193834, + "grad_norm": 1.849150255820523, + "learning_rate": 1.791695196714647e-05, + "loss": 0.6861660480499268, + "step": 1413 + }, + { + "epoch": 0.49832599118942733, + "grad_norm": 1.2217791382902905, + "learning_rate": 1.791339118340886e-05, + "loss": 0.7064980268478394, + "step": 1414 + }, + { + "epoch": 0.4986784140969163, + "grad_norm": 1.4370359000865323, + "learning_rate": 1.7909827713321214e-05, + "loss": 0.6102496981620789, + "step": 1415 + }, + { + "epoch": 0.49903083700440526, + 
"grad_norm": 1.446734818664789, + "learning_rate": 1.790626155809323e-05, + "loss": 0.7460618019104004, + "step": 1416 + }, + { + "epoch": 0.49938325991189425, + "grad_norm": 1.2988677548719765, + "learning_rate": 1.7902692718935496e-05, + "loss": 0.7124448418617249, + "step": 1417 + }, + { + "epoch": 0.49973568281938324, + "grad_norm": 1.322744101240627, + "learning_rate": 1.7899121197059525e-05, + "loss": 0.7194923162460327, + "step": 1418 + }, + { + "epoch": 0.5000881057268722, + "grad_norm": 1.4429377947794157, + "learning_rate": 1.7895546993677736e-05, + "loss": 0.6633901596069336, + "step": 1419 + }, + { + "epoch": 0.5004405286343613, + "grad_norm": 1.5531583469807302, + "learning_rate": 1.7891970110003463e-05, + "loss": 0.8554216623306274, + "step": 1420 + }, + { + "epoch": 0.5007929515418502, + "grad_norm": 1.4541421669927512, + "learning_rate": 1.7888390547250944e-05, + "loss": 0.7259502410888672, + "step": 1421 + }, + { + "epoch": 0.5011453744493392, + "grad_norm": 1.4299229413313208, + "learning_rate": 1.788480830663533e-05, + "loss": 0.7330816984176636, + "step": 1422 + }, + { + "epoch": 0.5014977973568282, + "grad_norm": 1.5727227347094554, + "learning_rate": 1.7881223389372678e-05, + "loss": 0.7793391346931458, + "step": 1423 + }, + { + "epoch": 0.5018502202643171, + "grad_norm": 1.5101282054621992, + "learning_rate": 1.787763579667996e-05, + "loss": 0.7387483716011047, + "step": 1424 + }, + { + "epoch": 0.5022026431718062, + "grad_norm": 1.2875272836020812, + "learning_rate": 1.787404552977505e-05, + "loss": 0.6665850877761841, + "step": 1425 + }, + { + "epoch": 0.5025550660792951, + "grad_norm": 1.6443234538305773, + "learning_rate": 1.7870452589876733e-05, + "loss": 0.7487791180610657, + "step": 1426 + }, + { + "epoch": 0.5029074889867842, + "grad_norm": 1.5494170755115177, + "learning_rate": 1.78668569782047e-05, + "loss": 0.6048247814178467, + "step": 1427 + }, + { + "epoch": 0.5032599118942731, + "grad_norm": 1.2664597501734751, + 
"learning_rate": 1.786325869597955e-05, + "loss": 0.7196261882781982, + "step": 1428 + }, + { + "epoch": 0.5036123348017622, + "grad_norm": 1.8296774166979555, + "learning_rate": 1.785965774442278e-05, + "loss": 0.6845135688781738, + "step": 1429 + }, + { + "epoch": 0.5039647577092511, + "grad_norm": 1.4157663102240734, + "learning_rate": 1.785605412475681e-05, + "loss": 0.7314398288726807, + "step": 1430 + }, + { + "epoch": 0.5043171806167401, + "grad_norm": 1.4666969447710358, + "learning_rate": 1.7852447838204957e-05, + "loss": 0.7171268463134766, + "step": 1431 + }, + { + "epoch": 0.5046696035242291, + "grad_norm": 1.33657009662446, + "learning_rate": 1.784883888599144e-05, + "loss": 0.8349916338920593, + "step": 1432 + }, + { + "epoch": 0.505022026431718, + "grad_norm": 1.3501942895276628, + "learning_rate": 1.7845227269341387e-05, + "loss": 0.6375530958175659, + "step": 1433 + }, + { + "epoch": 0.505374449339207, + "grad_norm": 1.42707048545369, + "learning_rate": 1.7841612989480824e-05, + "loss": 0.8156824707984924, + "step": 1434 + }, + { + "epoch": 0.505726872246696, + "grad_norm": 1.4408580248696123, + "learning_rate": 1.7837996047636696e-05, + "loss": 0.7186283469200134, + "step": 1435 + }, + { + "epoch": 0.506079295154185, + "grad_norm": 1.3439268630529597, + "learning_rate": 1.7834376445036834e-05, + "loss": 0.6130756139755249, + "step": 1436 + }, + { + "epoch": 0.506431718061674, + "grad_norm": 1.5285659623162418, + "learning_rate": 1.7830754182909985e-05, + "loss": 0.6948508024215698, + "step": 1437 + }, + { + "epoch": 0.506784140969163, + "grad_norm": 1.3759453423428971, + "learning_rate": 1.7827129262485793e-05, + "loss": 0.7049688100814819, + "step": 1438 + }, + { + "epoch": 0.507136563876652, + "grad_norm": 1.457151343686531, + "learning_rate": 1.7823501684994805e-05, + "loss": 0.7491527795791626, + "step": 1439 + }, + { + "epoch": 0.507488986784141, + "grad_norm": 1.6101324796455516, + "learning_rate": 1.781987145166847e-05, + "loss": 
0.8718780279159546, + "step": 1440 + }, + { + "epoch": 0.5078414096916299, + "grad_norm": 1.2572878912363772, + "learning_rate": 1.7816238563739144e-05, + "loss": 0.5675592422485352, + "step": 1441 + }, + { + "epoch": 0.508193832599119, + "grad_norm": 1.4044509323540495, + "learning_rate": 1.7812603022440076e-05, + "loss": 0.7472085952758789, + "step": 1442 + }, + { + "epoch": 0.5085462555066079, + "grad_norm": 1.3371129648202849, + "learning_rate": 1.7808964829005416e-05, + "loss": 0.7673810720443726, + "step": 1443 + }, + { + "epoch": 0.5088986784140969, + "grad_norm": 1.5506550607349072, + "learning_rate": 1.7805323984670224e-05, + "loss": 0.8245630264282227, + "step": 1444 + }, + { + "epoch": 0.5092511013215859, + "grad_norm": 1.488734758513416, + "learning_rate": 1.780168049067045e-05, + "loss": 0.8578429222106934, + "step": 1445 + }, + { + "epoch": 0.5096035242290748, + "grad_norm": 1.3892444083620181, + "learning_rate": 1.7798034348242944e-05, + "loss": 0.6631708145141602, + "step": 1446 + }, + { + "epoch": 0.5099559471365639, + "grad_norm": 1.3121030116229568, + "learning_rate": 1.779438555862546e-05, + "loss": 0.8106615543365479, + "step": 1447 + }, + { + "epoch": 0.5103083700440528, + "grad_norm": 1.1486822439059632, + "learning_rate": 1.7790734123056654e-05, + "loss": 0.7033256888389587, + "step": 1448 + }, + { + "epoch": 0.5106607929515419, + "grad_norm": 1.2259259255559172, + "learning_rate": 1.7787080042776065e-05, + "loss": 0.7124278545379639, + "step": 1449 + }, + { + "epoch": 0.5110132158590308, + "grad_norm": 1.4546377837760451, + "learning_rate": 1.7783423319024144e-05, + "loss": 0.7834827899932861, + "step": 1450 + }, + { + "epoch": 0.5113656387665199, + "grad_norm": 1.4580618513432573, + "learning_rate": 1.777976395304224e-05, + "loss": 0.6762892603874207, + "step": 1451 + }, + { + "epoch": 0.5117180616740088, + "grad_norm": 1.4220157860300873, + "learning_rate": 1.7776101946072586e-05, + "loss": 0.7317261695861816, + "step": 1452 + }, + { + 
"epoch": 0.5120704845814978, + "grad_norm": 1.3265767127223091, + "learning_rate": 1.7772437299358324e-05, + "loss": 0.6278417110443115, + "step": 1453 + }, + { + "epoch": 0.5124229074889868, + "grad_norm": 1.319373459720871, + "learning_rate": 1.7768770014143485e-05, + "loss": 0.6638025045394897, + "step": 1454 + }, + { + "epoch": 0.5127753303964758, + "grad_norm": 1.400198364176684, + "learning_rate": 1.7765100091673e-05, + "loss": 0.786564826965332, + "step": 1455 + }, + { + "epoch": 0.5131277533039648, + "grad_norm": 1.4579007616104753, + "learning_rate": 1.776142753319269e-05, + "loss": 0.7483570575714111, + "step": 1456 + }, + { + "epoch": 0.5134801762114537, + "grad_norm": 1.5755517235246568, + "learning_rate": 1.7757752339949284e-05, + "loss": 0.7036221027374268, + "step": 1457 + }, + { + "epoch": 0.5138325991189427, + "grad_norm": 1.4840022330643747, + "learning_rate": 1.7754074513190384e-05, + "loss": 0.6903718709945679, + "step": 1458 + }, + { + "epoch": 0.5141850220264317, + "grad_norm": 1.2882226376562813, + "learning_rate": 1.77503940541645e-05, + "loss": 0.7728221416473389, + "step": 1459 + }, + { + "epoch": 0.5145374449339207, + "grad_norm": 1.327669814898394, + "learning_rate": 1.774671096412104e-05, + "loss": 0.7127183675765991, + "step": 1460 + }, + { + "epoch": 0.5148898678414097, + "grad_norm": 1.6330052955229915, + "learning_rate": 1.7743025244310293e-05, + "loss": 0.7801295518875122, + "step": 1461 + }, + { + "epoch": 0.5152422907488987, + "grad_norm": 1.1623220195345323, + "learning_rate": 1.773933689598345e-05, + "loss": 0.632892906665802, + "step": 1462 + }, + { + "epoch": 0.5155947136563876, + "grad_norm": 1.2497961025206838, + "learning_rate": 1.7735645920392587e-05, + "loss": 0.7347458600997925, + "step": 1463 + }, + { + "epoch": 0.5159471365638767, + "grad_norm": 1.5115996209276181, + "learning_rate": 1.7731952318790673e-05, + "loss": 0.6705365777015686, + "step": 1464 + }, + { + "epoch": 0.5162995594713656, + "grad_norm": 
1.4475904564128834, + "learning_rate": 1.7728256092431577e-05, + "loss": 0.696006715297699, + "step": 1465 + }, + { + "epoch": 0.5166519823788547, + "grad_norm": 1.3978951424570836, + "learning_rate": 1.7724557242570045e-05, + "loss": 0.5922254323959351, + "step": 1466 + }, + { + "epoch": 0.5170044052863436, + "grad_norm": 1.2709448074189098, + "learning_rate": 1.7720855770461733e-05, + "loss": 0.6162985563278198, + "step": 1467 + }, + { + "epoch": 0.5173568281938326, + "grad_norm": 1.432801158502027, + "learning_rate": 1.7717151677363164e-05, + "loss": 0.7319275140762329, + "step": 1468 + }, + { + "epoch": 0.5177092511013216, + "grad_norm": 1.3993642551309304, + "learning_rate": 1.771344496453177e-05, + "loss": 0.7349969148635864, + "step": 1469 + }, + { + "epoch": 0.5180616740088105, + "grad_norm": 1.287285806622758, + "learning_rate": 1.7709735633225863e-05, + "loss": 0.8153162598609924, + "step": 1470 + }, + { + "epoch": 0.5184140969162996, + "grad_norm": 1.3919169087311665, + "learning_rate": 1.7706023684704642e-05, + "loss": 0.6582974195480347, + "step": 1471 + }, + { + "epoch": 0.5187665198237885, + "grad_norm": 1.704010679983685, + "learning_rate": 1.77023091202282e-05, + "loss": 0.696917712688446, + "step": 1472 + }, + { + "epoch": 0.5191189427312776, + "grad_norm": 1.4310702415713368, + "learning_rate": 1.769859194105752e-05, + "loss": 0.6281285285949707, + "step": 1473 + }, + { + "epoch": 0.5194713656387665, + "grad_norm": 1.6327610148964462, + "learning_rate": 1.7694872148454463e-05, + "loss": 0.7687089443206787, + "step": 1474 + }, + { + "epoch": 0.5198237885462555, + "grad_norm": 1.386403275153257, + "learning_rate": 1.7691149743681783e-05, + "loss": 0.6928491592407227, + "step": 1475 + }, + { + "epoch": 0.5201762114537445, + "grad_norm": 1.3972840676283895, + "learning_rate": 1.7687424728003126e-05, + "loss": 0.63843834400177, + "step": 1476 + }, + { + "epoch": 0.5205286343612335, + "grad_norm": 1.7893361899671325, + "learning_rate": 
1.7683697102683012e-05, + "loss": 0.8987904787063599, + "step": 1477 + }, + { + "epoch": 0.5208810572687225, + "grad_norm": 1.3513150269139367, + "learning_rate": 1.767996686898686e-05, + "loss": 0.7027539014816284, + "step": 1478 + }, + { + "epoch": 0.5212334801762114, + "grad_norm": 1.5924485741299983, + "learning_rate": 1.7676234028180964e-05, + "loss": 0.8490183353424072, + "step": 1479 + }, + { + "epoch": 0.5215859030837005, + "grad_norm": 1.251712414046886, + "learning_rate": 1.7672498581532508e-05, + "loss": 0.5885729789733887, + "step": 1480 + }, + { + "epoch": 0.5219383259911894, + "grad_norm": 1.289415742432068, + "learning_rate": 1.766876053030956e-05, + "loss": 0.627627968788147, + "step": 1481 + }, + { + "epoch": 0.5222907488986784, + "grad_norm": 1.2948972408498374, + "learning_rate": 1.766501987578108e-05, + "loss": 0.6441413164138794, + "step": 1482 + }, + { + "epoch": 0.5226431718061674, + "grad_norm": 1.3508329997529829, + "learning_rate": 1.7661276619216888e-05, + "loss": 0.6199722290039062, + "step": 1483 + }, + { + "epoch": 0.5229955947136564, + "grad_norm": 1.2931208995237342, + "learning_rate": 1.7657530761887715e-05, + "loss": 0.6364887952804565, + "step": 1484 + }, + { + "epoch": 0.5233480176211454, + "grad_norm": 1.281527242811407, + "learning_rate": 1.7653782305065158e-05, + "loss": 0.7279890775680542, + "step": 1485 + }, + { + "epoch": 0.5237004405286344, + "grad_norm": 1.5228486275670003, + "learning_rate": 1.7650031250021704e-05, + "loss": 0.6552719473838806, + "step": 1486 + }, + { + "epoch": 0.5240528634361233, + "grad_norm": 1.4461703633182712, + "learning_rate": 1.7646277598030717e-05, + "loss": 0.6778907775878906, + "step": 1487 + }, + { + "epoch": 0.5244052863436124, + "grad_norm": 1.3941119820637071, + "learning_rate": 1.7642521350366447e-05, + "loss": 0.6581870317459106, + "step": 1488 + }, + { + "epoch": 0.5247577092511013, + "grad_norm": 1.6198722329881745, + "learning_rate": 1.7638762508304025e-05, + "loss": 
0.8529388904571533, + "step": 1489 + }, + { + "epoch": 0.5251101321585903, + "grad_norm": 1.659639253256808, + "learning_rate": 1.7635001073119458e-05, + "loss": 0.6601512432098389, + "step": 1490 + }, + { + "epoch": 0.5254625550660793, + "grad_norm": 1.5073764890239725, + "learning_rate": 1.7631237046089637e-05, + "loss": 0.6347510814666748, + "step": 1491 + }, + { + "epoch": 0.5258149779735682, + "grad_norm": 1.3256786239827452, + "learning_rate": 1.762747042849233e-05, + "loss": 0.700560986995697, + "step": 1492 + }, + { + "epoch": 0.5261674008810573, + "grad_norm": 1.4060287838972836, + "learning_rate": 1.762370122160619e-05, + "loss": 0.5641219019889832, + "step": 1493 + }, + { + "epoch": 0.5265198237885462, + "grad_norm": 1.3124518756577959, + "learning_rate": 1.761992942671074e-05, + "loss": 0.8017370700836182, + "step": 1494 + }, + { + "epoch": 0.5268722466960353, + "grad_norm": 1.334442798992846, + "learning_rate": 1.7616155045086394e-05, + "loss": 0.6345353126525879, + "step": 1495 + }, + { + "epoch": 0.5272246696035242, + "grad_norm": 1.6841165394853315, + "learning_rate": 1.7612378078014432e-05, + "loss": 0.7118426561355591, + "step": 1496 + }, + { + "epoch": 0.5275770925110133, + "grad_norm": 1.6748084277774182, + "learning_rate": 1.7608598526777017e-05, + "loss": 0.6186550855636597, + "step": 1497 + }, + { + "epoch": 0.5279295154185022, + "grad_norm": 1.4676540893387768, + "learning_rate": 1.7604816392657195e-05, + "loss": 0.8351110219955444, + "step": 1498 + }, + { + "epoch": 0.5282819383259912, + "grad_norm": 1.3183866002309903, + "learning_rate": 1.7601031676938877e-05, + "loss": 0.638684868812561, + "step": 1499 + }, + { + "epoch": 0.5286343612334802, + "grad_norm": 1.291067085285626, + "learning_rate": 1.7597244380906856e-05, + "loss": 0.5118356943130493, + "step": 1500 + }, + { + "epoch": 0.5289867841409691, + "grad_norm": 1.2880504132355877, + "learning_rate": 1.7593454505846807e-05, + "loss": 0.637636125087738, + "step": 1501 + }, + { + 
"epoch": 0.5293392070484582, + "grad_norm": 1.3905967147162603, + "learning_rate": 1.7589662053045264e-05, + "loss": 0.8412563800811768, + "step": 1502 + }, + { + "epoch": 0.5296916299559471, + "grad_norm": 1.6432072453017084, + "learning_rate": 1.758586702378966e-05, + "loss": 0.7940464019775391, + "step": 1503 + }, + { + "epoch": 0.5300440528634361, + "grad_norm": 1.4898667206132308, + "learning_rate": 1.7582069419368276e-05, + "loss": 0.7136783599853516, + "step": 1504 + }, + { + "epoch": 0.5303964757709251, + "grad_norm": 1.5677232979916986, + "learning_rate": 1.757826924107029e-05, + "loss": 0.7212727069854736, + "step": 1505 + }, + { + "epoch": 0.5307488986784141, + "grad_norm": 2.968905268892082, + "learning_rate": 1.757446649018574e-05, + "loss": 0.7026032209396362, + "step": 1506 + }, + { + "epoch": 0.5311013215859031, + "grad_norm": 1.3050484980835664, + "learning_rate": 1.7570661168005544e-05, + "loss": 0.541954755783081, + "step": 1507 + }, + { + "epoch": 0.5314537444933921, + "grad_norm": 1.3422366313712581, + "learning_rate": 1.7566853275821488e-05, + "loss": 0.6927075386047363, + "step": 1508 + }, + { + "epoch": 0.531806167400881, + "grad_norm": 1.4069640098530838, + "learning_rate": 1.7563042814926237e-05, + "loss": 0.6556441783905029, + "step": 1509 + }, + { + "epoch": 0.5321585903083701, + "grad_norm": 1.710258111864569, + "learning_rate": 1.7559229786613317e-05, + "loss": 0.6895149946212769, + "step": 1510 + }, + { + "epoch": 0.532511013215859, + "grad_norm": 1.3737730722509855, + "learning_rate": 1.755541419217714e-05, + "loss": 0.6178139448165894, + "step": 1511 + }, + { + "epoch": 0.532863436123348, + "grad_norm": 1.5162737493672722, + "learning_rate": 1.7551596032912975e-05, + "loss": 0.7645368576049805, + "step": 1512 + }, + { + "epoch": 0.533215859030837, + "grad_norm": 1.3652252290806937, + "learning_rate": 1.7547775310116973e-05, + "loss": 0.8247367143630981, + "step": 1513 + }, + { + "epoch": 0.533568281938326, + "grad_norm": 
1.2941657542151124, + "learning_rate": 1.7543952025086147e-05, + "loss": 0.535837709903717, + "step": 1514 + }, + { + "epoch": 0.533920704845815, + "grad_norm": 1.3232982615818571, + "learning_rate": 1.7540126179118387e-05, + "loss": 0.51450514793396, + "step": 1515 + }, + { + "epoch": 0.5342731277533039, + "grad_norm": 1.3863880461090508, + "learning_rate": 1.7536297773512444e-05, + "loss": 0.7962276935577393, + "step": 1516 + }, + { + "epoch": 0.534625550660793, + "grad_norm": 1.4799750544282257, + "learning_rate": 1.753246680956795e-05, + "loss": 0.7586444616317749, + "step": 1517 + }, + { + "epoch": 0.5349779735682819, + "grad_norm": 1.4967875396536634, + "learning_rate": 1.752863328858539e-05, + "loss": 0.7074990272521973, + "step": 1518 + }, + { + "epoch": 0.535330396475771, + "grad_norm": 1.3158842776684478, + "learning_rate": 1.7524797211866126e-05, + "loss": 0.7409921884536743, + "step": 1519 + }, + { + "epoch": 0.5356828193832599, + "grad_norm": 1.3752676962962187, + "learning_rate": 1.7520958580712394e-05, + "loss": 0.7889251708984375, + "step": 1520 + }, + { + "epoch": 0.536035242290749, + "grad_norm": 2.0871001645404776, + "learning_rate": 1.751711739642728e-05, + "loss": 0.8244975209236145, + "step": 1521 + }, + { + "epoch": 0.5363876651982379, + "grad_norm": 1.441127888748836, + "learning_rate": 1.7513273660314753e-05, + "loss": 0.6573888659477234, + "step": 1522 + }, + { + "epoch": 0.5367400881057268, + "grad_norm": 1.3793459185222714, + "learning_rate": 1.7509427373679643e-05, + "loss": 0.71816086769104, + "step": 1523 + }, + { + "epoch": 0.5370925110132159, + "grad_norm": 1.5200593368820163, + "learning_rate": 1.750557853782764e-05, + "loss": 0.7681000232696533, + "step": 1524 + }, + { + "epoch": 0.5374449339207048, + "grad_norm": 1.443442982592023, + "learning_rate": 1.7501727154065304e-05, + "loss": 0.6777461767196655, + "step": 1525 + }, + { + "epoch": 0.5377973568281939, + "grad_norm": 1.5538840121485165, + "learning_rate": 
1.7497873223700063e-05, + "loss": 0.714499831199646, + "step": 1526 + }, + { + "epoch": 0.5381497797356828, + "grad_norm": 1.6085901588908644, + "learning_rate": 1.7494016748040206e-05, + "loss": 0.6587036848068237, + "step": 1527 + }, + { + "epoch": 0.5385022026431718, + "grad_norm": 1.5748960305246453, + "learning_rate": 1.7490157728394887e-05, + "loss": 0.7256105542182922, + "step": 1528 + }, + { + "epoch": 0.5388546255506608, + "grad_norm": 1.7818844853131433, + "learning_rate": 1.7486296166074116e-05, + "loss": 0.6551185846328735, + "step": 1529 + }, + { + "epoch": 0.5392070484581498, + "grad_norm": 1.5961201900224617, + "learning_rate": 1.7482432062388782e-05, + "loss": 0.710479736328125, + "step": 1530 + }, + { + "epoch": 0.5395594713656388, + "grad_norm": 1.3063302832130508, + "learning_rate": 1.7478565418650625e-05, + "loss": 0.7882527709007263, + "step": 1531 + }, + { + "epoch": 0.5399118942731278, + "grad_norm": 1.4227949027781848, + "learning_rate": 1.7474696236172247e-05, + "loss": 0.6163671612739563, + "step": 1532 + }, + { + "epoch": 0.5402643171806167, + "grad_norm": 1.3516530648193832, + "learning_rate": 1.7470824516267125e-05, + "loss": 0.6923140287399292, + "step": 1533 + }, + { + "epoch": 0.5406167400881057, + "grad_norm": 1.259724627030556, + "learning_rate": 1.7466950260249573e-05, + "loss": 0.6473938226699829, + "step": 1534 + }, + { + "epoch": 0.5409691629955947, + "grad_norm": 1.7187178399062975, + "learning_rate": 1.7463073469434792e-05, + "loss": 0.631247878074646, + "step": 1535 + }, + { + "epoch": 0.5413215859030837, + "grad_norm": 1.8932273669088504, + "learning_rate": 1.745919414513883e-05, + "loss": 0.8113377690315247, + "step": 1536 + }, + { + "epoch": 0.5416740088105727, + "grad_norm": 1.4958838672098407, + "learning_rate": 1.7455312288678588e-05, + "loss": 0.7950010299682617, + "step": 1537 + }, + { + "epoch": 0.5420264317180616, + "grad_norm": 1.5066443226404773, + "learning_rate": 1.7451427901371843e-05, + "loss": 
0.7279125452041626, + "step": 1538 + }, + { + "epoch": 0.5423788546255507, + "grad_norm": 1.304013044677209, + "learning_rate": 1.7447540984537225e-05, + "loss": 0.6746084690093994, + "step": 1539 + }, + { + "epoch": 0.5427312775330396, + "grad_norm": 1.2714028280363416, + "learning_rate": 1.744365153949422e-05, + "loss": 0.5818569660186768, + "step": 1540 + }, + { + "epoch": 0.5430837004405287, + "grad_norm": 1.3192138998270364, + "learning_rate": 1.743975956756317e-05, + "loss": 0.7408111095428467, + "step": 1541 + }, + { + "epoch": 0.5434361233480176, + "grad_norm": 1.4913068245941434, + "learning_rate": 1.7435865070065282e-05, + "loss": 0.6842402815818787, + "step": 1542 + }, + { + "epoch": 0.5437885462555067, + "grad_norm": 1.3316441616353596, + "learning_rate": 1.7431968048322617e-05, + "loss": 0.6179615259170532, + "step": 1543 + }, + { + "epoch": 0.5441409691629956, + "grad_norm": 1.3347896582759051, + "learning_rate": 1.7428068503658085e-05, + "loss": 0.5943140983581543, + "step": 1544 + }, + { + "epoch": 0.5444933920704845, + "grad_norm": 1.797026236227536, + "learning_rate": 1.742416643739547e-05, + "loss": 0.7901419401168823, + "step": 1545 + }, + { + "epoch": 0.5448458149779736, + "grad_norm": 1.4636864955433957, + "learning_rate": 1.74202618508594e-05, + "loss": 0.7630521655082703, + "step": 1546 + }, + { + "epoch": 0.5451982378854625, + "grad_norm": 1.5322711938826543, + "learning_rate": 1.7416354745375355e-05, + "loss": 0.7662566900253296, + "step": 1547 + }, + { + "epoch": 0.5455506607929516, + "grad_norm": 1.52556111103991, + "learning_rate": 1.7412445122269683e-05, + "loss": 0.5758601427078247, + "step": 1548 + }, + { + "epoch": 0.5459030837004405, + "grad_norm": 1.3681670353760105, + "learning_rate": 1.7408532982869573e-05, + "loss": 0.753425121307373, + "step": 1549 + }, + { + "epoch": 0.5462555066079295, + "grad_norm": 1.7147625296386437, + "learning_rate": 1.7404618328503082e-05, + "loss": 0.6954981088638306, + "step": 1550 + }, + { + 
"epoch": 0.5466079295154185, + "grad_norm": 1.7209819459128521, + "learning_rate": 1.7400701160499107e-05, + "loss": 0.7608321905136108, + "step": 1551 + }, + { + "epoch": 0.5469603524229075, + "grad_norm": 1.3289181862839086, + "learning_rate": 1.7396781480187403e-05, + "loss": 0.679731011390686, + "step": 1552 + }, + { + "epoch": 0.5473127753303965, + "grad_norm": 1.547015119464835, + "learning_rate": 1.7392859288898586e-05, + "loss": 0.7101309299468994, + "step": 1553 + }, + { + "epoch": 0.5476651982378855, + "grad_norm": 1.3483315531721025, + "learning_rate": 1.7388934587964114e-05, + "loss": 0.7975757122039795, + "step": 1554 + }, + { + "epoch": 0.5480176211453744, + "grad_norm": 1.612241763199232, + "learning_rate": 1.73850073787163e-05, + "loss": 0.9132372140884399, + "step": 1555 + }, + { + "epoch": 0.5483700440528634, + "grad_norm": 1.2910456692590726, + "learning_rate": 1.7381077662488313e-05, + "loss": 0.7375202178955078, + "step": 1556 + }, + { + "epoch": 0.5487224669603524, + "grad_norm": 1.387618503105513, + "learning_rate": 1.7377145440614165e-05, + "loss": 0.7066434025764465, + "step": 1557 + }, + { + "epoch": 0.5490748898678414, + "grad_norm": 1.3715154650071018, + "learning_rate": 1.737321071442873e-05, + "loss": 0.8217945098876953, + "step": 1558 + }, + { + "epoch": 0.5494273127753304, + "grad_norm": 1.629646959297134, + "learning_rate": 1.7369273485267716e-05, + "loss": 0.6946529150009155, + "step": 1559 + }, + { + "epoch": 0.5497797356828193, + "grad_norm": 1.525535566210846, + "learning_rate": 1.7365333754467694e-05, + "loss": 0.7671442627906799, + "step": 1560 + }, + { + "epoch": 0.5501321585903084, + "grad_norm": 1.3988607690634036, + "learning_rate": 1.736139152336608e-05, + "loss": 0.7044692039489746, + "step": 1561 + }, + { + "epoch": 0.5504845814977973, + "grad_norm": 1.368099127753493, + "learning_rate": 1.735744679330114e-05, + "loss": 0.6654937267303467, + "step": 1562 + }, + { + "epoch": 0.5508370044052864, + "grad_norm": 
1.2951614076743367, + "learning_rate": 1.7353499565611986e-05, + "loss": 0.6683400869369507, + "step": 1563 + }, + { + "epoch": 0.5511894273127753, + "grad_norm": 1.236687311626723, + "learning_rate": 1.734954984163858e-05, + "loss": 0.6360758543014526, + "step": 1564 + }, + { + "epoch": 0.5515418502202644, + "grad_norm": 1.3363599776517268, + "learning_rate": 1.7345597622721727e-05, + "loss": 0.6982365846633911, + "step": 1565 + }, + { + "epoch": 0.5518942731277533, + "grad_norm": 1.559537817461735, + "learning_rate": 1.7341642910203087e-05, + "loss": 0.8011882305145264, + "step": 1566 + }, + { + "epoch": 0.5522466960352423, + "grad_norm": 2.285492930360211, + "learning_rate": 1.7337685705425156e-05, + "loss": 0.8203347325325012, + "step": 1567 + }, + { + "epoch": 0.5525991189427313, + "grad_norm": 1.2535380811715755, + "learning_rate": 1.7333726009731288e-05, + "loss": 0.653145432472229, + "step": 1568 + }, + { + "epoch": 0.5529515418502202, + "grad_norm": 1.4482870728586805, + "learning_rate": 1.7329763824465676e-05, + "loss": 0.6527417302131653, + "step": 1569 + }, + { + "epoch": 0.5533039647577093, + "grad_norm": 1.44377376134513, + "learning_rate": 1.7325799150973353e-05, + "loss": 0.6965492963790894, + "step": 1570 + }, + { + "epoch": 0.5536563876651982, + "grad_norm": 1.64534067222521, + "learning_rate": 1.7321831990600206e-05, + "loss": 0.6779811382293701, + "step": 1571 + }, + { + "epoch": 0.5540088105726872, + "grad_norm": 1.4062562776851213, + "learning_rate": 1.731786234469296e-05, + "loss": 0.733130931854248, + "step": 1572 + }, + { + "epoch": 0.5543612334801762, + "grad_norm": 1.3711228848627866, + "learning_rate": 1.7313890214599195e-05, + "loss": 0.6482118964195251, + "step": 1573 + }, + { + "epoch": 0.5547136563876652, + "grad_norm": 1.300564860417972, + "learning_rate": 1.7309915601667312e-05, + "loss": 0.5167185068130493, + "step": 1574 + }, + { + "epoch": 0.5550660792951542, + "grad_norm": 1.5636119075248611, + "learning_rate": 
1.730593850724658e-05, + "loss": 0.7172712087631226, + "step": 1575 + }, + { + "epoch": 0.5554185022026432, + "grad_norm": 1.3031139662778384, + "learning_rate": 1.730195893268709e-05, + "loss": 0.6786075830459595, + "step": 1576 + }, + { + "epoch": 0.5557709251101322, + "grad_norm": 1.272621756820605, + "learning_rate": 1.7297976879339787e-05, + "loss": 0.6823022365570068, + "step": 1577 + }, + { + "epoch": 0.5561233480176212, + "grad_norm": 1.5517603954080275, + "learning_rate": 1.7293992348556462e-05, + "loss": 0.6882521510124207, + "step": 1578 + }, + { + "epoch": 0.5564757709251101, + "grad_norm": 1.3633741314626187, + "learning_rate": 1.7290005341689726e-05, + "loss": 0.6028990745544434, + "step": 1579 + }, + { + "epoch": 0.5568281938325991, + "grad_norm": 1.6493192591020644, + "learning_rate": 1.728601586009305e-05, + "loss": 0.7759981155395508, + "step": 1580 + }, + { + "epoch": 0.5571806167400881, + "grad_norm": 1.4356044858306343, + "learning_rate": 1.7282023905120743e-05, + "loss": 0.7067322134971619, + "step": 1581 + }, + { + "epoch": 0.5575330396475771, + "grad_norm": 1.6158791701222606, + "learning_rate": 1.727802947812794e-05, + "loss": 0.7972309589385986, + "step": 1582 + }, + { + "epoch": 0.5578854625550661, + "grad_norm": 1.7662811513100274, + "learning_rate": 1.7274032580470634e-05, + "loss": 0.780463457107544, + "step": 1583 + }, + { + "epoch": 0.558237885462555, + "grad_norm": 1.4053617141185595, + "learning_rate": 1.7270033213505638e-05, + "loss": 0.647217869758606, + "step": 1584 + }, + { + "epoch": 0.5585903083700441, + "grad_norm": 1.3125952525291176, + "learning_rate": 1.7266031378590624e-05, + "loss": 0.6253752112388611, + "step": 1585 + }, + { + "epoch": 0.558942731277533, + "grad_norm": 11.7060219187992, + "learning_rate": 1.7262027077084083e-05, + "loss": 0.8427211046218872, + "step": 1586 + }, + { + "epoch": 0.5592951541850221, + "grad_norm": 1.344046568539196, + "learning_rate": 1.7258020310345348e-05, + "loss": 0.6763455867767334, + 
"step": 1587 + }, + { + "epoch": 0.559647577092511, + "grad_norm": 1.593422172771999, + "learning_rate": 1.72540110797346e-05, + "loss": 0.7333850264549255, + "step": 1588 + }, + { + "epoch": 0.56, + "grad_norm": 1.6040079500892586, + "learning_rate": 1.7249999386612844e-05, + "loss": 0.8572328090667725, + "step": 1589 + }, + { + "epoch": 0.560352422907489, + "grad_norm": 1.5035390542036942, + "learning_rate": 1.7245985232341923e-05, + "loss": 0.7960183620452881, + "step": 1590 + }, + { + "epoch": 0.5607048458149779, + "grad_norm": 1.484772075429922, + "learning_rate": 1.7241968618284518e-05, + "loss": 0.6750795841217041, + "step": 1591 + }, + { + "epoch": 0.561057268722467, + "grad_norm": 1.9239116239416003, + "learning_rate": 1.7237949545804145e-05, + "loss": 0.7828525304794312, + "step": 1592 + }, + { + "epoch": 0.5614096916299559, + "grad_norm": 1.4415449299886975, + "learning_rate": 1.7233928016265158e-05, + "loss": 0.7414604425430298, + "step": 1593 + }, + { + "epoch": 0.561762114537445, + "grad_norm": 1.4483242479736562, + "learning_rate": 1.7229904031032736e-05, + "loss": 0.6853663921356201, + "step": 1594 + }, + { + "epoch": 0.5621145374449339, + "grad_norm": 1.9067676423331832, + "learning_rate": 1.72258775914729e-05, + "loss": 0.7923493385314941, + "step": 1595 + }, + { + "epoch": 0.5624669603524229, + "grad_norm": 1.6239202976244251, + "learning_rate": 1.7221848698952496e-05, + "loss": 0.6776527166366577, + "step": 1596 + }, + { + "epoch": 0.5628193832599119, + "grad_norm": 1.4721879083766742, + "learning_rate": 1.721781735483921e-05, + "loss": 0.6036615371704102, + "step": 1597 + }, + { + "epoch": 0.5631718061674009, + "grad_norm": 1.271294238053108, + "learning_rate": 1.7213783560501564e-05, + "loss": 0.7175784111022949, + "step": 1598 + }, + { + "epoch": 0.5635242290748899, + "grad_norm": 1.609537856897954, + "learning_rate": 1.7209747317308897e-05, + "loss": 0.790808379650116, + "step": 1599 + }, + { + "epoch": 0.5638766519823789, + "grad_norm": 
1.211639696248482, + "learning_rate": 1.7205708626631392e-05, + "loss": 0.6230301856994629, + "step": 1600 + }, + { + "epoch": 0.5642290748898678, + "grad_norm": 1.120326299832536, + "learning_rate": 1.720166748984006e-05, + "loss": 0.712124228477478, + "step": 1601 + }, + { + "epoch": 0.5645814977973568, + "grad_norm": 1.1185092917911836, + "learning_rate": 1.719762390830674e-05, + "loss": 0.543883740901947, + "step": 1602 + }, + { + "epoch": 0.5649339207048458, + "grad_norm": 1.3866183721479424, + "learning_rate": 1.71935778834041e-05, + "loss": 0.7619644999504089, + "step": 1603 + }, + { + "epoch": 0.5652863436123348, + "grad_norm": 1.3869247346305908, + "learning_rate": 1.718952941650564e-05, + "loss": 0.6447019577026367, + "step": 1604 + }, + { + "epoch": 0.5656387665198238, + "grad_norm": 1.4175373147115695, + "learning_rate": 1.718547850898569e-05, + "loss": 0.7254266738891602, + "step": 1605 + }, + { + "epoch": 0.5659911894273127, + "grad_norm": 1.3621762521360266, + "learning_rate": 1.7181425162219406e-05, + "loss": 0.632878839969635, + "step": 1606 + }, + { + "epoch": 0.5663436123348018, + "grad_norm": 1.3921274088807207, + "learning_rate": 1.7177369377582776e-05, + "loss": 0.7711806893348694, + "step": 1607 + }, + { + "epoch": 0.5666960352422907, + "grad_norm": 1.1613347832568823, + "learning_rate": 1.7173311156452607e-05, + "loss": 0.6639282703399658, + "step": 1608 + }, + { + "epoch": 0.5670484581497798, + "grad_norm": 1.4423463303361395, + "learning_rate": 1.7169250500206544e-05, + "loss": 0.6918407082557678, + "step": 1609 + }, + { + "epoch": 0.5674008810572687, + "grad_norm": 2.283192950596924, + "learning_rate": 1.716518741022305e-05, + "loss": 0.6602861881256104, + "step": 1610 + }, + { + "epoch": 0.5677533039647578, + "grad_norm": 1.401616641880741, + "learning_rate": 1.7161121887881424e-05, + "loss": 0.5853942632675171, + "step": 1611 + }, + { + "epoch": 0.5681057268722467, + "grad_norm": 1.4206445071697613, + "learning_rate": 
1.7157053934561775e-05, + "loss": 0.6793895959854126, + "step": 1612 + }, + { + "epoch": 0.5684581497797356, + "grad_norm": 1.43055320760408, + "learning_rate": 1.7152983551645054e-05, + "loss": 0.7882634401321411, + "step": 1613 + }, + { + "epoch": 0.5688105726872247, + "grad_norm": 1.4602086959676452, + "learning_rate": 1.7148910740513023e-05, + "loss": 0.6530553698539734, + "step": 1614 + }, + { + "epoch": 0.5691629955947136, + "grad_norm": 1.2905537135464573, + "learning_rate": 1.714483550254828e-05, + "loss": 0.6405597925186157, + "step": 1615 + }, + { + "epoch": 0.5695154185022027, + "grad_norm": 1.4236330365126968, + "learning_rate": 1.714075783913424e-05, + "loss": 0.7356796860694885, + "step": 1616 + }, + { + "epoch": 0.5698678414096916, + "grad_norm": 1.3877607090316109, + "learning_rate": 1.7136677751655142e-05, + "loss": 0.7393465042114258, + "step": 1617 + }, + { + "epoch": 0.5702202643171806, + "grad_norm": 1.6092126006316967, + "learning_rate": 1.7132595241496045e-05, + "loss": 0.7205296158790588, + "step": 1618 + }, + { + "epoch": 0.5705726872246696, + "grad_norm": 1.291376266983401, + "learning_rate": 1.7128510310042842e-05, + "loss": 0.7359808683395386, + "step": 1619 + }, + { + "epoch": 0.5709251101321586, + "grad_norm": 1.3759135749970453, + "learning_rate": 1.712442295868224e-05, + "loss": 0.7097065448760986, + "step": 1620 + }, + { + "epoch": 0.5712775330396476, + "grad_norm": 1.3905917375530226, + "learning_rate": 1.7120333188801756e-05, + "loss": 0.66839599609375, + "step": 1621 + }, + { + "epoch": 0.5716299559471366, + "grad_norm": 1.7035593754714837, + "learning_rate": 1.7116241001789753e-05, + "loss": 0.8373857736587524, + "step": 1622 + }, + { + "epoch": 0.5719823788546256, + "grad_norm": 1.4514044348034505, + "learning_rate": 1.7112146399035393e-05, + "loss": 0.6405144333839417, + "step": 1623 + }, + { + "epoch": 0.5723348017621145, + "grad_norm": 1.3537498495813336, + "learning_rate": 1.710804938192867e-05, + "loss": 0.622218906879425, 
+ "step": 1624 + }, + { + "epoch": 0.5726872246696035, + "grad_norm": 1.3235233015291856, + "learning_rate": 1.710394995186039e-05, + "loss": 0.6728596687316895, + "step": 1625 + }, + { + "epoch": 0.5730396475770925, + "grad_norm": 1.457353775792826, + "learning_rate": 1.7099848110222188e-05, + "loss": 0.7749369144439697, + "step": 1626 + }, + { + "epoch": 0.5733920704845815, + "grad_norm": 1.5414707611626788, + "learning_rate": 1.7095743858406506e-05, + "loss": 0.7230759859085083, + "step": 1627 + }, + { + "epoch": 0.5737444933920705, + "grad_norm": 1.540981219180448, + "learning_rate": 1.7091637197806614e-05, + "loss": 0.8243547677993774, + "step": 1628 + }, + { + "epoch": 0.5740969162995595, + "grad_norm": 1.38043003521811, + "learning_rate": 1.708752812981659e-05, + "loss": 0.5860315561294556, + "step": 1629 + }, + { + "epoch": 0.5744493392070484, + "grad_norm": 1.6273061636094053, + "learning_rate": 1.708341665583134e-05, + "loss": 0.6623368859291077, + "step": 1630 + }, + { + "epoch": 0.5748017621145375, + "grad_norm": 1.8119651381751527, + "learning_rate": 1.7079302777246577e-05, + "loss": 0.6467370986938477, + "step": 1631 + }, + { + "epoch": 0.5751541850220264, + "grad_norm": 1.5119118761679917, + "learning_rate": 1.707518649545884e-05, + "loss": 0.6443271636962891, + "step": 1632 + }, + { + "epoch": 0.5755066079295155, + "grad_norm": 1.3128080413830525, + "learning_rate": 1.7071067811865477e-05, + "loss": 0.6995208263397217, + "step": 1633 + }, + { + "epoch": 0.5758590308370044, + "grad_norm": 1.4660315838841709, + "learning_rate": 1.706694672786465e-05, + "loss": 0.698627233505249, + "step": 1634 + }, + { + "epoch": 0.5762114537444933, + "grad_norm": 1.3788458614759633, + "learning_rate": 1.706282324485534e-05, + "loss": 0.713565468788147, + "step": 1635 + }, + { + "epoch": 0.5765638766519824, + "grad_norm": 1.4050651409728825, + "learning_rate": 1.7058697364237342e-05, + "loss": 0.7978894710540771, + "step": 1636 + }, + { + "epoch": 0.5769162995594713, 
+ "grad_norm": 1.374012134646938, + "learning_rate": 1.7054569087411262e-05, + "loss": 0.7361177206039429, + "step": 1637 + }, + { + "epoch": 0.5772687224669604, + "grad_norm": 1.3640656150089683, + "learning_rate": 1.705043841577853e-05, + "loss": 0.5904364585876465, + "step": 1638 + }, + { + "epoch": 0.5776211453744493, + "grad_norm": 1.4706525609098695, + "learning_rate": 1.7046305350741365e-05, + "loss": 0.7122133374214172, + "step": 1639 + }, + { + "epoch": 0.5779735682819384, + "grad_norm": 1.5208627357939872, + "learning_rate": 1.7042169893702826e-05, + "loss": 0.6350806951522827, + "step": 1640 + }, + { + "epoch": 0.5783259911894273, + "grad_norm": 1.4511692718944456, + "learning_rate": 1.7038032046066767e-05, + "loss": 0.6332669258117676, + "step": 1641 + }, + { + "epoch": 0.5786784140969163, + "grad_norm": 1.415207402865657, + "learning_rate": 1.7033891809237865e-05, + "loss": 0.6645903587341309, + "step": 1642 + }, + { + "epoch": 0.5790308370044053, + "grad_norm": 1.6697269215763402, + "learning_rate": 1.7029749184621593e-05, + "loss": 0.8156411051750183, + "step": 1643 + }, + { + "epoch": 0.5793832599118943, + "grad_norm": 1.3789808786486863, + "learning_rate": 1.7025604173624247e-05, + "loss": 0.6778720617294312, + "step": 1644 + }, + { + "epoch": 0.5797356828193833, + "grad_norm": 1.5882994058774447, + "learning_rate": 1.702145677765293e-05, + "loss": 0.6774875521659851, + "step": 1645 + }, + { + "epoch": 0.5800881057268722, + "grad_norm": 1.7790432286964633, + "learning_rate": 1.701730699811555e-05, + "loss": 0.9239652156829834, + "step": 1646 + }, + { + "epoch": 0.5804405286343612, + "grad_norm": 1.3647594896468807, + "learning_rate": 1.701315483642083e-05, + "loss": 0.6841437816619873, + "step": 1647 + }, + { + "epoch": 0.5807929515418502, + "grad_norm": 1.7199469103031315, + "learning_rate": 1.7009000293978308e-05, + "loss": 0.7540775537490845, + "step": 1648 + }, + { + "epoch": 0.5811453744493392, + "grad_norm": 1.0742597088843755, + 
"learning_rate": 1.7004843372198306e-05, + "loss": 0.5534735321998596, + "step": 1649 + }, + { + "epoch": 0.5814977973568282, + "grad_norm": 1.326312979627632, + "learning_rate": 1.7000684072491984e-05, + "loss": 0.5398745536804199, + "step": 1650 + }, + { + "epoch": 0.5818502202643172, + "grad_norm": 1.583833147288038, + "learning_rate": 1.6996522396271285e-05, + "loss": 0.7249305248260498, + "step": 1651 + }, + { + "epoch": 0.5822026431718061, + "grad_norm": 2.3893378173132973, + "learning_rate": 1.6992358344948976e-05, + "loss": 0.819263219833374, + "step": 1652 + }, + { + "epoch": 0.5825550660792952, + "grad_norm": 1.4489156713328724, + "learning_rate": 1.6988191919938618e-05, + "loss": 0.7421448826789856, + "step": 1653 + }, + { + "epoch": 0.5829074889867841, + "grad_norm": 1.832209725536692, + "learning_rate": 1.6984023122654584e-05, + "loss": 0.7665672302246094, + "step": 1654 + }, + { + "epoch": 0.5832599118942732, + "grad_norm": 1.390589552129084, + "learning_rate": 1.697985195451205e-05, + "loss": 0.7226558327674866, + "step": 1655 + }, + { + "epoch": 0.5836123348017621, + "grad_norm": 1.5091001050977364, + "learning_rate": 1.6975678416926995e-05, + "loss": 0.6702080965042114, + "step": 1656 + }, + { + "epoch": 0.583964757709251, + "grad_norm": 1.460442381139403, + "learning_rate": 1.697150251131621e-05, + "loss": 0.5843878984451294, + "step": 1657 + }, + { + "epoch": 0.5843171806167401, + "grad_norm": 1.37517469234843, + "learning_rate": 1.6967324239097287e-05, + "loss": 0.707448422908783, + "step": 1658 + }, + { + "epoch": 0.584669603524229, + "grad_norm": 1.8436282149841139, + "learning_rate": 1.6963143601688615e-05, + "loss": 0.7619093060493469, + "step": 1659 + }, + { + "epoch": 0.5850220264317181, + "grad_norm": 1.5399166464925174, + "learning_rate": 1.695896060050939e-05, + "loss": 0.6550310850143433, + "step": 1660 + }, + { + "epoch": 0.585374449339207, + "grad_norm": 1.6689625417691945, + "learning_rate": 1.6954775236979616e-05, + "loss": 
0.7202504277229309, + "step": 1661 + }, + { + "epoch": 0.5857268722466961, + "grad_norm": 1.4936106294591966, + "learning_rate": 1.6950587512520085e-05, + "loss": 0.7941907644271851, + "step": 1662 + }, + { + "epoch": 0.586079295154185, + "grad_norm": 1.3939181305394832, + "learning_rate": 1.6946397428552406e-05, + "loss": 0.6349755525588989, + "step": 1663 + }, + { + "epoch": 0.586431718061674, + "grad_norm": 1.4663377684980818, + "learning_rate": 1.6942204986498978e-05, + "loss": 0.6220123171806335, + "step": 1664 + }, + { + "epoch": 0.586784140969163, + "grad_norm": 1.3729457618271874, + "learning_rate": 1.693801018778301e-05, + "loss": 0.6617282629013062, + "step": 1665 + }, + { + "epoch": 0.587136563876652, + "grad_norm": 1.6745607368825612, + "learning_rate": 1.6933813033828496e-05, + "loss": 0.7424415349960327, + "step": 1666 + }, + { + "epoch": 0.587488986784141, + "grad_norm": 1.4332695932293307, + "learning_rate": 1.6929613526060254e-05, + "loss": 0.7245291471481323, + "step": 1667 + }, + { + "epoch": 0.5878414096916299, + "grad_norm": 1.7631957554533126, + "learning_rate": 1.692541166590387e-05, + "loss": 0.7037352323532104, + "step": 1668 + }, + { + "epoch": 0.588193832599119, + "grad_norm": 1.563153866597813, + "learning_rate": 1.6921207454785754e-05, + "loss": 0.7452583312988281, + "step": 1669 + }, + { + "epoch": 0.5885462555066079, + "grad_norm": 1.8223456889525438, + "learning_rate": 1.6917000894133106e-05, + "loss": 0.7773720026016235, + "step": 1670 + }, + { + "epoch": 0.5888986784140969, + "grad_norm": 1.6663522681826546, + "learning_rate": 1.6912791985373916e-05, + "loss": 0.5820617079734802, + "step": 1671 + }, + { + "epoch": 0.5892511013215859, + "grad_norm": 1.4638050818442514, + "learning_rate": 1.6908580729936983e-05, + "loss": 0.7513154745101929, + "step": 1672 + }, + { + "epoch": 0.5896035242290749, + "grad_norm": 1.4916906386520274, + "learning_rate": 1.6904367129251898e-05, + "loss": 0.6741763949394226, + "step": 1673 + }, + { + 
"epoch": 0.5899559471365639, + "grad_norm": 1.4430048165358413, + "learning_rate": 1.690015118474904e-05, + "loss": 0.7290149331092834, + "step": 1674 + }, + { + "epoch": 0.5903083700440529, + "grad_norm": 1.431209358109114, + "learning_rate": 1.6895932897859596e-05, + "loss": 0.651113748550415, + "step": 1675 + }, + { + "epoch": 0.5906607929515418, + "grad_norm": 1.5475090754915908, + "learning_rate": 1.6891712270015546e-05, + "loss": 0.8062121272087097, + "step": 1676 + }, + { + "epoch": 0.5910132158590309, + "grad_norm": 1.6532405105419041, + "learning_rate": 1.6887489302649657e-05, + "loss": 0.7168683409690857, + "step": 1677 + }, + { + "epoch": 0.5913656387665198, + "grad_norm": 1.5137609810465338, + "learning_rate": 1.6883263997195497e-05, + "loss": 0.6751970052719116, + "step": 1678 + }, + { + "epoch": 0.5917180616740088, + "grad_norm": 1.3244566227591112, + "learning_rate": 1.687903635508742e-05, + "loss": 0.5176222324371338, + "step": 1679 + }, + { + "epoch": 0.5920704845814978, + "grad_norm": 1.532290203616517, + "learning_rate": 1.6874806377760587e-05, + "loss": 0.605686366558075, + "step": 1680 + }, + { + "epoch": 0.5924229074889867, + "grad_norm": 1.55000273332987, + "learning_rate": 1.6870574066650945e-05, + "loss": 0.6927961111068726, + "step": 1681 + }, + { + "epoch": 0.5927753303964758, + "grad_norm": 1.7129667821490024, + "learning_rate": 1.6866339423195223e-05, + "loss": 0.7434122562408447, + "step": 1682 + }, + { + "epoch": 0.5931277533039647, + "grad_norm": 1.6508583062240207, + "learning_rate": 1.6862102448830956e-05, + "loss": 0.5646539926528931, + "step": 1683 + }, + { + "epoch": 0.5934801762114538, + "grad_norm": 1.6845514517525704, + "learning_rate": 1.6857863144996464e-05, + "loss": 0.6666921377182007, + "step": 1684 + }, + { + "epoch": 0.5938325991189427, + "grad_norm": 1.7487162446625693, + "learning_rate": 1.6853621513130857e-05, + "loss": 0.6630325317382812, + "step": 1685 + }, + { + "epoch": 0.5941850220264318, + "grad_norm": 
1.6744610818707069, + "learning_rate": 1.6849377554674042e-05, + "loss": 0.6519981622695923, + "step": 1686 + }, + { + "epoch": 0.5945374449339207, + "grad_norm": 1.523793082989738, + "learning_rate": 1.6845131271066705e-05, + "loss": 0.7958102822303772, + "step": 1687 + }, + { + "epoch": 0.5948898678414097, + "grad_norm": 1.6258620864429363, + "learning_rate": 1.6840882663750333e-05, + "loss": 0.6136632561683655, + "step": 1688 + }, + { + "epoch": 0.5952422907488987, + "grad_norm": 1.5696515726783535, + "learning_rate": 1.683663173416719e-05, + "loss": 0.6177657842636108, + "step": 1689 + }, + { + "epoch": 0.5955947136563877, + "grad_norm": 1.3990187120155009, + "learning_rate": 1.683237848376034e-05, + "loss": 0.7489751577377319, + "step": 1690 + }, + { + "epoch": 0.5959471365638767, + "grad_norm": 1.7037734397554838, + "learning_rate": 1.6828122913973625e-05, + "loss": 0.6749632954597473, + "step": 1691 + }, + { + "epoch": 0.5962995594713656, + "grad_norm": 1.7564038851615957, + "learning_rate": 1.682386502625168e-05, + "loss": 0.6340545415878296, + "step": 1692 + }, + { + "epoch": 0.5966519823788546, + "grad_norm": 1.2684465272191359, + "learning_rate": 1.6819604822039924e-05, + "loss": 0.6141117811203003, + "step": 1693 + }, + { + "epoch": 0.5970044052863436, + "grad_norm": 1.6665435860950566, + "learning_rate": 1.681534230278457e-05, + "loss": 0.7937319874763489, + "step": 1694 + }, + { + "epoch": 0.5973568281938326, + "grad_norm": 1.376760638279742, + "learning_rate": 1.68110774699326e-05, + "loss": 0.6196104288101196, + "step": 1695 + }, + { + "epoch": 0.5977092511013216, + "grad_norm": 1.755256295612453, + "learning_rate": 1.68068103249318e-05, + "loss": 0.6856463551521301, + "step": 1696 + }, + { + "epoch": 0.5980616740088106, + "grad_norm": 1.423055172614558, + "learning_rate": 1.680254086923073e-05, + "loss": 0.754359245300293, + "step": 1697 + }, + { + "epoch": 0.5984140969162995, + "grad_norm": 1.5540819723583295, + "learning_rate": 
1.6798269104278738e-05, + "loss": 0.6663862466812134, + "step": 1698 + }, + { + "epoch": 0.5987665198237886, + "grad_norm": 1.8192134096199304, + "learning_rate": 1.6793995031525955e-05, + "loss": 0.7072615027427673, + "step": 1699 + }, + { + "epoch": 0.5991189427312775, + "grad_norm": 1.3664015344189913, + "learning_rate": 1.678971865242329e-05, + "loss": 0.5722007751464844, + "step": 1700 + }, + { + "epoch": 0.5994713656387666, + "grad_norm": 1.5146739460913152, + "learning_rate": 1.6785439968422456e-05, + "loss": 0.8254455327987671, + "step": 1701 + }, + { + "epoch": 0.5998237885462555, + "grad_norm": 1.6221807995806083, + "learning_rate": 1.678115898097592e-05, + "loss": 0.5726041793823242, + "step": 1702 + }, + { + "epoch": 0.6001762114537444, + "grad_norm": 1.7173506198717712, + "learning_rate": 1.6776875691536946e-05, + "loss": 0.6480926275253296, + "step": 1703 + }, + { + "epoch": 0.6005286343612335, + "grad_norm": 1.801703791100917, + "learning_rate": 1.677259010155958e-05, + "loss": 0.6469742059707642, + "step": 1704 + }, + { + "epoch": 0.6008810572687224, + "grad_norm": 1.6534691770392222, + "learning_rate": 1.6768302212498647e-05, + "loss": 0.814565896987915, + "step": 1705 + }, + { + "epoch": 0.6012334801762115, + "grad_norm": 1.6212192399903926, + "learning_rate": 1.6764012025809745e-05, + "loss": 0.7063060402870178, + "step": 1706 + }, + { + "epoch": 0.6015859030837004, + "grad_norm": 1.2993416112883407, + "learning_rate": 1.6759719542949268e-05, + "loss": 0.6523685455322266, + "step": 1707 + }, + { + "epoch": 0.6019383259911895, + "grad_norm": 1.7291371377992661, + "learning_rate": 1.6755424765374378e-05, + "loss": 0.7361165285110474, + "step": 1708 + }, + { + "epoch": 0.6022907488986784, + "grad_norm": 1.1307969866596985, + "learning_rate": 1.6751127694543012e-05, + "loss": 0.45241934061050415, + "step": 1709 + }, + { + "epoch": 0.6026431718061674, + "grad_norm": 1.3734078208692269, + "learning_rate": 1.6746828331913903e-05, + "loss": 
0.6610431671142578, + "step": 1710 + }, + { + "epoch": 0.6029955947136564, + "grad_norm": 1.6659887779271019, + "learning_rate": 1.674252667894654e-05, + "loss": 0.7572601437568665, + "step": 1711 + }, + { + "epoch": 0.6033480176211454, + "grad_norm": 1.3828996049540105, + "learning_rate": 1.6738222737101205e-05, + "loss": 0.7021572589874268, + "step": 1712 + }, + { + "epoch": 0.6037004405286344, + "grad_norm": 1.5581462402658262, + "learning_rate": 1.6733916507838952e-05, + "loss": 0.7742347121238708, + "step": 1713 + }, + { + "epoch": 0.6040528634361233, + "grad_norm": 1.5666267075277038, + "learning_rate": 1.6729607992621613e-05, + "loss": 0.6453407406806946, + "step": 1714 + }, + { + "epoch": 0.6044052863436123, + "grad_norm": 1.279025328652212, + "learning_rate": 1.6725297192911793e-05, + "loss": 0.7004555463790894, + "step": 1715 + }, + { + "epoch": 0.6047577092511013, + "grad_norm": 1.3482721305547676, + "learning_rate": 1.6720984110172875e-05, + "loss": 0.6979051232337952, + "step": 1716 + }, + { + "epoch": 0.6051101321585903, + "grad_norm": 1.5059245296578512, + "learning_rate": 1.671666874586902e-05, + "loss": 0.6387851238250732, + "step": 1717 + }, + { + "epoch": 0.6054625550660793, + "grad_norm": 1.5397561778856637, + "learning_rate": 1.671235110146515e-05, + "loss": 0.9083811044692993, + "step": 1718 + }, + { + "epoch": 0.6058149779735683, + "grad_norm": 1.637790853716126, + "learning_rate": 1.6708031178426984e-05, + "loss": 0.747002363204956, + "step": 1719 + }, + { + "epoch": 0.6061674008810573, + "grad_norm": 1.6617583077406621, + "learning_rate": 1.6703708978220986e-05, + "loss": 0.7553372383117676, + "step": 1720 + }, + { + "epoch": 0.6065198237885463, + "grad_norm": 1.72002611544435, + "learning_rate": 1.669938450231442e-05, + "loss": 0.762795090675354, + "step": 1721 + }, + { + "epoch": 0.6068722466960352, + "grad_norm": 1.3894206198813077, + "learning_rate": 1.669505775217531e-05, + "loss": 0.739936113357544, + "step": 1722 + }, + { + "epoch": 
0.6072246696035243, + "grad_norm": 1.625344781935558, + "learning_rate": 1.6690728729272456e-05, + "loss": 0.8439112305641174, + "step": 1723 + }, + { + "epoch": 0.6075770925110132, + "grad_norm": 1.5345011506472854, + "learning_rate": 1.6686397435075416e-05, + "loss": 0.6144756078720093, + "step": 1724 + }, + { + "epoch": 0.6079295154185022, + "grad_norm": 1.3674442510472364, + "learning_rate": 1.6682063871054534e-05, + "loss": 0.569161057472229, + "step": 1725 + }, + { + "epoch": 0.6082819383259912, + "grad_norm": 1.6372827589624075, + "learning_rate": 1.6677728038680926e-05, + "loss": 0.7523979544639587, + "step": 1726 + }, + { + "epoch": 0.6086343612334801, + "grad_norm": 1.453986649514636, + "learning_rate": 1.6673389939426463e-05, + "loss": 0.6394520401954651, + "step": 1727 + }, + { + "epoch": 0.6089867841409692, + "grad_norm": 1.358198647287584, + "learning_rate": 1.66690495747638e-05, + "loss": 0.5975633859634399, + "step": 1728 + }, + { + "epoch": 0.6093392070484581, + "grad_norm": 1.6192297143942058, + "learning_rate": 1.666470694616636e-05, + "loss": 0.736790657043457, + "step": 1729 + }, + { + "epoch": 0.6096916299559472, + "grad_norm": 1.4234241508654442, + "learning_rate": 1.6660362055108316e-05, + "loss": 0.7693831920623779, + "step": 1730 + }, + { + "epoch": 0.6100440528634361, + "grad_norm": 1.8032471376275176, + "learning_rate": 1.665601490306464e-05, + "loss": 0.7322608232498169, + "step": 1731 + }, + { + "epoch": 0.6103964757709252, + "grad_norm": 1.3709677099617412, + "learning_rate": 1.6651665491511043e-05, + "loss": 0.6478679180145264, + "step": 1732 + }, + { + "epoch": 0.6107488986784141, + "grad_norm": 1.8838571148858527, + "learning_rate": 1.6647313821924022e-05, + "loss": 0.7125877141952515, + "step": 1733 + }, + { + "epoch": 0.6111013215859031, + "grad_norm": 1.5594770538222507, + "learning_rate": 1.664295989578083e-05, + "loss": 0.8999321460723877, + "step": 1734 + }, + { + "epoch": 0.6114537444933921, + "grad_norm": 1.618421596120734, 
+ "learning_rate": 1.663860371455949e-05, + "loss": 0.6908334493637085, + "step": 1735 + }, + { + "epoch": 0.611806167400881, + "grad_norm": 1.5552403174407248, + "learning_rate": 1.663424527973879e-05, + "loss": 0.6708767414093018, + "step": 1736 + }, + { + "epoch": 0.61215859030837, + "grad_norm": 1.4907630752773764, + "learning_rate": 1.6629884592798283e-05, + "loss": 0.6991565823554993, + "step": 1737 + }, + { + "epoch": 0.612511013215859, + "grad_norm": 1.430459100414143, + "learning_rate": 1.6625521655218287e-05, + "loss": 0.6224193572998047, + "step": 1738 + }, + { + "epoch": 0.612863436123348, + "grad_norm": 1.6355889531807317, + "learning_rate": 1.662115646847988e-05, + "loss": 0.701459527015686, + "step": 1739 + }, + { + "epoch": 0.613215859030837, + "grad_norm": 1.508424771304017, + "learning_rate": 1.6616789034064914e-05, + "loss": 0.784063458442688, + "step": 1740 + }, + { + "epoch": 0.613568281938326, + "grad_norm": 1.4868333492675876, + "learning_rate": 1.661241935345599e-05, + "loss": 0.7604146003723145, + "step": 1741 + }, + { + "epoch": 0.613920704845815, + "grad_norm": 1.7090188741959023, + "learning_rate": 1.6608047428136482e-05, + "loss": 0.6347941160202026, + "step": 1742 + }, + { + "epoch": 0.614273127753304, + "grad_norm": 1.6487656059998825, + "learning_rate": 1.6603673259590524e-05, + "loss": 0.7559434175491333, + "step": 1743 + }, + { + "epoch": 0.6146255506607929, + "grad_norm": 1.5969979245345363, + "learning_rate": 1.6599296849303007e-05, + "loss": 0.742524266242981, + "step": 1744 + }, + { + "epoch": 0.614977973568282, + "grad_norm": 1.2238633556789393, + "learning_rate": 1.6594918198759586e-05, + "loss": 0.697594165802002, + "step": 1745 + }, + { + "epoch": 0.6153303964757709, + "grad_norm": 1.4536023257551807, + "learning_rate": 1.659053730944668e-05, + "loss": 0.7876765131950378, + "step": 1746 + }, + { + "epoch": 0.6156828193832599, + "grad_norm": 1.489887595585156, + "learning_rate": 1.658615418285146e-05, + "loss": 
0.7514386177062988, + "step": 1747 + }, + { + "epoch": 0.6160352422907489, + "grad_norm": 1.6935500501856253, + "learning_rate": 1.658176882046187e-05, + "loss": 0.6220899820327759, + "step": 1748 + }, + { + "epoch": 0.6163876651982378, + "grad_norm": 1.9395284146525182, + "learning_rate": 1.6577381223766592e-05, + "loss": 0.7376539707183838, + "step": 1749 + }, + { + "epoch": 0.6167400881057269, + "grad_norm": 1.6373866531670291, + "learning_rate": 1.6572991394255084e-05, + "loss": 0.8296281099319458, + "step": 1750 + }, + { + "epoch": 0.6170925110132158, + "grad_norm": 1.545978766740828, + "learning_rate": 1.656859933341756e-05, + "loss": 0.7316757440567017, + "step": 1751 + }, + { + "epoch": 0.6174449339207049, + "grad_norm": 1.5280854263636194, + "learning_rate": 1.6564205042744986e-05, + "loss": 0.6933871507644653, + "step": 1752 + }, + { + "epoch": 0.6177973568281938, + "grad_norm": 1.890269396017501, + "learning_rate": 1.655980852372908e-05, + "loss": 0.6835601329803467, + "step": 1753 + }, + { + "epoch": 0.6181497797356829, + "grad_norm": 1.3967466693425752, + "learning_rate": 1.655540977786233e-05, + "loss": 0.6752027869224548, + "step": 1754 + }, + { + "epoch": 0.6185022026431718, + "grad_norm": 1.4944496246124994, + "learning_rate": 1.6551008806637976e-05, + "loss": 0.6092851758003235, + "step": 1755 + }, + { + "epoch": 0.6188546255506608, + "grad_norm": 1.3266652259646856, + "learning_rate": 1.6546605611550008e-05, + "loss": 0.682563066482544, + "step": 1756 + }, + { + "epoch": 0.6192070484581498, + "grad_norm": 1.5302981352911342, + "learning_rate": 1.654220019409317e-05, + "loss": 0.8674311637878418, + "step": 1757 + }, + { + "epoch": 0.6195594713656387, + "grad_norm": 1.4437314589210788, + "learning_rate": 1.6537792555762966e-05, + "loss": 0.7209165096282959, + "step": 1758 + }, + { + "epoch": 0.6199118942731278, + "grad_norm": 1.5958855115050472, + "learning_rate": 1.6533382698055655e-05, + "loss": 0.7795991897583008, + "step": 1759 + }, + { + 
"epoch": 0.6202643171806167, + "grad_norm": 1.6392261912532398, + "learning_rate": 1.6528970622468245e-05, + "loss": 0.6749448776245117, + "step": 1760 + }, + { + "epoch": 0.6206167400881057, + "grad_norm": 1.5291165267411688, + "learning_rate": 1.6524556330498494e-05, + "loss": 0.9127920866012573, + "step": 1761 + }, + { + "epoch": 0.6209691629955947, + "grad_norm": 1.5402491362904795, + "learning_rate": 1.6520139823644922e-05, + "loss": 0.6224071979522705, + "step": 1762 + }, + { + "epoch": 0.6213215859030837, + "grad_norm": 1.426673111398807, + "learning_rate": 1.6515721103406798e-05, + "loss": 0.6955251693725586, + "step": 1763 + }, + { + "epoch": 0.6216740088105727, + "grad_norm": 1.7187740007003602, + "learning_rate": 1.6511300171284132e-05, + "loss": 0.676613986492157, + "step": 1764 + }, + { + "epoch": 0.6220264317180617, + "grad_norm": 1.4024924612217573, + "learning_rate": 1.65068770287777e-05, + "loss": 0.7482033967971802, + "step": 1765 + }, + { + "epoch": 0.6223788546255506, + "grad_norm": 1.4659804586317469, + "learning_rate": 1.6502451677389015e-05, + "loss": 0.6019684076309204, + "step": 1766 + }, + { + "epoch": 0.6227312775330397, + "grad_norm": 1.419796458872072, + "learning_rate": 1.649802411862035e-05, + "loss": 0.6796068549156189, + "step": 1767 + }, + { + "epoch": 0.6230837004405286, + "grad_norm": 2.234008541241949, + "learning_rate": 1.6493594353974724e-05, + "loss": 0.6351302862167358, + "step": 1768 + }, + { + "epoch": 0.6234361233480176, + "grad_norm": 1.4257561009443, + "learning_rate": 1.6489162384955906e-05, + "loss": 0.6093732714653015, + "step": 1769 + }, + { + "epoch": 0.6237885462555066, + "grad_norm": 1.842168854503522, + "learning_rate": 1.6484728213068405e-05, + "loss": 0.8181271553039551, + "step": 1770 + }, + { + "epoch": 0.6241409691629956, + "grad_norm": 1.821206401126196, + "learning_rate": 1.6480291839817488e-05, + "loss": 0.7093993425369263, + "step": 1771 + }, + { + "epoch": 0.6244933920704846, + "grad_norm": 
1.416340976430299, + "learning_rate": 1.6475853266709165e-05, + "loss": 0.6895081996917725, + "step": 1772 + }, + { + "epoch": 0.6248458149779735, + "grad_norm": 1.5970315552720198, + "learning_rate": 1.6471412495250195e-05, + "loss": 0.6706013679504395, + "step": 1773 + }, + { + "epoch": 0.6251982378854626, + "grad_norm": 1.5170788749866242, + "learning_rate": 1.6466969526948082e-05, + "loss": 0.6700015664100647, + "step": 1774 + }, + { + "epoch": 0.6255506607929515, + "grad_norm": 1.5173815641058028, + "learning_rate": 1.6462524363311072e-05, + "loss": 0.6591087579727173, + "step": 1775 + }, + { + "epoch": 0.6259030837004406, + "grad_norm": 1.6219345446237772, + "learning_rate": 1.6458077005848164e-05, + "loss": 0.7775006294250488, + "step": 1776 + }, + { + "epoch": 0.6262555066079295, + "grad_norm": 1.6260525304572828, + "learning_rate": 1.6453627456069093e-05, + "loss": 0.8459682464599609, + "step": 1777 + }, + { + "epoch": 0.6266079295154185, + "grad_norm": 1.4031571304990242, + "learning_rate": 1.6449175715484346e-05, + "loss": 0.6536898612976074, + "step": 1778 + }, + { + "epoch": 0.6269603524229075, + "grad_norm": 1.5129603585000657, + "learning_rate": 1.6444721785605148e-05, + "loss": 0.7543610334396362, + "step": 1779 + }, + { + "epoch": 0.6273127753303964, + "grad_norm": 1.6228520645077271, + "learning_rate": 1.6440265667943474e-05, + "loss": 0.7416362762451172, + "step": 1780 + }, + { + "epoch": 0.6276651982378855, + "grad_norm": 1.4583654660578542, + "learning_rate": 1.6435807364012035e-05, + "loss": 0.5505499839782715, + "step": 1781 + }, + { + "epoch": 0.6280176211453744, + "grad_norm": 1.5252426453600672, + "learning_rate": 1.6431346875324284e-05, + "loss": 0.792723536491394, + "step": 1782 + }, + { + "epoch": 0.6283700440528635, + "grad_norm": 1.3655475423968058, + "learning_rate": 1.6426884203394416e-05, + "loss": 0.6313158273696899, + "step": 1783 + }, + { + "epoch": 0.6287224669603524, + "grad_norm": 1.6057168635576118, + "learning_rate": 
1.642241934973738e-05, + "loss": 0.6168874502182007, + "step": 1784 + }, + { + "epoch": 0.6290748898678414, + "grad_norm": 1.78997265433784, + "learning_rate": 1.6417952315868845e-05, + "loss": 0.6995766162872314, + "step": 1785 + }, + { + "epoch": 0.6294273127753304, + "grad_norm": 1.4835625331683349, + "learning_rate": 1.641348310330523e-05, + "loss": 0.8046826124191284, + "step": 1786 + }, + { + "epoch": 0.6297797356828194, + "grad_norm": 1.4892920408023869, + "learning_rate": 1.6409011713563697e-05, + "loss": 0.7227291464805603, + "step": 1787 + }, + { + "epoch": 0.6301321585903084, + "grad_norm": 1.4682105257113767, + "learning_rate": 1.6404538148162145e-05, + "loss": 0.6463631391525269, + "step": 1788 + }, + { + "epoch": 0.6304845814977974, + "grad_norm": 2.4977643907634, + "learning_rate": 1.640006240861921e-05, + "loss": 0.7473348379135132, + "step": 1789 + }, + { + "epoch": 0.6308370044052863, + "grad_norm": 1.4291329366827183, + "learning_rate": 1.6395584496454263e-05, + "loss": 0.7311505079269409, + "step": 1790 + }, + { + "epoch": 0.6311894273127753, + "grad_norm": 1.5618530036111458, + "learning_rate": 1.639110441318742e-05, + "loss": 0.7259535789489746, + "step": 1791 + }, + { + "epoch": 0.6315418502202643, + "grad_norm": 1.515515721890048, + "learning_rate": 1.6386622160339522e-05, + "loss": 0.5777252912521362, + "step": 1792 + }, + { + "epoch": 0.6318942731277533, + "grad_norm": 1.3190322559386176, + "learning_rate": 1.638213773943216e-05, + "loss": 0.5510598421096802, + "step": 1793 + }, + { + "epoch": 0.6322466960352423, + "grad_norm": 1.5085803548323364, + "learning_rate": 1.637765115198766e-05, + "loss": 0.6448229551315308, + "step": 1794 + }, + { + "epoch": 0.6325991189427312, + "grad_norm": 1.5827276696724286, + "learning_rate": 1.6373162399529067e-05, + "loss": 0.7359289526939392, + "step": 1795 + }, + { + "epoch": 0.6329515418502203, + "grad_norm": 1.5346140091491929, + "learning_rate": 1.6368671483580185e-05, + "loss": 0.616656482219696, + 
"step": 1796 + }, + { + "epoch": 0.6333039647577092, + "grad_norm": 1.4291822350961465, + "learning_rate": 1.6364178405665534e-05, + "loss": 0.5966289043426514, + "step": 1797 + }, + { + "epoch": 0.6336563876651983, + "grad_norm": 1.8727626569458464, + "learning_rate": 1.6359683167310375e-05, + "loss": 0.7475985288619995, + "step": 1798 + }, + { + "epoch": 0.6340088105726872, + "grad_norm": 1.4494645750595028, + "learning_rate": 1.63551857700407e-05, + "loss": 0.6030765771865845, + "step": 1799 + }, + { + "epoch": 0.6343612334801763, + "grad_norm": 1.6637248682130477, + "learning_rate": 1.6350686215383237e-05, + "loss": 0.6193016171455383, + "step": 1800 + }, + { + "epoch": 0.6347136563876652, + "grad_norm": 1.3604775956740969, + "learning_rate": 1.6346184504865442e-05, + "loss": 0.6404513120651245, + "step": 1801 + }, + { + "epoch": 0.6350660792951542, + "grad_norm": 1.5539318450371893, + "learning_rate": 1.6341680640015515e-05, + "loss": 0.8453506231307983, + "step": 1802 + }, + { + "epoch": 0.6354185022026432, + "grad_norm": 1.3642622033336096, + "learning_rate": 1.6337174622362366e-05, + "loss": 0.6094445586204529, + "step": 1803 + }, + { + "epoch": 0.6357709251101321, + "grad_norm": 1.5112522647253264, + "learning_rate": 1.6332666453435653e-05, + "loss": 0.7352159023284912, + "step": 1804 + }, + { + "epoch": 0.6361233480176212, + "grad_norm": 1.4529963307650198, + "learning_rate": 1.632815613476576e-05, + "loss": 0.7395339608192444, + "step": 1805 + }, + { + "epoch": 0.6364757709251101, + "grad_norm": 1.4350925789909401, + "learning_rate": 1.63236436678838e-05, + "loss": 0.7246927618980408, + "step": 1806 + }, + { + "epoch": 0.6368281938325991, + "grad_norm": 1.3653208723694477, + "learning_rate": 1.6319129054321616e-05, + "loss": 0.6913329362869263, + "step": 1807 + }, + { + "epoch": 0.6371806167400881, + "grad_norm": 1.9893098285493216, + "learning_rate": 1.6314612295611772e-05, + "loss": 0.6410515308380127, + "step": 1808 + }, + { + "epoch": 
0.6375330396475771, + "grad_norm": 2.6583918764324665, + "learning_rate": 1.6310093393287574e-05, + "loss": 0.690910816192627, + "step": 1809 + }, + { + "epoch": 0.6378854625550661, + "grad_norm": 1.4623649413484192, + "learning_rate": 1.6305572348883044e-05, + "loss": 0.6520562171936035, + "step": 1810 + }, + { + "epoch": 0.6382378854625551, + "grad_norm": 1.6850706181935027, + "learning_rate": 1.630104916393294e-05, + "loss": 0.6966608166694641, + "step": 1811 + }, + { + "epoch": 0.638590308370044, + "grad_norm": 1.7161033790648312, + "learning_rate": 1.6296523839972743e-05, + "loss": 0.826806902885437, + "step": 1812 + }, + { + "epoch": 0.6389427312775331, + "grad_norm": 1.431569634617566, + "learning_rate": 1.6291996378538653e-05, + "loss": 0.6695773601531982, + "step": 1813 + }, + { + "epoch": 0.639295154185022, + "grad_norm": 1.4264708644101765, + "learning_rate": 1.6287466781167607e-05, + "loss": 0.5725491046905518, + "step": 1814 + }, + { + "epoch": 0.639647577092511, + "grad_norm": 1.2779233324378096, + "learning_rate": 1.628293504939727e-05, + "loss": 0.5543544292449951, + "step": 1815 + }, + { + "epoch": 0.64, + "grad_norm": 3.2997728941963564, + "learning_rate": 1.6278401184766007e-05, + "loss": 0.6964641809463501, + "step": 1816 + }, + { + "epoch": 0.640352422907489, + "grad_norm": 1.3065245679172277, + "learning_rate": 1.6273865188812935e-05, + "loss": 0.675407886505127, + "step": 1817 + }, + { + "epoch": 0.640704845814978, + "grad_norm": 1.4883059032141013, + "learning_rate": 1.626932706307788e-05, + "loss": 0.6304433345794678, + "step": 1818 + }, + { + "epoch": 0.6410572687224669, + "grad_norm": 1.5529882690454875, + "learning_rate": 1.62647868091014e-05, + "loss": 0.7432112693786621, + "step": 1819 + }, + { + "epoch": 0.641409691629956, + "grad_norm": 1.5761551228008874, + "learning_rate": 1.6260244428424763e-05, + "loss": 0.730377197265625, + "step": 1820 + }, + { + "epoch": 0.6417621145374449, + "grad_norm": 1.7239403694554825, + "learning_rate": 
1.6255699922589968e-05, + "loss": 0.694229006767273, + "step": 1821 + }, + { + "epoch": 0.642114537444934, + "grad_norm": 1.5664915948077012, + "learning_rate": 1.6251153293139735e-05, + "loss": 0.7284739017486572, + "step": 1822 + }, + { + "epoch": 0.6424669603524229, + "grad_norm": 1.4047714992661522, + "learning_rate": 1.6246604541617507e-05, + "loss": 0.6028950214385986, + "step": 1823 + }, + { + "epoch": 0.642819383259912, + "grad_norm": 1.65079248713073, + "learning_rate": 1.6242053669567432e-05, + "loss": 0.6776808500289917, + "step": 1824 + }, + { + "epoch": 0.6431718061674009, + "grad_norm": 1.7695857292474644, + "learning_rate": 1.6237500678534396e-05, + "loss": 0.7743366956710815, + "step": 1825 + }, + { + "epoch": 0.6435242290748898, + "grad_norm": 1.594351471613888, + "learning_rate": 1.6232945570064e-05, + "loss": 0.6356723308563232, + "step": 1826 + }, + { + "epoch": 0.6438766519823789, + "grad_norm": 1.4846113103688028, + "learning_rate": 1.622838834570256e-05, + "loss": 0.7356402277946472, + "step": 1827 + }, + { + "epoch": 0.6442290748898678, + "grad_norm": 1.455165750941624, + "learning_rate": 1.622382900699711e-05, + "loss": 0.7639342546463013, + "step": 1828 + }, + { + "epoch": 0.6445814977973569, + "grad_norm": 2.0823946019481987, + "learning_rate": 1.6219267555495407e-05, + "loss": 0.6969513297080994, + "step": 1829 + }, + { + "epoch": 0.6449339207048458, + "grad_norm": 1.418146430885783, + "learning_rate": 1.621470399274592e-05, + "loss": 0.7532765865325928, + "step": 1830 + }, + { + "epoch": 0.6452863436123348, + "grad_norm": 1.3893974330709622, + "learning_rate": 1.6210138320297835e-05, + "loss": 0.5801100730895996, + "step": 1831 + }, + { + "epoch": 0.6456387665198238, + "grad_norm": 1.5780391931120195, + "learning_rate": 1.6205570539701056e-05, + "loss": 0.8006102442741394, + "step": 1832 + }, + { + "epoch": 0.6459911894273128, + "grad_norm": 1.4094927188728377, + "learning_rate": 1.6201000652506203e-05, + "loss": 0.6507089138031006, + 
"step": 1833 + }, + { + "epoch": 0.6463436123348018, + "grad_norm": 1.9684758989320281, + "learning_rate": 1.619642866026461e-05, + "loss": 0.7407999634742737, + "step": 1834 + }, + { + "epoch": 0.6466960352422908, + "grad_norm": 1.4160609898798358, + "learning_rate": 1.619185456452833e-05, + "loss": 0.6964670419692993, + "step": 1835 + }, + { + "epoch": 0.6470484581497797, + "grad_norm": 1.6614634508995256, + "learning_rate": 1.6187278366850122e-05, + "loss": 0.7095489501953125, + "step": 1836 + }, + { + "epoch": 0.6474008810572687, + "grad_norm": 2.0391949894277017, + "learning_rate": 1.6182700068783463e-05, + "loss": 0.6968166828155518, + "step": 1837 + }, + { + "epoch": 0.6477533039647577, + "grad_norm": 1.3206477384834772, + "learning_rate": 1.617811967188254e-05, + "loss": 0.7745821475982666, + "step": 1838 + }, + { + "epoch": 0.6481057268722467, + "grad_norm": 1.4803456865319338, + "learning_rate": 1.6173537177702266e-05, + "loss": 0.7071934938430786, + "step": 1839 + }, + { + "epoch": 0.6484581497797357, + "grad_norm": 1.7225763324537737, + "learning_rate": 1.6168952587798242e-05, + "loss": 0.6481701135635376, + "step": 1840 + }, + { + "epoch": 0.6488105726872246, + "grad_norm": 1.4447543914645467, + "learning_rate": 1.6164365903726805e-05, + "loss": 0.6349890232086182, + "step": 1841 + }, + { + "epoch": 0.6491629955947137, + "grad_norm": 1.3913908457554178, + "learning_rate": 1.6159777127044982e-05, + "loss": 0.6067368388175964, + "step": 1842 + }, + { + "epoch": 0.6495154185022026, + "grad_norm": 1.3943413375617566, + "learning_rate": 1.6155186259310523e-05, + "loss": 0.7170778512954712, + "step": 1843 + }, + { + "epoch": 0.6498678414096917, + "grad_norm": 1.4309397568408155, + "learning_rate": 1.6150593302081888e-05, + "loss": 0.5623376965522766, + "step": 1844 + }, + { + "epoch": 0.6502202643171806, + "grad_norm": 1.442096873601557, + "learning_rate": 1.6145998256918238e-05, + "loss": 0.7295233607292175, + "step": 1845 + }, + { + "epoch": 
0.6505726872246697, + "grad_norm": 1.513681766461532, + "learning_rate": 1.6141401125379454e-05, + "loss": 0.6991151571273804, + "step": 1846 + }, + { + "epoch": 0.6509251101321586, + "grad_norm": 1.568060173563952, + "learning_rate": 1.6136801909026113e-05, + "loss": 0.7553545236587524, + "step": 1847 + }, + { + "epoch": 0.6512775330396475, + "grad_norm": 1.560177534517688, + "learning_rate": 1.613220060941951e-05, + "loss": 0.8280071020126343, + "step": 1848 + }, + { + "epoch": 0.6516299559471366, + "grad_norm": 1.3846780543862842, + "learning_rate": 1.6127597228121636e-05, + "loss": 0.662299633026123, + "step": 1849 + }, + { + "epoch": 0.6519823788546255, + "grad_norm": 1.519733781984336, + "learning_rate": 1.6122991766695206e-05, + "loss": 0.6493197679519653, + "step": 1850 + }, + { + "epoch": 0.6523348017621146, + "grad_norm": 1.5074834442694671, + "learning_rate": 1.6118384226703623e-05, + "loss": 0.5910629034042358, + "step": 1851 + }, + { + "epoch": 0.6526872246696035, + "grad_norm": 1.5082942143966174, + "learning_rate": 1.611377460971101e-05, + "loss": 0.7124426364898682, + "step": 1852 + }, + { + "epoch": 0.6530396475770925, + "grad_norm": 1.6734021483912949, + "learning_rate": 1.610916291728218e-05, + "loss": 0.6081063747406006, + "step": 1853 + }, + { + "epoch": 0.6533920704845815, + "grad_norm": 1.5485445677219123, + "learning_rate": 1.6104549150982666e-05, + "loss": 0.7536673545837402, + "step": 1854 + }, + { + "epoch": 0.6537444933920705, + "grad_norm": 1.5239612944966212, + "learning_rate": 1.6099933312378695e-05, + "loss": 0.6514976024627686, + "step": 1855 + }, + { + "epoch": 0.6540969162995595, + "grad_norm": 1.3951117738157057, + "learning_rate": 1.6095315403037205e-05, + "loss": 0.6595193147659302, + "step": 1856 + }, + { + "epoch": 0.6544493392070485, + "grad_norm": 1.5562205804379312, + "learning_rate": 1.6090695424525826e-05, + "loss": 0.666920006275177, + "step": 1857 + }, + { + "epoch": 0.6548017621145374, + "grad_norm": 
1.5350434119319913, + "learning_rate": 1.6086073378412902e-05, + "loss": 0.5984979271888733, + "step": 1858 + }, + { + "epoch": 0.6551541850220264, + "grad_norm": 1.8541188470544154, + "learning_rate": 1.608144926626747e-05, + "loss": 0.8021191358566284, + "step": 1859 + }, + { + "epoch": 0.6555066079295154, + "grad_norm": 1.5029675710659876, + "learning_rate": 1.6076823089659272e-05, + "loss": 0.7368075847625732, + "step": 1860 + }, + { + "epoch": 0.6558590308370044, + "grad_norm": 1.596711606351331, + "learning_rate": 1.6072194850158755e-05, + "loss": 0.7923766374588013, + "step": 1861 + }, + { + "epoch": 0.6562114537444934, + "grad_norm": 1.6332800469997777, + "learning_rate": 1.606756454933706e-05, + "loss": 0.6907824873924255, + "step": 1862 + }, + { + "epoch": 0.6565638766519823, + "grad_norm": 1.5674543537069574, + "learning_rate": 1.606293218876603e-05, + "loss": 0.7366634607315063, + "step": 1863 + }, + { + "epoch": 0.6569162995594714, + "grad_norm": 1.7550517656533429, + "learning_rate": 1.6058297770018208e-05, + "loss": 0.7166022658348083, + "step": 1864 + }, + { + "epoch": 0.6572687224669603, + "grad_norm": 1.5153527205809505, + "learning_rate": 1.6053661294666833e-05, + "loss": 0.6969404220581055, + "step": 1865 + }, + { + "epoch": 0.6576211453744494, + "grad_norm": 1.5681332930444218, + "learning_rate": 1.6049022764285846e-05, + "loss": 0.7182974815368652, + "step": 1866 + }, + { + "epoch": 0.6579735682819383, + "grad_norm": 2.620263422686914, + "learning_rate": 1.6044382180449886e-05, + "loss": 0.7469301819801331, + "step": 1867 + }, + { + "epoch": 0.6583259911894274, + "grad_norm": 1.458082221775431, + "learning_rate": 1.603973954473428e-05, + "loss": 0.7097122073173523, + "step": 1868 + }, + { + "epoch": 0.6586784140969163, + "grad_norm": 1.3404337000381439, + "learning_rate": 1.6035094858715065e-05, + "loss": 0.6907291412353516, + "step": 1869 + }, + { + "epoch": 0.6590308370044052, + "grad_norm": 1.5576579616406543, + "learning_rate": 
1.6030448123968963e-05, + "loss": 0.6259130239486694, + "step": 1870 + }, + { + "epoch": 0.6593832599118943, + "grad_norm": 1.6431810286043311, + "learning_rate": 1.6025799342073397e-05, + "loss": 0.6948051452636719, + "step": 1871 + }, + { + "epoch": 0.6597356828193832, + "grad_norm": 1.3540961323396474, + "learning_rate": 1.602114851460648e-05, + "loss": 0.7037572264671326, + "step": 1872 + }, + { + "epoch": 0.6600881057268723, + "grad_norm": 1.565352238933419, + "learning_rate": 1.6016495643147036e-05, + "loss": 0.7728864550590515, + "step": 1873 + }, + { + "epoch": 0.6604405286343612, + "grad_norm": 1.4345290675539004, + "learning_rate": 1.601184072927456e-05, + "loss": 0.7782067060470581, + "step": 1874 + }, + { + "epoch": 0.6607929515418502, + "grad_norm": 1.4505913839056241, + "learning_rate": 1.6007183774569246e-05, + "loss": 0.6168591976165771, + "step": 1875 + }, + { + "epoch": 0.6611453744493392, + "grad_norm": 1.6465062301007323, + "learning_rate": 1.6002524780611995e-05, + "loss": 0.702346920967102, + "step": 1876 + }, + { + "epoch": 0.6614977973568282, + "grad_norm": 1.6478258582343996, + "learning_rate": 1.5997863748984384e-05, + "loss": 0.6084239482879639, + "step": 1877 + }, + { + "epoch": 0.6618502202643172, + "grad_norm": 1.5841429013244157, + "learning_rate": 1.5993200681268696e-05, + "loss": 0.8307315707206726, + "step": 1878 + }, + { + "epoch": 0.6622026431718062, + "grad_norm": 1.8073980879357947, + "learning_rate": 1.5988535579047888e-05, + "loss": 0.6465811729431152, + "step": 1879 + }, + { + "epoch": 0.6625550660792952, + "grad_norm": 1.5593829827457022, + "learning_rate": 1.598386844390562e-05, + "loss": 0.71415114402771, + "step": 1880 + }, + { + "epoch": 0.6629074889867841, + "grad_norm": 6.602062472303997, + "learning_rate": 1.5979199277426243e-05, + "loss": 0.7135012149810791, + "step": 1881 + }, + { + "epoch": 0.6632599118942731, + "grad_norm": 1.584805815321856, + "learning_rate": 1.597452808119479e-05, + "loss": 0.840306282043457, 
+ "step": 1882 + }, + { + "epoch": 0.6636123348017621, + "grad_norm": 1.454651140369818, + "learning_rate": 1.596985485679699e-05, + "loss": 0.622429609298706, + "step": 1883 + }, + { + "epoch": 0.6639647577092511, + "grad_norm": 1.5798478269154124, + "learning_rate": 1.5965179605819248e-05, + "loss": 0.6505612134933472, + "step": 1884 + }, + { + "epoch": 0.66431718061674, + "grad_norm": 1.4292089389404006, + "learning_rate": 1.5960502329848683e-05, + "loss": 0.7665247917175293, + "step": 1885 + }, + { + "epoch": 0.6646696035242291, + "grad_norm": 1.614107737492675, + "learning_rate": 1.5955823030473068e-05, + "loss": 0.7780051231384277, + "step": 1886 + }, + { + "epoch": 0.665022026431718, + "grad_norm": 1.4074097920809756, + "learning_rate": 1.5951141709280886e-05, + "loss": 0.6311650276184082, + "step": 1887 + }, + { + "epoch": 0.6653744493392071, + "grad_norm": 1.287734360896639, + "learning_rate": 1.5946458367861302e-05, + "loss": 0.7126712799072266, + "step": 1888 + }, + { + "epoch": 0.665726872246696, + "grad_norm": 1.3823278268773909, + "learning_rate": 1.5941773007804165e-05, + "loss": 0.6979397535324097, + "step": 1889 + }, + { + "epoch": 0.6660792951541851, + "grad_norm": 1.5067230035216896, + "learning_rate": 1.5937085630700003e-05, + "loss": 0.7065495252609253, + "step": 1890 + }, + { + "epoch": 0.666431718061674, + "grad_norm": 1.373677820269664, + "learning_rate": 1.593239623814004e-05, + "loss": 0.6157221794128418, + "step": 1891 + }, + { + "epoch": 0.6667841409691629, + "grad_norm": 1.6157271272896285, + "learning_rate": 1.5927704831716177e-05, + "loss": 0.6835625171661377, + "step": 1892 + }, + { + "epoch": 0.667136563876652, + "grad_norm": 1.5002309814069255, + "learning_rate": 1.5923011413021e-05, + "loss": 0.6416822671890259, + "step": 1893 + }, + { + "epoch": 0.6674889867841409, + "grad_norm": 1.4507514621746327, + "learning_rate": 1.5918315983647782e-05, + "loss": 0.7307168245315552, + "step": 1894 + }, + { + "epoch": 0.66784140969163, + 
"grad_norm": 1.3321086634513644, + "learning_rate": 1.5913618545190468e-05, + "loss": 0.5464824438095093, + "step": 1895 + }, + { + "epoch": 0.6681938325991189, + "grad_norm": 1.544912001907108, + "learning_rate": 1.5908919099243698e-05, + "loss": 0.6634502410888672, + "step": 1896 + }, + { + "epoch": 0.668546255506608, + "grad_norm": 1.2985703589965545, + "learning_rate": 1.5904217647402788e-05, + "loss": 0.719158411026001, + "step": 1897 + }, + { + "epoch": 0.6688986784140969, + "grad_norm": 1.5083721998375157, + "learning_rate": 1.5899514191263733e-05, + "loss": 0.7547527551651001, + "step": 1898 + }, + { + "epoch": 0.6692511013215859, + "grad_norm": 1.6226125781851348, + "learning_rate": 1.5894808732423207e-05, + "loss": 0.7549886703491211, + "step": 1899 + }, + { + "epoch": 0.6696035242290749, + "grad_norm": 1.5327056521201368, + "learning_rate": 1.589010127247857e-05, + "loss": 0.7107831239700317, + "step": 1900 + }, + { + "epoch": 0.6699559471365639, + "grad_norm": 1.5679371113552734, + "learning_rate": 1.588539181302786e-05, + "loss": 0.855078935623169, + "step": 1901 + }, + { + "epoch": 0.6703083700440529, + "grad_norm": 1.4970896726818788, + "learning_rate": 1.5880680355669792e-05, + "loss": 0.8235266208648682, + "step": 1902 + }, + { + "epoch": 0.6706607929515418, + "grad_norm": 1.339674008175079, + "learning_rate": 1.587596690200375e-05, + "loss": 0.6060166358947754, + "step": 1903 + }, + { + "epoch": 0.6710132158590308, + "grad_norm": 1.4603163291197105, + "learning_rate": 1.5871251453629817e-05, + "loss": 0.7325272560119629, + "step": 1904 + }, + { + "epoch": 0.6713656387665198, + "grad_norm": 1.5470128203990354, + "learning_rate": 1.586653401214873e-05, + "loss": 0.674901008605957, + "step": 1905 + }, + { + "epoch": 0.6717180616740088, + "grad_norm": 1.3515017914848853, + "learning_rate": 1.5861814579161928e-05, + "loss": 0.767164945602417, + "step": 1906 + }, + { + "epoch": 0.6720704845814978, + "grad_norm": 1.3633425183694836, + "learning_rate": 
1.5857093156271496e-05, + "loss": 0.5691556930541992, + "step": 1907 + }, + { + "epoch": 0.6724229074889868, + "grad_norm": 1.3106038540183678, + "learning_rate": 1.585236974508022e-05, + "loss": 0.6885931491851807, + "step": 1908 + }, + { + "epoch": 0.6727753303964757, + "grad_norm": 1.143239709830434, + "learning_rate": 1.5847644347191545e-05, + "loss": 0.6227391958236694, + "step": 1909 + }, + { + "epoch": 0.6731277533039648, + "grad_norm": 1.4883434470080177, + "learning_rate": 1.5842916964209602e-05, + "loss": 0.6084527969360352, + "step": 1910 + }, + { + "epoch": 0.6734801762114537, + "grad_norm": 1.7178691294348742, + "learning_rate": 1.583818759773919e-05, + "loss": 0.7001935243606567, + "step": 1911 + }, + { + "epoch": 0.6738325991189428, + "grad_norm": 1.684468384573203, + "learning_rate": 1.5833456249385774e-05, + "loss": 0.8263465166091919, + "step": 1912 + }, + { + "epoch": 0.6741850220264317, + "grad_norm": 1.6085564780466834, + "learning_rate": 1.582872292075551e-05, + "loss": 0.662792444229126, + "step": 1913 + }, + { + "epoch": 0.6745374449339208, + "grad_norm": 1.7464203558320361, + "learning_rate": 1.582398761345521e-05, + "loss": 0.7093051075935364, + "step": 1914 + }, + { + "epoch": 0.6748898678414097, + "grad_norm": 1.4885122105608484, + "learning_rate": 1.5819250329092364e-05, + "loss": 0.7264106273651123, + "step": 1915 + }, + { + "epoch": 0.6752422907488986, + "grad_norm": 1.5383309179609377, + "learning_rate": 1.581451106927513e-05, + "loss": 0.6561543345451355, + "step": 1916 + }, + { + "epoch": 0.6755947136563877, + "grad_norm": 1.634971670239321, + "learning_rate": 1.580976983561235e-05, + "loss": 0.6563262939453125, + "step": 1917 + }, + { + "epoch": 0.6759471365638766, + "grad_norm": 1.2931579342976025, + "learning_rate": 1.5805026629713512e-05, + "loss": 0.5224509239196777, + "step": 1918 + }, + { + "epoch": 0.6762995594713657, + "grad_norm": 1.4840746720603137, + "learning_rate": 1.5800281453188793e-05, + "loss": 0.6565898656845093, 
+ "step": 1919 + }, + { + "epoch": 0.6766519823788546, + "grad_norm": 1.4375600407888718, + "learning_rate": 1.5795534307649032e-05, + "loss": 0.7954028844833374, + "step": 1920 + }, + { + "epoch": 0.6770044052863436, + "grad_norm": 1.3454762773409146, + "learning_rate": 1.579078519470574e-05, + "loss": 0.6624404788017273, + "step": 1921 + }, + { + "epoch": 0.6773568281938326, + "grad_norm": 1.5514355338443828, + "learning_rate": 1.5786034115971083e-05, + "loss": 0.840311586856842, + "step": 1922 + }, + { + "epoch": 0.6777092511013216, + "grad_norm": 1.5163172495660509, + "learning_rate": 1.578128107305792e-05, + "loss": 0.6967859864234924, + "step": 1923 + }, + { + "epoch": 0.6780616740088106, + "grad_norm": 1.0735596232953704, + "learning_rate": 1.5776526067579746e-05, + "loss": 0.5295379161834717, + "step": 1924 + }, + { + "epoch": 0.6784140969162996, + "grad_norm": 1.8118747234451476, + "learning_rate": 1.5771769101150752e-05, + "loss": 0.6758475303649902, + "step": 1925 + }, + { + "epoch": 0.6787665198237885, + "grad_norm": 1.3510918406813899, + "learning_rate": 1.576701017538577e-05, + "loss": 0.6891785860061646, + "step": 1926 + }, + { + "epoch": 0.6791189427312775, + "grad_norm": 1.4115910497948105, + "learning_rate": 1.5762249291900304e-05, + "loss": 0.6507086157798767, + "step": 1927 + }, + { + "epoch": 0.6794713656387665, + "grad_norm": 1.4168935733459347, + "learning_rate": 1.5757486452310537e-05, + "loss": 0.6220029592514038, + "step": 1928 + }, + { + "epoch": 0.6798237885462555, + "grad_norm": 1.5134078284046213, + "learning_rate": 1.5752721658233294e-05, + "loss": 0.7742874622344971, + "step": 1929 + }, + { + "epoch": 0.6801762114537445, + "grad_norm": 1.513809055671425, + "learning_rate": 1.5747954911286085e-05, + "loss": 0.6895851492881775, + "step": 1930 + }, + { + "epoch": 0.6805286343612335, + "grad_norm": 1.6367265924041048, + "learning_rate": 1.5743186213087062e-05, + "loss": 0.71466064453125, + "step": 1931 + }, + { + "epoch": 
0.6808810572687225, + "grad_norm": 1.506916023064254, + "learning_rate": 1.5738415565255056e-05, + "loss": 0.6465627551078796, + "step": 1932 + }, + { + "epoch": 0.6812334801762114, + "grad_norm": 1.3796886447957644, + "learning_rate": 1.5733642969409553e-05, + "loss": 0.7592962980270386, + "step": 1933 + }, + { + "epoch": 0.6815859030837005, + "grad_norm": 1.662185742102518, + "learning_rate": 1.57288684271707e-05, + "loss": 0.7641816735267639, + "step": 1934 + }, + { + "epoch": 0.6819383259911894, + "grad_norm": 1.5600426648231815, + "learning_rate": 1.5724091940159306e-05, + "loss": 0.7015130519866943, + "step": 1935 + }, + { + "epoch": 0.6822907488986785, + "grad_norm": 1.5031237824980206, + "learning_rate": 1.5719313509996833e-05, + "loss": 0.7851461172103882, + "step": 1936 + }, + { + "epoch": 0.6826431718061674, + "grad_norm": 1.5670991097913773, + "learning_rate": 1.571453313830542e-05, + "loss": 0.7924813628196716, + "step": 1937 + }, + { + "epoch": 0.6829955947136563, + "grad_norm": 1.3030215719290177, + "learning_rate": 1.570975082670785e-05, + "loss": 0.6082741022109985, + "step": 1938 + }, + { + "epoch": 0.6833480176211454, + "grad_norm": 1.5878638287998994, + "learning_rate": 1.5704966576827563e-05, + "loss": 0.7307756543159485, + "step": 1939 + }, + { + "epoch": 0.6837004405286343, + "grad_norm": 1.421111197077357, + "learning_rate": 1.570018039028867e-05, + "loss": 0.6877273917198181, + "step": 1940 + }, + { + "epoch": 0.6840528634361234, + "grad_norm": 3.222041323215856, + "learning_rate": 1.5695392268715934e-05, + "loss": 0.7702943086624146, + "step": 1941 + }, + { + "epoch": 0.6844052863436123, + "grad_norm": 1.3182333231384877, + "learning_rate": 1.569060221373477e-05, + "loss": 0.6576820611953735, + "step": 1942 + }, + { + "epoch": 0.6847577092511014, + "grad_norm": 1.6178003008675335, + "learning_rate": 1.568581022697125e-05, + "loss": 0.6605322360992432, + "step": 1943 + }, + { + "epoch": 0.6851101321585903, + "grad_norm": 1.5479637201173908, 
+ "learning_rate": 1.568101631005211e-05, + "loss": 0.8065364360809326, + "step": 1944 + }, + { + "epoch": 0.6854625550660793, + "grad_norm": 1.5909483515555374, + "learning_rate": 1.5676220464604726e-05, + "loss": 0.8018748164176941, + "step": 1945 + }, + { + "epoch": 0.6858149779735683, + "grad_norm": 1.4496461628107289, + "learning_rate": 1.567142269225715e-05, + "loss": 0.6114683151245117, + "step": 1946 + }, + { + "epoch": 0.6861674008810573, + "grad_norm": 1.4567709922330223, + "learning_rate": 1.566662299463807e-05, + "loss": 0.8470789194107056, + "step": 1947 + }, + { + "epoch": 0.6865198237885463, + "grad_norm": 1.4716494157627575, + "learning_rate": 1.5661821373376837e-05, + "loss": 0.7133561372756958, + "step": 1948 + }, + { + "epoch": 0.6868722466960352, + "grad_norm": 1.6398709503866558, + "learning_rate": 1.5657017830103448e-05, + "loss": 0.9101625084877014, + "step": 1949 + }, + { + "epoch": 0.6872246696035242, + "grad_norm": 1.8312595153810016, + "learning_rate": 1.565221236644856e-05, + "loss": 0.7395101189613342, + "step": 1950 + }, + { + "epoch": 0.6875770925110132, + "grad_norm": 1.4532682115054107, + "learning_rate": 1.5647404984043474e-05, + "loss": 0.7421061992645264, + "step": 1951 + }, + { + "epoch": 0.6879295154185022, + "grad_norm": 1.4495130982943423, + "learning_rate": 1.5642595684520154e-05, + "loss": 0.8744432330131531, + "step": 1952 + }, + { + "epoch": 0.6882819383259912, + "grad_norm": 1.6475850419823541, + "learning_rate": 1.56377844695112e-05, + "loss": 0.8043868541717529, + "step": 1953 + }, + { + "epoch": 0.6886343612334802, + "grad_norm": 1.444538108927131, + "learning_rate": 1.5632971340649873e-05, + "loss": 0.6231396198272705, + "step": 1954 + }, + { + "epoch": 0.6889867841409691, + "grad_norm": 1.3765988847280666, + "learning_rate": 1.562815629957008e-05, + "loss": 0.7791434526443481, + "step": 1955 + }, + { + "epoch": 0.6893392070484582, + "grad_norm": 1.2135950275511538, + "learning_rate": 1.5623339347906383e-05, + 
"loss": 0.5652475357055664, + "step": 1956 + }, + { + "epoch": 0.6896916299559471, + "grad_norm": 1.4607959644694648, + "learning_rate": 1.561852048729398e-05, + "loss": 0.611067533493042, + "step": 1957 + }, + { + "epoch": 0.6900440528634362, + "grad_norm": 1.2569255893474116, + "learning_rate": 1.5613699719368724e-05, + "loss": 0.7580389976501465, + "step": 1958 + }, + { + "epoch": 0.6903964757709251, + "grad_norm": 1.516048041026883, + "learning_rate": 1.560887704576712e-05, + "loss": 0.6841205954551697, + "step": 1959 + }, + { + "epoch": 0.690748898678414, + "grad_norm": 1.7678860610521125, + "learning_rate": 1.5604052468126315e-05, + "loss": 0.7600575089454651, + "step": 1960 + }, + { + "epoch": 0.6911013215859031, + "grad_norm": 1.458096987341084, + "learning_rate": 1.55992259880841e-05, + "loss": 0.7547114491462708, + "step": 1961 + }, + { + "epoch": 0.691453744493392, + "grad_norm": 1.3490975617996133, + "learning_rate": 1.5594397607278912e-05, + "loss": 0.6917474865913391, + "step": 1962 + }, + { + "epoch": 0.6918061674008811, + "grad_norm": 1.378212312699651, + "learning_rate": 1.5589567327349845e-05, + "loss": 0.6820487976074219, + "step": 1963 + }, + { + "epoch": 0.69215859030837, + "grad_norm": 1.4687305992297937, + "learning_rate": 1.5584735149936628e-05, + "loss": 0.6513597965240479, + "step": 1964 + }, + { + "epoch": 0.6925110132158591, + "grad_norm": 1.4807223837447299, + "learning_rate": 1.5579901076679625e-05, + "loss": 0.668257474899292, + "step": 1965 + }, + { + "epoch": 0.692863436123348, + "grad_norm": 1.5130451892313703, + "learning_rate": 1.5575065109219864e-05, + "loss": 0.7600705623626709, + "step": 1966 + }, + { + "epoch": 0.693215859030837, + "grad_norm": 1.5218611988458295, + "learning_rate": 1.5570227249198993e-05, + "loss": 0.8140011429786682, + "step": 1967 + }, + { + "epoch": 0.693568281938326, + "grad_norm": 1.1438716908088957, + "learning_rate": 1.556538749825933e-05, + "loss": 0.610436201095581, + "step": 1968 + }, + { + 
"epoch": 0.693920704845815, + "grad_norm": 1.7706616264872619, + "learning_rate": 1.556054585804381e-05, + "loss": 0.7745693922042847, + "step": 1969 + }, + { + "epoch": 0.694273127753304, + "grad_norm": 1.4076568647110412, + "learning_rate": 1.5555702330196024e-05, + "loss": 0.5809592008590698, + "step": 1970 + }, + { + "epoch": 0.6946255506607929, + "grad_norm": 1.220751429593537, + "learning_rate": 1.5550856916360195e-05, + "loss": 0.6354515552520752, + "step": 1971 + }, + { + "epoch": 0.694977973568282, + "grad_norm": 1.4513364815061058, + "learning_rate": 1.5546009618181194e-05, + "loss": 0.8076149225234985, + "step": 1972 + }, + { + "epoch": 0.6953303964757709, + "grad_norm": 1.6702158357132753, + "learning_rate": 1.5541160437304524e-05, + "loss": 0.7553249597549438, + "step": 1973 + }, + { + "epoch": 0.6956828193832599, + "grad_norm": 1.4495619596653457, + "learning_rate": 1.5536309375376332e-05, + "loss": 0.6109169125556946, + "step": 1974 + }, + { + "epoch": 0.6960352422907489, + "grad_norm": 1.4052818449921982, + "learning_rate": 1.5531456434043404e-05, + "loss": 0.8184436559677124, + "step": 1975 + }, + { + "epoch": 0.6963876651982379, + "grad_norm": 1.3611746850672197, + "learning_rate": 1.5526601614953164e-05, + "loss": 0.6823909878730774, + "step": 1976 + }, + { + "epoch": 0.6967400881057269, + "grad_norm": 1.3254402340100906, + "learning_rate": 1.5521744919753668e-05, + "loss": 0.6669045090675354, + "step": 1977 + }, + { + "epoch": 0.6970925110132159, + "grad_norm": 1.7752129025350782, + "learning_rate": 1.5516886350093617e-05, + "loss": 0.8054187297821045, + "step": 1978 + }, + { + "epoch": 0.6974449339207048, + "grad_norm": 1.6379915816078137, + "learning_rate": 1.551202590762234e-05, + "loss": 0.7089184522628784, + "step": 1979 + }, + { + "epoch": 0.6977973568281939, + "grad_norm": 1.5207382048575195, + "learning_rate": 1.5507163593989804e-05, + "loss": 0.7908214330673218, + "step": 1980 + }, + { + "epoch": 0.6981497797356828, + "grad_norm": 
1.454323961299799, + "learning_rate": 1.5502299410846626e-05, + "loss": 0.8859039545059204, + "step": 1981 + }, + { + "epoch": 0.6985022026431718, + "grad_norm": 1.5085321450966587, + "learning_rate": 1.549743335984403e-05, + "loss": 0.7156866788864136, + "step": 1982 + }, + { + "epoch": 0.6988546255506608, + "grad_norm": 1.4496904801370623, + "learning_rate": 1.5492565442633894e-05, + "loss": 0.6158934831619263, + "step": 1983 + }, + { + "epoch": 0.6992070484581497, + "grad_norm": 1.5453977055484032, + "learning_rate": 1.548769566086873e-05, + "loss": 0.6689192056655884, + "step": 1984 + }, + { + "epoch": 0.6995594713656388, + "grad_norm": 1.4591630403591411, + "learning_rate": 1.548282401620167e-05, + "loss": 0.6695841550827026, + "step": 1985 + }, + { + "epoch": 0.6999118942731277, + "grad_norm": 1.6161480882103554, + "learning_rate": 1.5477950510286488e-05, + "loss": 0.7196098566055298, + "step": 1986 + }, + { + "epoch": 0.7002643171806168, + "grad_norm": 1.5261033448052712, + "learning_rate": 1.5473075144777586e-05, + "loss": 0.7811123132705688, + "step": 1987 + }, + { + "epoch": 0.7006167400881057, + "grad_norm": 1.3902237132074229, + "learning_rate": 1.5468197921330006e-05, + "loss": 0.6341326236724854, + "step": 1988 + }, + { + "epoch": 0.7009691629955948, + "grad_norm": 1.4052051959904983, + "learning_rate": 1.5463318841599408e-05, + "loss": 0.6344352960586548, + "step": 1989 + }, + { + "epoch": 0.7013215859030837, + "grad_norm": 1.5015659436227353, + "learning_rate": 1.5458437907242084e-05, + "loss": 0.6708072423934937, + "step": 1990 + }, + { + "epoch": 0.7016740088105727, + "grad_norm": 1.4551372124338164, + "learning_rate": 1.5453555119914963e-05, + "loss": 0.7018578052520752, + "step": 1991 + }, + { + "epoch": 0.7020264317180617, + "grad_norm": 1.4651591378979865, + "learning_rate": 1.5448670481275604e-05, + "loss": 0.6966190338134766, + "step": 1992 + }, + { + "epoch": 0.7023788546255506, + "grad_norm": 1.2815956936347872, + "learning_rate": 
1.5443783992982182e-05, + "loss": 0.6280171871185303, + "step": 1993 + }, + { + "epoch": 0.7027312775330397, + "grad_norm": 1.451492070117077, + "learning_rate": 1.5438895656693512e-05, + "loss": 0.6644559502601624, + "step": 1994 + }, + { + "epoch": 0.7030837004405286, + "grad_norm": 1.5030450433681415, + "learning_rate": 1.543400547406903e-05, + "loss": 0.776411771774292, + "step": 1995 + }, + { + "epoch": 0.7034361233480176, + "grad_norm": 1.428531901666428, + "learning_rate": 1.5429113446768805e-05, + "loss": 0.6353679895401001, + "step": 1996 + }, + { + "epoch": 0.7037885462555066, + "grad_norm": 1.468487936335314, + "learning_rate": 1.5424219576453526e-05, + "loss": 0.686774492263794, + "step": 1997 + }, + { + "epoch": 0.7041409691629956, + "grad_norm": 1.2525683766202464, + "learning_rate": 1.5419323864784508e-05, + "loss": 0.5296701192855835, + "step": 1998 + }, + { + "epoch": 0.7044933920704846, + "grad_norm": 1.264413948230812, + "learning_rate": 1.5414426313423692e-05, + "loss": 0.6246802806854248, + "step": 1999 + }, + { + "epoch": 0.7048458149779736, + "grad_norm": 1.44172793688486, + "learning_rate": 1.5409526924033646e-05, + "loss": 0.6633912920951843, + "step": 2000 + }, + { + "epoch": 0.7051982378854625, + "grad_norm": 1.720413855985522, + "learning_rate": 1.540462569827756e-05, + "loss": 0.7324577569961548, + "step": 2001 + }, + { + "epoch": 0.7055506607929516, + "grad_norm": 1.6372387419200998, + "learning_rate": 1.539972263781925e-05, + "loss": 0.7988085746765137, + "step": 2002 + }, + { + "epoch": 0.7059030837004405, + "grad_norm": 1.4528481393218415, + "learning_rate": 1.539481774432315e-05, + "loss": 0.6761256456375122, + "step": 2003 + }, + { + "epoch": 0.7062555066079295, + "grad_norm": 1.6101005409981786, + "learning_rate": 1.538991101945431e-05, + "loss": 0.6647740006446838, + "step": 2004 + }, + { + "epoch": 0.7066079295154185, + "grad_norm": 1.5047715708456952, + "learning_rate": 1.538500246487843e-05, + "loss": 0.7111536860466003, + 
"step": 2005 + }, + { + "epoch": 0.7069603524229074, + "grad_norm": 1.8533704165409681, + "learning_rate": 1.5380092082261797e-05, + "loss": 0.7395933270454407, + "step": 2006 + }, + { + "epoch": 0.7073127753303965, + "grad_norm": 1.4630720873509298, + "learning_rate": 1.5375179873271335e-05, + "loss": 0.6158996820449829, + "step": 2007 + }, + { + "epoch": 0.7076651982378854, + "grad_norm": 1.4746770670226905, + "learning_rate": 1.537026583957459e-05, + "loss": 0.7259848117828369, + "step": 2008 + }, + { + "epoch": 0.7080176211453745, + "grad_norm": 1.6674311554666914, + "learning_rate": 1.5365349982839723e-05, + "loss": 0.8370928764343262, + "step": 2009 + }, + { + "epoch": 0.7083700440528634, + "grad_norm": 1.3618230849109776, + "learning_rate": 1.536043230473551e-05, + "loss": 0.6041784882545471, + "step": 2010 + }, + { + "epoch": 0.7087224669603525, + "grad_norm": 1.4112680073946362, + "learning_rate": 1.535551280693135e-05, + "loss": 0.688548743724823, + "step": 2011 + }, + { + "epoch": 0.7090748898678414, + "grad_norm": 1.6056330275270763, + "learning_rate": 1.5350591491097265e-05, + "loss": 0.573681652545929, + "step": 2012 + }, + { + "epoch": 0.7094273127753304, + "grad_norm": 2.0956667904129636, + "learning_rate": 1.5345668358903886e-05, + "loss": 0.6919670104980469, + "step": 2013 + }, + { + "epoch": 0.7097797356828194, + "grad_norm": 1.6440284625605202, + "learning_rate": 1.534074341202246e-05, + "loss": 0.6693999767303467, + "step": 2014 + }, + { + "epoch": 0.7101321585903083, + "grad_norm": 1.5023686452775393, + "learning_rate": 1.533581665212486e-05, + "loss": 0.7204093337059021, + "step": 2015 + }, + { + "epoch": 0.7104845814977974, + "grad_norm": 1.7353596990699613, + "learning_rate": 1.5330888080883555e-05, + "loss": 0.6196314096450806, + "step": 2016 + }, + { + "epoch": 0.7108370044052863, + "grad_norm": 1.4190743094269347, + "learning_rate": 1.5325957699971657e-05, + "loss": 0.7292872071266174, + "step": 2017 + }, + { + "epoch": 
0.7111894273127753, + "grad_norm": 1.7578012075664924, + "learning_rate": 1.532102551106287e-05, + "loss": 0.7514410018920898, + "step": 2018 + }, + { + "epoch": 0.7115418502202643, + "grad_norm": 1.329552917806312, + "learning_rate": 1.531609151583152e-05, + "loss": 0.7683345079421997, + "step": 2019 + }, + { + "epoch": 0.7118942731277533, + "grad_norm": 1.8323846391695044, + "learning_rate": 1.5311155715952536e-05, + "loss": 0.6994156837463379, + "step": 2020 + }, + { + "epoch": 0.7122466960352423, + "grad_norm": 1.3407977210543047, + "learning_rate": 1.5306218113101482e-05, + "loss": 0.5530328750610352, + "step": 2021 + }, + { + "epoch": 0.7125991189427313, + "grad_norm": 1.6814720781682417, + "learning_rate": 1.530127870895451e-05, + "loss": 0.6126301884651184, + "step": 2022 + }, + { + "epoch": 0.7129515418502202, + "grad_norm": 1.9618212705640916, + "learning_rate": 1.5296337505188403e-05, + "loss": 0.7514982223510742, + "step": 2023 + }, + { + "epoch": 0.7133039647577093, + "grad_norm": 1.742411408925072, + "learning_rate": 1.529139450348054e-05, + "loss": 0.7087191939353943, + "step": 2024 + }, + { + "epoch": 0.7136563876651982, + "grad_norm": 1.3195305972662899, + "learning_rate": 1.5286449705508914e-05, + "loss": 0.5713562965393066, + "step": 2025 + }, + { + "epoch": 0.7140088105726872, + "grad_norm": 1.3621779724967453, + "learning_rate": 1.5281503112952136e-05, + "loss": 0.6796679496765137, + "step": 2026 + }, + { + "epoch": 0.7143612334801762, + "grad_norm": 1.8247081007192694, + "learning_rate": 1.5276554727489415e-05, + "loss": 0.7902421355247498, + "step": 2027 + }, + { + "epoch": 0.7147136563876652, + "grad_norm": 1.3608050254188053, + "learning_rate": 1.527160455080058e-05, + "loss": 0.6645491123199463, + "step": 2028 + }, + { + "epoch": 0.7150660792951542, + "grad_norm": 1.489658346292968, + "learning_rate": 1.5266652584566056e-05, + "loss": 0.6077255606651306, + "step": 2029 + }, + { + "epoch": 0.7154185022026431, + "grad_norm": 
1.412193602346091, + "learning_rate": 1.5261698830466888e-05, + "loss": 0.6219078302383423, + "step": 2030 + }, + { + "epoch": 0.7157709251101322, + "grad_norm": 1.280704281307457, + "learning_rate": 1.5256743290184713e-05, + "loss": 0.5895035266876221, + "step": 2031 + }, + { + "epoch": 0.7161233480176211, + "grad_norm": 1.497416305314063, + "learning_rate": 1.5251785965401786e-05, + "loss": 0.6735520958900452, + "step": 2032 + }, + { + "epoch": 0.7164757709251102, + "grad_norm": 1.353147232010895, + "learning_rate": 1.524682685780097e-05, + "loss": 0.6212488412857056, + "step": 2033 + }, + { + "epoch": 0.7168281938325991, + "grad_norm": 1.5786628078958613, + "learning_rate": 1.524186596906572e-05, + "loss": 0.7181172966957092, + "step": 2034 + }, + { + "epoch": 0.7171806167400882, + "grad_norm": 3.1301800941750906, + "learning_rate": 1.5236903300880107e-05, + "loss": 0.7156587839126587, + "step": 2035 + }, + { + "epoch": 0.7175330396475771, + "grad_norm": 1.513371130481219, + "learning_rate": 1.52319388549288e-05, + "loss": 0.6989034414291382, + "step": 2036 + }, + { + "epoch": 0.7178854625550661, + "grad_norm": 1.5183441818080943, + "learning_rate": 1.5226972632897079e-05, + "loss": 0.7224982976913452, + "step": 2037 + }, + { + "epoch": 0.7182378854625551, + "grad_norm": 1.5033480023563544, + "learning_rate": 1.522200463647082e-05, + "loss": 0.6871547698974609, + "step": 2038 + }, + { + "epoch": 0.718590308370044, + "grad_norm": 1.5898527901911406, + "learning_rate": 1.5217034867336498e-05, + "loss": 0.725049614906311, + "step": 2039 + }, + { + "epoch": 0.718942731277533, + "grad_norm": 2.079980258079047, + "learning_rate": 1.5212063327181197e-05, + "loss": 0.7105863094329834, + "step": 2040 + }, + { + "epoch": 0.719295154185022, + "grad_norm": 1.4720898042575539, + "learning_rate": 1.5207090017692605e-05, + "loss": 0.5823827981948853, + "step": 2041 + }, + { + "epoch": 0.719647577092511, + "grad_norm": 1.9166232714289464, + "learning_rate": 
1.5202114940559005e-05, + "loss": 0.7087944746017456, + "step": 2042 + }, + { + "epoch": 0.72, + "grad_norm": 1.40676061171607, + "learning_rate": 1.5197138097469275e-05, + "loss": 0.6678824424743652, + "step": 2043 + }, + { + "epoch": 0.720352422907489, + "grad_norm": 1.8181396920642288, + "learning_rate": 1.5192159490112904e-05, + "loss": 0.7318846583366394, + "step": 2044 + }, + { + "epoch": 0.720704845814978, + "grad_norm": 1.4972370605408583, + "learning_rate": 1.5187179120179969e-05, + "loss": 0.7245825529098511, + "step": 2045 + }, + { + "epoch": 0.721057268722467, + "grad_norm": 1.8554569851295908, + "learning_rate": 1.5182196989361155e-05, + "loss": 0.7691583633422852, + "step": 2046 + }, + { + "epoch": 0.7214096916299559, + "grad_norm": 1.8926959198228865, + "learning_rate": 1.517721309934774e-05, + "loss": 0.7961187362670898, + "step": 2047 + }, + { + "epoch": 0.721762114537445, + "grad_norm": 1.4465824812635413, + "learning_rate": 1.51722274518316e-05, + "loss": 0.7163759469985962, + "step": 2048 + }, + { + "epoch": 0.7221145374449339, + "grad_norm": 1.5931659235074929, + "learning_rate": 1.51672400485052e-05, + "loss": 0.6807754039764404, + "step": 2049 + }, + { + "epoch": 0.7224669603524229, + "grad_norm": 1.6629043788678177, + "learning_rate": 1.516225089106162e-05, + "loss": 0.7026433348655701, + "step": 2050 + }, + { + "epoch": 0.7228193832599119, + "grad_norm": 1.5979782761024863, + "learning_rate": 1.5157259981194514e-05, + "loss": 0.8230476379394531, + "step": 2051 + }, + { + "epoch": 0.7231718061674008, + "grad_norm": 1.7451468269512191, + "learning_rate": 1.5152267320598149e-05, + "loss": 0.6466805934906006, + "step": 2052 + }, + { + "epoch": 0.7235242290748899, + "grad_norm": 1.441654513994546, + "learning_rate": 1.5147272910967368e-05, + "loss": 0.7203368544578552, + "step": 2053 + }, + { + "epoch": 0.7238766519823788, + "grad_norm": 1.3552926542352444, + "learning_rate": 1.5142276753997627e-05, + "loss": 0.6455702781677246, + "step": 2054 + 
}, + { + "epoch": 0.7242290748898679, + "grad_norm": 1.4569594560235375, + "learning_rate": 1.5137278851384958e-05, + "loss": 0.609260082244873, + "step": 2055 + }, + { + "epoch": 0.7245814977973568, + "grad_norm": 1.8083723333355965, + "learning_rate": 1.5132279204826e-05, + "loss": 0.8320673704147339, + "step": 2056 + }, + { + "epoch": 0.7249339207048459, + "grad_norm": 1.5846751172626037, + "learning_rate": 1.512727781601797e-05, + "loss": 0.8497718572616577, + "step": 2057 + }, + { + "epoch": 0.7252863436123348, + "grad_norm": 1.3523103900088498, + "learning_rate": 1.5122274686658695e-05, + "loss": 0.6398370265960693, + "step": 2058 + }, + { + "epoch": 0.7256387665198238, + "grad_norm": 1.4475161405549521, + "learning_rate": 1.511726981844657e-05, + "loss": 0.7562476396560669, + "step": 2059 + }, + { + "epoch": 0.7259911894273128, + "grad_norm": 1.8369611551341436, + "learning_rate": 1.51122632130806e-05, + "loss": 0.7948570251464844, + "step": 2060 + }, + { + "epoch": 0.7263436123348017, + "grad_norm": 1.9057892039367437, + "learning_rate": 1.5107254872260366e-05, + "loss": 0.7062652111053467, + "step": 2061 + }, + { + "epoch": 0.7266960352422908, + "grad_norm": 1.666793884988277, + "learning_rate": 1.5102244797686049e-05, + "loss": 0.6290205717086792, + "step": 2062 + }, + { + "epoch": 0.7270484581497797, + "grad_norm": 1.7111515682842917, + "learning_rate": 1.5097232991058409e-05, + "loss": 0.727097749710083, + "step": 2063 + }, + { + "epoch": 0.7274008810572687, + "grad_norm": 1.6005396217530683, + "learning_rate": 1.5092219454078803e-05, + "loss": 0.783380389213562, + "step": 2064 + }, + { + "epoch": 0.7277533039647577, + "grad_norm": 1.4872748126751951, + "learning_rate": 1.5087204188449165e-05, + "loss": 0.6190629601478577, + "step": 2065 + }, + { + "epoch": 0.7281057268722467, + "grad_norm": 1.5426042958975894, + "learning_rate": 1.5082187195872026e-05, + "loss": 0.6749798059463501, + "step": 2066 + }, + { + "epoch": 0.7284581497797357, + "grad_norm": 
1.524694880675492, + "learning_rate": 1.5077168478050494e-05, + "loss": 0.6581153273582458, + "step": 2067 + }, + { + "epoch": 0.7288105726872247, + "grad_norm": 1.433767292714838, + "learning_rate": 1.5072148036688279e-05, + "loss": 0.6886252760887146, + "step": 2068 + }, + { + "epoch": 0.7291629955947136, + "grad_norm": 1.651630016781231, + "learning_rate": 1.506712587348965e-05, + "loss": 0.6893814206123352, + "step": 2069 + }, + { + "epoch": 0.7295154185022027, + "grad_norm": 1.7840073958291343, + "learning_rate": 1.5062101990159486e-05, + "loss": 0.8242654800415039, + "step": 2070 + }, + { + "epoch": 0.7298678414096916, + "grad_norm": 1.4785860236042563, + "learning_rate": 1.5057076388403229e-05, + "loss": 0.6331228017807007, + "step": 2071 + }, + { + "epoch": 0.7302202643171806, + "grad_norm": 1.999658994203056, + "learning_rate": 1.5052049069926927e-05, + "loss": 0.6440649032592773, + "step": 2072 + }, + { + "epoch": 0.7305726872246696, + "grad_norm": 1.4709264297577982, + "learning_rate": 1.5047020036437187e-05, + "loss": 0.7575498819351196, + "step": 2073 + }, + { + "epoch": 0.7309251101321586, + "grad_norm": 1.8032604054381702, + "learning_rate": 1.5041989289641215e-05, + "loss": 0.7530438899993896, + "step": 2074 + }, + { + "epoch": 0.7312775330396476, + "grad_norm": 1.5344556457224068, + "learning_rate": 1.5036956831246792e-05, + "loss": 0.6035616397857666, + "step": 2075 + }, + { + "epoch": 0.7316299559471365, + "grad_norm": 1.5603807233808964, + "learning_rate": 1.5031922662962279e-05, + "loss": 0.8199492692947388, + "step": 2076 + }, + { + "epoch": 0.7319823788546256, + "grad_norm": 1.4221584765379676, + "learning_rate": 1.5026886786496624e-05, + "loss": 0.7700716257095337, + "step": 2077 + }, + { + "epoch": 0.7323348017621145, + "grad_norm": 1.363028657258907, + "learning_rate": 1.5021849203559347e-05, + "loss": 0.6147816777229309, + "step": 2078 + }, + { + "epoch": 0.7326872246696036, + "grad_norm": 1.5628142146943151, + "learning_rate": 
1.5016809915860549e-05, + "loss": 0.6841654777526855, + "step": 2079 + }, + { + "epoch": 0.7330396475770925, + "grad_norm": 1.7910877668379601, + "learning_rate": 1.5011768925110915e-05, + "loss": 0.7212510108947754, + "step": 2080 + }, + { + "epoch": 0.7333920704845815, + "grad_norm": 1.5222211216380177, + "learning_rate": 1.5006726233021702e-05, + "loss": 0.6695969104766846, + "step": 2081 + }, + { + "epoch": 0.7337444933920705, + "grad_norm": 1.391558192885713, + "learning_rate": 1.500168184130475e-05, + "loss": 0.5991939306259155, + "step": 2082 + }, + { + "epoch": 0.7340969162995594, + "grad_norm": 1.4191544168706896, + "learning_rate": 1.4996635751672467e-05, + "loss": 0.7127671241760254, + "step": 2083 + }, + { + "epoch": 0.7344493392070485, + "grad_norm": 1.6905086418980109, + "learning_rate": 1.4991587965837853e-05, + "loss": 0.6874737739562988, + "step": 2084 + }, + { + "epoch": 0.7348017621145374, + "grad_norm": 1.3584519480933235, + "learning_rate": 1.4986538485514466e-05, + "loss": 0.6695086359977722, + "step": 2085 + }, + { + "epoch": 0.7351541850220265, + "grad_norm": 1.694264564137899, + "learning_rate": 1.4981487312416452e-05, + "loss": 0.8366880416870117, + "step": 2086 + }, + { + "epoch": 0.7355066079295154, + "grad_norm": 1.4589826786561007, + "learning_rate": 1.4976434448258519e-05, + "loss": 0.6448042988777161, + "step": 2087 + }, + { + "epoch": 0.7358590308370044, + "grad_norm": 1.8583566766216881, + "learning_rate": 1.4971379894755969e-05, + "loss": 0.7015181183815002, + "step": 2088 + }, + { + "epoch": 0.7362114537444934, + "grad_norm": 1.702091122213854, + "learning_rate": 1.4966323653624657e-05, + "loss": 0.6842815279960632, + "step": 2089 + }, + { + "epoch": 0.7365638766519824, + "grad_norm": 1.7134163669939546, + "learning_rate": 1.4961265726581025e-05, + "loss": 0.6866877675056458, + "step": 2090 + }, + { + "epoch": 0.7369162995594714, + "grad_norm": 1.537334961209543, + "learning_rate": 1.4956206115342076e-05, + "loss": 
0.5486865043640137, + "step": 2091 + }, + { + "epoch": 0.7372687224669604, + "grad_norm": 1.7196744065626985, + "learning_rate": 1.4951144821625396e-05, + "loss": 0.7241986989974976, + "step": 2092 + }, + { + "epoch": 0.7376211453744493, + "grad_norm": 1.647893211532232, + "learning_rate": 1.4946081847149134e-05, + "loss": 0.8400537967681885, + "step": 2093 + }, + { + "epoch": 0.7379735682819383, + "grad_norm": 2.2262132208657146, + "learning_rate": 1.4941017193632013e-05, + "loss": 0.6050147414207458, + "step": 2094 + }, + { + "epoch": 0.7383259911894273, + "grad_norm": 1.337421477916073, + "learning_rate": 1.4935950862793322e-05, + "loss": 0.6744229197502136, + "step": 2095 + }, + { + "epoch": 0.7386784140969163, + "grad_norm": 1.4345512538147223, + "learning_rate": 1.493088285635293e-05, + "loss": 0.6902294158935547, + "step": 2096 + }, + { + "epoch": 0.7390308370044053, + "grad_norm": 1.8712136012401615, + "learning_rate": 1.492581317603126e-05, + "loss": 0.6328809261322021, + "step": 2097 + }, + { + "epoch": 0.7393832599118942, + "grad_norm": 1.4287618993627116, + "learning_rate": 1.4920741823549316e-05, + "loss": 0.5740914344787598, + "step": 2098 + }, + { + "epoch": 0.7397356828193833, + "grad_norm": 2.181624869430245, + "learning_rate": 1.491566880062866e-05, + "loss": 0.676064133644104, + "step": 2099 + }, + { + "epoch": 0.7400881057268722, + "grad_norm": 1.5152586818427025, + "learning_rate": 1.4910594108991427e-05, + "loss": 0.655153751373291, + "step": 2100 + }, + { + "epoch": 0.7404405286343613, + "grad_norm": 1.7534591753196083, + "learning_rate": 1.4905517750360321e-05, + "loss": 0.7406177520751953, + "step": 2101 + }, + { + "epoch": 0.7407929515418502, + "grad_norm": 1.777307095945404, + "learning_rate": 1.4900439726458602e-05, + "loss": 0.6568606495857239, + "step": 2102 + }, + { + "epoch": 0.7411453744493393, + "grad_norm": 1.661203262476052, + "learning_rate": 1.4895360039010101e-05, + "loss": 0.8073545098304749, + "step": 2103 + }, + { + 
"epoch": 0.7414977973568282, + "grad_norm": 1.6727123321226325, + "learning_rate": 1.4890278689739219e-05, + "loss": 0.6350502967834473, + "step": 2104 + }, + { + "epoch": 0.7418502202643171, + "grad_norm": 1.475293376760879, + "learning_rate": 1.4885195680370915e-05, + "loss": 0.6419750452041626, + "step": 2105 + }, + { + "epoch": 0.7422026431718062, + "grad_norm": 1.5480091112446772, + "learning_rate": 1.4880111012630706e-05, + "loss": 0.72661292552948, + "step": 2106 + }, + { + "epoch": 0.7425550660792951, + "grad_norm": 1.5125479406066336, + "learning_rate": 1.4875024688244683e-05, + "loss": 0.6996778845787048, + "step": 2107 + }, + { + "epoch": 0.7429074889867842, + "grad_norm": 1.7343888178448454, + "learning_rate": 1.4869936708939497e-05, + "loss": 0.8383389711380005, + "step": 2108 + }, + { + "epoch": 0.7432599118942731, + "grad_norm": 1.6950461405964057, + "learning_rate": 1.4864847076442358e-05, + "loss": 0.6863676905632019, + "step": 2109 + }, + { + "epoch": 0.7436123348017621, + "grad_norm": 1.781136801701718, + "learning_rate": 1.4859755792481032e-05, + "loss": 0.8493780493736267, + "step": 2110 + }, + { + "epoch": 0.7439647577092511, + "grad_norm": 1.3754571175527768, + "learning_rate": 1.4854662858783857e-05, + "loss": 0.6172446012496948, + "step": 2111 + }, + { + "epoch": 0.7443171806167401, + "grad_norm": 6.860121931549926, + "learning_rate": 1.4849568277079724e-05, + "loss": 0.8390353918075562, + "step": 2112 + }, + { + "epoch": 0.7446696035242291, + "grad_norm": 1.8563178731324264, + "learning_rate": 1.4844472049098087e-05, + "loss": 0.7108968496322632, + "step": 2113 + }, + { + "epoch": 0.7450220264317181, + "grad_norm": 1.5680406370173388, + "learning_rate": 1.4839374176568956e-05, + "loss": 0.7322912812232971, + "step": 2114 + }, + { + "epoch": 0.745374449339207, + "grad_norm": 1.5999840343791083, + "learning_rate": 1.4834274661222896e-05, + "loss": 0.6371238231658936, + "step": 2115 + }, + { + "epoch": 0.745726872246696, + "grad_norm": 
1.6793360349519253, + "learning_rate": 1.4829173504791035e-05, + "loss": 0.8346511125564575, + "step": 2116 + }, + { + "epoch": 0.746079295154185, + "grad_norm": 1.5530745059154032, + "learning_rate": 1.4824070709005063e-05, + "loss": 0.5893645286560059, + "step": 2117 + }, + { + "epoch": 0.746431718061674, + "grad_norm": 1.298803943907695, + "learning_rate": 1.4818966275597213e-05, + "loss": 0.60541832447052, + "step": 2118 + }, + { + "epoch": 0.746784140969163, + "grad_norm": 2.0046684565684108, + "learning_rate": 1.4813860206300286e-05, + "loss": 0.5823955535888672, + "step": 2119 + }, + { + "epoch": 0.747136563876652, + "grad_norm": 1.8094924676670123, + "learning_rate": 1.480875250284763e-05, + "loss": 0.6751007437705994, + "step": 2120 + }, + { + "epoch": 0.747488986784141, + "grad_norm": 1.5760168475146599, + "learning_rate": 1.4803643166973155e-05, + "loss": 0.6878843307495117, + "step": 2121 + }, + { + "epoch": 0.7478414096916299, + "grad_norm": 1.4061876649605263, + "learning_rate": 1.4798532200411319e-05, + "loss": 0.6732173562049866, + "step": 2122 + }, + { + "epoch": 0.748193832599119, + "grad_norm": 1.558565097379613, + "learning_rate": 1.479341960489714e-05, + "loss": 0.6383658647537231, + "step": 2123 + }, + { + "epoch": 0.7485462555066079, + "grad_norm": 1.8120908321553708, + "learning_rate": 1.4788305382166174e-05, + "loss": 0.7444638013839722, + "step": 2124 + }, + { + "epoch": 0.748898678414097, + "grad_norm": 1.7437949253948153, + "learning_rate": 1.4783189533954555e-05, + "loss": 0.5492427349090576, + "step": 2125 + }, + { + "epoch": 0.7492511013215859, + "grad_norm": 1.60343309806789, + "learning_rate": 1.4778072061998944e-05, + "loss": 0.6193333864212036, + "step": 2126 + }, + { + "epoch": 0.7496035242290748, + "grad_norm": 2.019729643045431, + "learning_rate": 1.4772952968036572e-05, + "loss": 0.853213906288147, + "step": 2127 + }, + { + "epoch": 0.7499559471365639, + "grad_norm": 1.4306248677016198, + "learning_rate": 
1.4767832253805203e-05, + "loss": 0.6128672361373901, + "step": 2128 + }, + { + "epoch": 0.7503083700440528, + "grad_norm": 1.7550432779472305, + "learning_rate": 1.4762709921043166e-05, + "loss": 0.7298723459243774, + "step": 2129 + }, + { + "epoch": 0.7506607929515419, + "grad_norm": 1.3773404123246435, + "learning_rate": 1.475758597148933e-05, + "loss": 0.6578782796859741, + "step": 2130 + }, + { + "epoch": 0.7510132158590308, + "grad_norm": 1.6603784675007325, + "learning_rate": 1.4752460406883122e-05, + "loss": 0.6490681171417236, + "step": 2131 + }, + { + "epoch": 0.7513656387665198, + "grad_norm": 1.530112138397779, + "learning_rate": 1.4747333228964502e-05, + "loss": 0.657980740070343, + "step": 2132 + }, + { + "epoch": 0.7517180616740088, + "grad_norm": 1.9937499661396574, + "learning_rate": 1.4742204439473999e-05, + "loss": 0.8431578874588013, + "step": 2133 + }, + { + "epoch": 0.7520704845814978, + "grad_norm": 1.7351787739786175, + "learning_rate": 1.4737074040152667e-05, + "loss": 0.7217377424240112, + "step": 2134 + }, + { + "epoch": 0.7524229074889868, + "grad_norm": 2.232953474209366, + "learning_rate": 1.4731942032742127e-05, + "loss": 0.6299912333488464, + "step": 2135 + }, + { + "epoch": 0.7527753303964758, + "grad_norm": 1.6053563211063129, + "learning_rate": 1.4726808418984527e-05, + "loss": 0.6325603723526001, + "step": 2136 + }, + { + "epoch": 0.7531277533039648, + "grad_norm": 1.7427287871247603, + "learning_rate": 1.4721673200622572e-05, + "loss": 0.6785098314285278, + "step": 2137 + }, + { + "epoch": 0.7534801762114537, + "grad_norm": 2.5780020778792068, + "learning_rate": 1.471653637939951e-05, + "loss": 0.7311918139457703, + "step": 2138 + }, + { + "epoch": 0.7538325991189427, + "grad_norm": 1.498799685922224, + "learning_rate": 1.4711397957059132e-05, + "loss": 0.7117096781730652, + "step": 2139 + }, + { + "epoch": 0.7541850220264317, + "grad_norm": 1.4519847744536865, + "learning_rate": 1.4706257935345772e-05, + "loss": 
0.6709408760070801, + "step": 2140 + }, + { + "epoch": 0.7545374449339207, + "grad_norm": 1.9629689982019365, + "learning_rate": 1.4701116316004307e-05, + "loss": 0.6478008031845093, + "step": 2141 + }, + { + "epoch": 0.7548898678414097, + "grad_norm": 1.5362345610055923, + "learning_rate": 1.4695973100780154e-05, + "loss": 0.6414140462875366, + "step": 2142 + }, + { + "epoch": 0.7552422907488987, + "grad_norm": 1.7088547501964069, + "learning_rate": 1.4690828291419283e-05, + "loss": 0.6947815418243408, + "step": 2143 + }, + { + "epoch": 0.7555947136563876, + "grad_norm": 1.6244554419934112, + "learning_rate": 1.4685681889668187e-05, + "loss": 0.6614837646484375, + "step": 2144 + }, + { + "epoch": 0.7559471365638767, + "grad_norm": 1.87010430937903, + "learning_rate": 1.4680533897273913e-05, + "loss": 0.7803678512573242, + "step": 2145 + }, + { + "epoch": 0.7562995594713656, + "grad_norm": 1.975192105020327, + "learning_rate": 1.4675384315984045e-05, + "loss": 0.8411567211151123, + "step": 2146 + }, + { + "epoch": 0.7566519823788547, + "grad_norm": 2.4329758477488177, + "learning_rate": 1.4670233147546708e-05, + "loss": 0.8379243016242981, + "step": 2147 + }, + { + "epoch": 0.7570044052863436, + "grad_norm": 1.6153137773652926, + "learning_rate": 1.4665080393710558e-05, + "loss": 0.6419194936752319, + "step": 2148 + }, + { + "epoch": 0.7573568281938327, + "grad_norm": 1.8383077301350303, + "learning_rate": 1.4659926056224798e-05, + "loss": 0.7791979908943176, + "step": 2149 + }, + { + "epoch": 0.7577092511013216, + "grad_norm": 1.72203201226436, + "learning_rate": 1.465477013683916e-05, + "loss": 0.7237389087677002, + "step": 2150 + }, + { + "epoch": 0.7580616740088105, + "grad_norm": 1.5129431088418641, + "learning_rate": 1.464961263730393e-05, + "loss": 0.6750755906105042, + "step": 2151 + }, + { + "epoch": 0.7584140969162996, + "grad_norm": 1.3799525283393634, + "learning_rate": 1.4644453559369904e-05, + "loss": 0.5412150621414185, + "step": 2152 + }, + { + 
"epoch": 0.7587665198237885, + "grad_norm": 1.7752121571388841, + "learning_rate": 1.463929290478844e-05, + "loss": 0.7009850740432739, + "step": 2153 + }, + { + "epoch": 0.7591189427312776, + "grad_norm": 1.5166585489574307, + "learning_rate": 1.4634130675311411e-05, + "loss": 0.8678998351097107, + "step": 2154 + }, + { + "epoch": 0.7594713656387665, + "grad_norm": 2.0127463717616347, + "learning_rate": 1.4628966872691241e-05, + "loss": 0.7395705580711365, + "step": 2155 + }, + { + "epoch": 0.7598237885462555, + "grad_norm": 1.5739842401493016, + "learning_rate": 1.4623801498680875e-05, + "loss": 0.5950812101364136, + "step": 2156 + }, + { + "epoch": 0.7601762114537445, + "grad_norm": 1.6474041176538503, + "learning_rate": 1.46186345550338e-05, + "loss": 0.7133630514144897, + "step": 2157 + }, + { + "epoch": 0.7605286343612335, + "grad_norm": 1.4644647660974064, + "learning_rate": 1.4613466043504026e-05, + "loss": 0.7551965117454529, + "step": 2158 + }, + { + "epoch": 0.7608810572687225, + "grad_norm": 1.4284086636489846, + "learning_rate": 1.4608295965846111e-05, + "loss": 0.6654022932052612, + "step": 2159 + }, + { + "epoch": 0.7612334801762115, + "grad_norm": 3.5518990487711126, + "learning_rate": 1.460312432381513e-05, + "loss": 0.8081967830657959, + "step": 2160 + }, + { + "epoch": 0.7615859030837004, + "grad_norm": 1.8113760087057564, + "learning_rate": 1.4597951119166696e-05, + "loss": 0.7478348016738892, + "step": 2161 + }, + { + "epoch": 0.7619383259911894, + "grad_norm": 2.9384500423152833, + "learning_rate": 1.4592776353656948e-05, + "loss": 0.7866748571395874, + "step": 2162 + }, + { + "epoch": 0.7622907488986784, + "grad_norm": 1.4185631764668494, + "learning_rate": 1.4587600029042563e-05, + "loss": 0.6675869226455688, + "step": 2163 + }, + { + "epoch": 0.7626431718061674, + "grad_norm": 1.934904377243222, + "learning_rate": 1.4582422147080739e-05, + "loss": 0.6881103515625, + "step": 2164 + }, + { + "epoch": 0.7629955947136564, + "grad_norm": 
1.6886719056667128, + "learning_rate": 1.457724270952921e-05, + "loss": 0.7298593521118164, + "step": 2165 + }, + { + "epoch": 0.7633480176211453, + "grad_norm": 1.5123877451607526, + "learning_rate": 1.4572061718146224e-05, + "loss": 0.7102776765823364, + "step": 2166 + }, + { + "epoch": 0.7637004405286344, + "grad_norm": 1.6706836844885837, + "learning_rate": 1.4566879174690576e-05, + "loss": 0.7767213582992554, + "step": 2167 + }, + { + "epoch": 0.7640528634361233, + "grad_norm": 1.4702267439170456, + "learning_rate": 1.4561695080921573e-05, + "loss": 0.7480257749557495, + "step": 2168 + }, + { + "epoch": 0.7644052863436124, + "grad_norm": 1.4326376726611632, + "learning_rate": 1.4556509438599057e-05, + "loss": 0.7419564723968506, + "step": 2169 + }, + { + "epoch": 0.7647577092511013, + "grad_norm": 1.4787079836022163, + "learning_rate": 1.4551322249483388e-05, + "loss": 0.6820264458656311, + "step": 2170 + }, + { + "epoch": 0.7651101321585904, + "grad_norm": 1.3819947250134947, + "learning_rate": 1.4546133515335462e-05, + "loss": 0.5947732329368591, + "step": 2171 + }, + { + "epoch": 0.7654625550660793, + "grad_norm": 1.6478975280830812, + "learning_rate": 1.4540943237916685e-05, + "loss": 0.6772021055221558, + "step": 2172 + }, + { + "epoch": 0.7658149779735682, + "grad_norm": 1.7643629263201115, + "learning_rate": 1.4535751418989e-05, + "loss": 0.7822210192680359, + "step": 2173 + }, + { + "epoch": 0.7661674008810573, + "grad_norm": 1.6079996302057808, + "learning_rate": 1.4530558060314866e-05, + "loss": 0.6208021640777588, + "step": 2174 + }, + { + "epoch": 0.7665198237885462, + "grad_norm": 1.5681481752797541, + "learning_rate": 1.4525363163657264e-05, + "loss": 0.8017063140869141, + "step": 2175 + }, + { + "epoch": 0.7668722466960353, + "grad_norm": 1.4681783580715917, + "learning_rate": 1.4520166730779704e-05, + "loss": 0.738383948802948, + "step": 2176 + }, + { + "epoch": 0.7672246696035242, + "grad_norm": 1.742058488341915, + "learning_rate": 
1.4514968763446213e-05, + "loss": 0.7698314785957336, + "step": 2177 + }, + { + "epoch": 0.7675770925110132, + "grad_norm": 1.7037031257568012, + "learning_rate": 1.4509769263421337e-05, + "loss": 0.789836049079895, + "step": 2178 + }, + { + "epoch": 0.7679295154185022, + "grad_norm": 1.8506345351591968, + "learning_rate": 1.4504568232470145e-05, + "loss": 0.6437339782714844, + "step": 2179 + }, + { + "epoch": 0.7682819383259912, + "grad_norm": 2.04999468198658, + "learning_rate": 1.4499365672358226e-05, + "loss": 0.6684735417366028, + "step": 2180 + }, + { + "epoch": 0.7686343612334802, + "grad_norm": 1.5077038126146909, + "learning_rate": 1.4494161584851687e-05, + "loss": 0.6577454805374146, + "step": 2181 + }, + { + "epoch": 0.7689867841409692, + "grad_norm": 1.3277471323795764, + "learning_rate": 1.4488955971717154e-05, + "loss": 0.5975776314735413, + "step": 2182 + }, + { + "epoch": 0.7693392070484582, + "grad_norm": 1.8819815707164231, + "learning_rate": 1.4483748834721767e-05, + "loss": 0.6385577917098999, + "step": 2183 + }, + { + "epoch": 0.7696916299559471, + "grad_norm": 1.4452778349053288, + "learning_rate": 1.4478540175633193e-05, + "loss": 0.6295928955078125, + "step": 2184 + }, + { + "epoch": 0.7700440528634361, + "grad_norm": 1.5790897154124113, + "learning_rate": 1.4473329996219605e-05, + "loss": 0.6848496198654175, + "step": 2185 + }, + { + "epoch": 0.7703964757709251, + "grad_norm": 1.410283277756768, + "learning_rate": 1.44681182982497e-05, + "loss": 0.6476501226425171, + "step": 2186 + }, + { + "epoch": 0.7707488986784141, + "grad_norm": 1.5220085975801703, + "learning_rate": 1.4462905083492683e-05, + "loss": 0.750103235244751, + "step": 2187 + }, + { + "epoch": 0.771101321585903, + "grad_norm": 1.3838063845924222, + "learning_rate": 1.4457690353718285e-05, + "loss": 0.668454110622406, + "step": 2188 + }, + { + "epoch": 0.7714537444933921, + "grad_norm": 1.3695000422583874, + "learning_rate": 1.4452474110696738e-05, + "loss": 
0.6671048402786255, + "step": 2189 + }, + { + "epoch": 0.771806167400881, + "grad_norm": 1.404147919130693, + "learning_rate": 1.4447256356198797e-05, + "loss": 0.6261379718780518, + "step": 2190 + }, + { + "epoch": 0.7721585903083701, + "grad_norm": 1.6192228095415668, + "learning_rate": 1.4442037091995726e-05, + "loss": 0.6128308176994324, + "step": 2191 + }, + { + "epoch": 0.772511013215859, + "grad_norm": 1.629684954387357, + "learning_rate": 1.4436816319859306e-05, + "loss": 0.7709108591079712, + "step": 2192 + }, + { + "epoch": 0.7728634361233481, + "grad_norm": 1.7604991326643686, + "learning_rate": 1.4431594041561822e-05, + "loss": 0.6242028474807739, + "step": 2193 + }, + { + "epoch": 0.773215859030837, + "grad_norm": 1.7562103574700596, + "learning_rate": 1.4426370258876079e-05, + "loss": 0.8030718564987183, + "step": 2194 + }, + { + "epoch": 0.7735682819383259, + "grad_norm": 1.5182882363444798, + "learning_rate": 1.4421144973575386e-05, + "loss": 0.7785710692405701, + "step": 2195 + }, + { + "epoch": 0.773920704845815, + "grad_norm": 1.5453752656669346, + "learning_rate": 1.4415918187433564e-05, + "loss": 0.6846014857292175, + "step": 2196 + }, + { + "epoch": 0.7742731277533039, + "grad_norm": 1.6007643935951585, + "learning_rate": 1.4410689902224947e-05, + "loss": 0.7883827686309814, + "step": 2197 + }, + { + "epoch": 0.774625550660793, + "grad_norm": 2.0453745328196065, + "learning_rate": 1.4405460119724377e-05, + "loss": 0.8285650610923767, + "step": 2198 + }, + { + "epoch": 0.7749779735682819, + "grad_norm": 1.5026043059194256, + "learning_rate": 1.4400228841707193e-05, + "loss": 0.6101093292236328, + "step": 2199 + }, + { + "epoch": 0.775330396475771, + "grad_norm": 1.4888885445589903, + "learning_rate": 1.4394996069949262e-05, + "loss": 0.6627891063690186, + "step": 2200 + }, + { + "epoch": 0.7756828193832599, + "grad_norm": 1.4487650646569075, + "learning_rate": 1.4389761806226943e-05, + "loss": 0.6755822896957397, + "step": 2201 + }, + { + 
"epoch": 0.7760352422907489, + "grad_norm": 1.438634659048083, + "learning_rate": 1.4384526052317106e-05, + "loss": 0.6718465089797974, + "step": 2202 + }, + { + "epoch": 0.7763876651982379, + "grad_norm": 1.4171659147035778, + "learning_rate": 1.4379288809997121e-05, + "loss": 0.5857758522033691, + "step": 2203 + }, + { + "epoch": 0.7767400881057269, + "grad_norm": 1.1200186604200135, + "learning_rate": 1.4374050081044876e-05, + "loss": 0.5861783027648926, + "step": 2204 + }, + { + "epoch": 0.7770925110132159, + "grad_norm": 1.442532656158601, + "learning_rate": 1.4368809867238754e-05, + "loss": 0.6862374544143677, + "step": 2205 + }, + { + "epoch": 0.7774449339207048, + "grad_norm": 1.6455201954220524, + "learning_rate": 1.4363568170357646e-05, + "loss": 0.6787701845169067, + "step": 2206 + }, + { + "epoch": 0.7777973568281938, + "grad_norm": 1.4101038203667695, + "learning_rate": 1.435832499218094e-05, + "loss": 0.5671687126159668, + "step": 2207 + }, + { + "epoch": 0.7781497797356828, + "grad_norm": 1.5479554264257531, + "learning_rate": 1.435308033448854e-05, + "loss": 0.8243429064750671, + "step": 2208 + }, + { + "epoch": 0.7785022026431718, + "grad_norm": 1.3676716888852272, + "learning_rate": 1.4347834199060835e-05, + "loss": 0.5880655646324158, + "step": 2209 + }, + { + "epoch": 0.7788546255506608, + "grad_norm": 2.451624357800272, + "learning_rate": 1.4342586587678734e-05, + "loss": 0.7085679769515991, + "step": 2210 + }, + { + "epoch": 0.7792070484581498, + "grad_norm": 1.546990179750224, + "learning_rate": 1.4337337502123627e-05, + "loss": 0.7011853456497192, + "step": 2211 + }, + { + "epoch": 0.7795594713656387, + "grad_norm": 1.6003260447933962, + "learning_rate": 1.4332086944177426e-05, + "loss": 0.755327582359314, + "step": 2212 + }, + { + "epoch": 0.7799118942731278, + "grad_norm": 1.3917359947430683, + "learning_rate": 1.4326834915622522e-05, + "loss": 0.7152736186981201, + "step": 2213 + }, + { + "epoch": 0.7802643171806167, + "grad_norm": 
1.3821995576878587, + "learning_rate": 1.4321581418241825e-05, + "loss": 0.6744083166122437, + "step": 2214 + }, + { + "epoch": 0.7806167400881058, + "grad_norm": 1.5294456027931242, + "learning_rate": 1.4316326453818728e-05, + "loss": 0.6112288236618042, + "step": 2215 + }, + { + "epoch": 0.7809691629955947, + "grad_norm": 1.2620758120071194, + "learning_rate": 1.4311070024137128e-05, + "loss": 0.5569246411323547, + "step": 2216 + }, + { + "epoch": 0.7813215859030836, + "grad_norm": 1.474883531826743, + "learning_rate": 1.4305812130981418e-05, + "loss": 0.6214494705200195, + "step": 2217 + }, + { + "epoch": 0.7816740088105727, + "grad_norm": 1.4094788075709526, + "learning_rate": 1.4300552776136497e-05, + "loss": 0.5401003956794739, + "step": 2218 + }, + { + "epoch": 0.7820264317180616, + "grad_norm": 1.433294268920241, + "learning_rate": 1.4295291961387742e-05, + "loss": 0.5128720998764038, + "step": 2219 + }, + { + "epoch": 0.7823788546255507, + "grad_norm": 1.352265751544302, + "learning_rate": 1.4290029688521043e-05, + "loss": 0.5495916604995728, + "step": 2220 + }, + { + "epoch": 0.7827312775330396, + "grad_norm": 1.6131865642068703, + "learning_rate": 1.4284765959322772e-05, + "loss": 0.628544807434082, + "step": 2221 + }, + { + "epoch": 0.7830837004405287, + "grad_norm": 1.443784571277232, + "learning_rate": 1.427950077557981e-05, + "loss": 0.7171294689178467, + "step": 2222 + }, + { + "epoch": 0.7834361233480176, + "grad_norm": 1.3723589201513293, + "learning_rate": 1.4274234139079513e-05, + "loss": 0.7436389327049255, + "step": 2223 + }, + { + "epoch": 0.7837885462555066, + "grad_norm": 1.5295286402885273, + "learning_rate": 1.426896605160975e-05, + "loss": 0.7154244780540466, + "step": 2224 + }, + { + "epoch": 0.7841409691629956, + "grad_norm": 1.4385555847293963, + "learning_rate": 1.426369651495886e-05, + "loss": 0.6433268189430237, + "step": 2225 + }, + { + "epoch": 0.7844933920704846, + "grad_norm": 1.4177681718218336, + "learning_rate": 
1.4258425530915703e-05, + "loss": 0.6612321734428406, + "step": 2226 + }, + { + "epoch": 0.7848458149779736, + "grad_norm": 1.962010974229914, + "learning_rate": 1.42531531012696e-05, + "loss": 0.6384811401367188, + "step": 2227 + }, + { + "epoch": 0.7851982378854625, + "grad_norm": 1.4927220821701634, + "learning_rate": 1.4247879227810384e-05, + "loss": 0.5592762231826782, + "step": 2228 + }, + { + "epoch": 0.7855506607929515, + "grad_norm": 1.6376570609433725, + "learning_rate": 1.4242603912328367e-05, + "loss": 0.6904512643814087, + "step": 2229 + }, + { + "epoch": 0.7859030837004405, + "grad_norm": 1.7784965930873091, + "learning_rate": 1.4237327156614358e-05, + "loss": 0.7165266871452332, + "step": 2230 + }, + { + "epoch": 0.7862555066079295, + "grad_norm": 1.6275397333714936, + "learning_rate": 1.423204896245965e-05, + "loss": 0.8567172288894653, + "step": 2231 + }, + { + "epoch": 0.7866079295154185, + "grad_norm": 1.6554990252792119, + "learning_rate": 1.4226769331656028e-05, + "loss": 0.6595934629440308, + "step": 2232 + }, + { + "epoch": 0.7869603524229075, + "grad_norm": 1.8034278962736743, + "learning_rate": 1.4221488265995755e-05, + "loss": 0.750861644744873, + "step": 2233 + }, + { + "epoch": 0.7873127753303965, + "grad_norm": 1.3674194021669617, + "learning_rate": 1.4216205767271597e-05, + "loss": 0.7146387696266174, + "step": 2234 + }, + { + "epoch": 0.7876651982378855, + "grad_norm": 1.9347692502503655, + "learning_rate": 1.4210921837276792e-05, + "loss": 0.58647221326828, + "step": 2235 + }, + { + "epoch": 0.7880176211453744, + "grad_norm": 1.4888974250205094, + "learning_rate": 1.4205636477805072e-05, + "loss": 0.6893318891525269, + "step": 2236 + }, + { + "epoch": 0.7883700440528635, + "grad_norm": 1.1833417050311776, + "learning_rate": 1.4200349690650654e-05, + "loss": 0.5545464158058167, + "step": 2237 + }, + { + "epoch": 0.7887224669603524, + "grad_norm": 1.6014523598259138, + "learning_rate": 1.4195061477608234e-05, + "loss": 
0.6088600158691406, + "step": 2238 + }, + { + "epoch": 0.7890748898678414, + "grad_norm": 1.3513904877886467, + "learning_rate": 1.4189771840472997e-05, + "loss": 0.6330769658088684, + "step": 2239 + }, + { + "epoch": 0.7894273127753304, + "grad_norm": 1.4283770062393895, + "learning_rate": 1.4184480781040613e-05, + "loss": 0.678654670715332, + "step": 2240 + }, + { + "epoch": 0.7897797356828193, + "grad_norm": 1.445633946040222, + "learning_rate": 1.417918830110723e-05, + "loss": 0.6259177923202515, + "step": 2241 + }, + { + "epoch": 0.7901321585903084, + "grad_norm": 1.408151849302333, + "learning_rate": 1.4173894402469477e-05, + "loss": 0.634982168674469, + "step": 2242 + }, + { + "epoch": 0.7904845814977973, + "grad_norm": 1.37778450193705, + "learning_rate": 1.4168599086924473e-05, + "loss": 0.6610612869262695, + "step": 2243 + }, + { + "epoch": 0.7908370044052864, + "grad_norm": 1.386127288755765, + "learning_rate": 1.416330235626981e-05, + "loss": 0.6952961683273315, + "step": 2244 + }, + { + "epoch": 0.7911894273127753, + "grad_norm": 1.6165363001234343, + "learning_rate": 1.4158004212303565e-05, + "loss": 0.5055881142616272, + "step": 2245 + }, + { + "epoch": 0.7915418502202644, + "grad_norm": 1.4841191669035856, + "learning_rate": 1.4152704656824288e-05, + "loss": 0.7284455299377441, + "step": 2246 + }, + { + "epoch": 0.7918942731277533, + "grad_norm": 1.3583334859782668, + "learning_rate": 1.414740369163102e-05, + "loss": 0.6985108852386475, + "step": 2247 + }, + { + "epoch": 0.7922466960352423, + "grad_norm": 1.3664811170856164, + "learning_rate": 1.4142101318523271e-05, + "loss": 0.5967550277709961, + "step": 2248 + }, + { + "epoch": 0.7925991189427313, + "grad_norm": 1.5695298710984633, + "learning_rate": 1.4136797539301033e-05, + "loss": 0.7696695327758789, + "step": 2249 + }, + { + "epoch": 0.7929515418502202, + "grad_norm": 1.3234775564665824, + "learning_rate": 1.413149235576477e-05, + "loss": 0.8131378293037415, + "step": 2250 + }, + { + "epoch": 
0.7933039647577093, + "grad_norm": 1.8429663529686, + "learning_rate": 1.4126185769715428e-05, + "loss": 0.8029932975769043, + "step": 2251 + }, + { + "epoch": 0.7936563876651982, + "grad_norm": 1.720051288151631, + "learning_rate": 1.412087778295443e-05, + "loss": 0.7408573031425476, + "step": 2252 + }, + { + "epoch": 0.7940088105726872, + "grad_norm": 1.8037723298533723, + "learning_rate": 1.411556839728367e-05, + "loss": 0.8624325394630432, + "step": 2253 + }, + { + "epoch": 0.7943612334801762, + "grad_norm": 1.5291561523904078, + "learning_rate": 1.411025761450552e-05, + "loss": 0.7635384798049927, + "step": 2254 + }, + { + "epoch": 0.7947136563876652, + "grad_norm": 1.5012301776005823, + "learning_rate": 1.4104945436422832e-05, + "loss": 0.5612920522689819, + "step": 2255 + }, + { + "epoch": 0.7950660792951542, + "grad_norm": 1.5891725973137842, + "learning_rate": 1.4099631864838912e-05, + "loss": 0.5792248845100403, + "step": 2256 + }, + { + "epoch": 0.7954185022026432, + "grad_norm": 1.427703140365858, + "learning_rate": 1.4094316901557563e-05, + "loss": 0.7405142188072205, + "step": 2257 + }, + { + "epoch": 0.7957709251101321, + "grad_norm": 1.5302016454534209, + "learning_rate": 1.4089000548383044e-05, + "loss": 0.630780816078186, + "step": 2258 + }, + { + "epoch": 0.7961233480176212, + "grad_norm": 1.5690685088460359, + "learning_rate": 1.4083682807120092e-05, + "loss": 0.6737201809883118, + "step": 2259 + }, + { + "epoch": 0.7964757709251101, + "grad_norm": 4.158789316506426, + "learning_rate": 1.4078363679573918e-05, + "loss": 0.6469985842704773, + "step": 2260 + }, + { + "epoch": 0.7968281938325992, + "grad_norm": 1.4774582614404035, + "learning_rate": 1.4073043167550198e-05, + "loss": 0.6315224170684814, + "step": 2261 + }, + { + "epoch": 0.7971806167400881, + "grad_norm": 1.1766652256758812, + "learning_rate": 1.4067721272855079e-05, + "loss": 0.6785402297973633, + "step": 2262 + }, + { + "epoch": 0.797533039647577, + "grad_norm": 1.4677269844033833, 
+ "learning_rate": 1.406239799729518e-05, + "loss": 0.7131394147872925, + "step": 2263 + }, + { + "epoch": 0.7978854625550661, + "grad_norm": 1.5575833651180606, + "learning_rate": 1.405707334267759e-05, + "loss": 0.6921142339706421, + "step": 2264 + }, + { + "epoch": 0.798237885462555, + "grad_norm": 1.375694666198905, + "learning_rate": 1.4051747310809863e-05, + "loss": 0.695213794708252, + "step": 2265 + }, + { + "epoch": 0.7985903083700441, + "grad_norm": 1.8529986724322307, + "learning_rate": 1.4046419903500013e-05, + "loss": 0.7081988453865051, + "step": 2266 + }, + { + "epoch": 0.798942731277533, + "grad_norm": 1.4461573292928833, + "learning_rate": 1.4041091122556539e-05, + "loss": 0.6404637098312378, + "step": 2267 + }, + { + "epoch": 0.7992951541850221, + "grad_norm": 1.3566691109367863, + "learning_rate": 1.403576096978839e-05, + "loss": 0.6404134631156921, + "step": 2268 + }, + { + "epoch": 0.799647577092511, + "grad_norm": 1.5118859398886633, + "learning_rate": 1.4030429447004992e-05, + "loss": 0.7963751554489136, + "step": 2269 + }, + { + "epoch": 0.8, + "grad_norm": 1.632997404115334, + "learning_rate": 1.4025096556016224e-05, + "loss": 0.6648174524307251, + "step": 2270 + }, + { + "epoch": 0.800352422907489, + "grad_norm": 1.4103532345019565, + "learning_rate": 1.4019762298632445e-05, + "loss": 0.6661815047264099, + "step": 2271 + }, + { + "epoch": 0.800704845814978, + "grad_norm": 1.7237738440956045, + "learning_rate": 1.4014426676664462e-05, + "loss": 0.6194477081298828, + "step": 2272 + }, + { + "epoch": 0.801057268722467, + "grad_norm": 1.8457235726726873, + "learning_rate": 1.400908969192356e-05, + "loss": 0.6869276762008667, + "step": 2273 + }, + { + "epoch": 0.8014096916299559, + "grad_norm": 1.7545140114513338, + "learning_rate": 1.4003751346221472e-05, + "loss": 0.7352420091629028, + "step": 2274 + }, + { + "epoch": 0.801762114537445, + "grad_norm": 1.5994812918128933, + "learning_rate": 1.3998411641370405e-05, + "loss": 0.8212440609931946, 
+ "step": 2275 + }, + { + "epoch": 0.8021145374449339, + "grad_norm": 1.5868623288152288, + "learning_rate": 1.3993070579183021e-05, + "loss": 0.6897045969963074, + "step": 2276 + }, + { + "epoch": 0.8024669603524229, + "grad_norm": 1.716974382638037, + "learning_rate": 1.3987728161472442e-05, + "loss": 0.8406906127929688, + "step": 2277 + }, + { + "epoch": 0.8028193832599119, + "grad_norm": 1.6664794009014727, + "learning_rate": 1.3982384390052257e-05, + "loss": 0.6236976385116577, + "step": 2278 + }, + { + "epoch": 0.8031718061674009, + "grad_norm": 1.7056031446043847, + "learning_rate": 1.3977039266736508e-05, + "loss": 0.8110965490341187, + "step": 2279 + }, + { + "epoch": 0.8035242290748899, + "grad_norm": 1.6273998334271178, + "learning_rate": 1.3971692793339697e-05, + "loss": 0.635534405708313, + "step": 2280 + }, + { + "epoch": 0.8038766519823789, + "grad_norm": 1.5382566365445476, + "learning_rate": 1.3966344971676789e-05, + "loss": 0.7806028127670288, + "step": 2281 + }, + { + "epoch": 0.8042290748898678, + "grad_norm": 1.7131487498074927, + "learning_rate": 1.3960995803563195e-05, + "loss": 0.6635935306549072, + "step": 2282 + }, + { + "epoch": 0.8045814977973569, + "grad_norm": 1.6068551029738092, + "learning_rate": 1.39556452908148e-05, + "loss": 0.6064634323120117, + "step": 2283 + }, + { + "epoch": 0.8049339207048458, + "grad_norm": 1.7686604234656398, + "learning_rate": 1.3950293435247933e-05, + "loss": 0.760187029838562, + "step": 2284 + }, + { + "epoch": 0.8052863436123348, + "grad_norm": 1.5333245954906318, + "learning_rate": 1.3944940238679384e-05, + "loss": 0.7004644274711609, + "step": 2285 + }, + { + "epoch": 0.8056387665198238, + "grad_norm": 1.9274194313344672, + "learning_rate": 1.393958570292639e-05, + "loss": 0.7662780284881592, + "step": 2286 + }, + { + "epoch": 0.8059911894273127, + "grad_norm": 1.3943181397787612, + "learning_rate": 1.393422982980666e-05, + "loss": 0.7939090132713318, + "step": 2287 + }, + { + "epoch": 
0.8063436123348018, + "grad_norm": 1.377559765071464, + "learning_rate": 1.3928872621138337e-05, + "loss": 0.7461861371994019, + "step": 2288 + }, + { + "epoch": 0.8066960352422907, + "grad_norm": 1.4875661773009663, + "learning_rate": 1.3923514078740032e-05, + "loss": 0.5997019410133362, + "step": 2289 + }, + { + "epoch": 0.8070484581497798, + "grad_norm": 1.5379009713311227, + "learning_rate": 1.3918154204430801e-05, + "loss": 0.5437384843826294, + "step": 2290 + }, + { + "epoch": 0.8074008810572687, + "grad_norm": 1.8168415447512607, + "learning_rate": 1.3912793000030154e-05, + "loss": 0.7387127876281738, + "step": 2291 + }, + { + "epoch": 0.8077533039647578, + "grad_norm": 1.305308107523337, + "learning_rate": 1.3907430467358054e-05, + "loss": 0.483035147190094, + "step": 2292 + }, + { + "epoch": 0.8081057268722467, + "grad_norm": 1.3669144351401303, + "learning_rate": 1.3902066608234919e-05, + "loss": 0.6208503842353821, + "step": 2293 + }, + { + "epoch": 0.8084581497797357, + "grad_norm": 1.7196168695476914, + "learning_rate": 1.3896701424481603e-05, + "loss": 0.6691559553146362, + "step": 2294 + }, + { + "epoch": 0.8088105726872247, + "grad_norm": 1.6945751274550964, + "learning_rate": 1.3891334917919422e-05, + "loss": 0.8960802555084229, + "step": 2295 + }, + { + "epoch": 0.8091629955947136, + "grad_norm": 1.7625732291329363, + "learning_rate": 1.388596709037014e-05, + "loss": 0.669715404510498, + "step": 2296 + }, + { + "epoch": 0.8095154185022027, + "grad_norm": 1.4235891674683654, + "learning_rate": 1.3880597943655972e-05, + "loss": 0.7356190085411072, + "step": 2297 + }, + { + "epoch": 0.8098678414096916, + "grad_norm": 1.6403595773987272, + "learning_rate": 1.3875227479599565e-05, + "loss": 0.9158750176429749, + "step": 2298 + }, + { + "epoch": 0.8102202643171806, + "grad_norm": 1.718215094287951, + "learning_rate": 1.3869855700024031e-05, + "loss": 0.7395786643028259, + "step": 2299 + }, + { + "epoch": 0.8105726872246696, + "grad_norm": 
1.6360185397225708, + "learning_rate": 1.3864482606752922e-05, + "loss": 0.594106912612915, + "step": 2300 + }, + { + "epoch": 0.8109251101321586, + "grad_norm": 1.6395747499474045, + "learning_rate": 1.3859108201610236e-05, + "loss": 0.7853089570999146, + "step": 2301 + }, + { + "epoch": 0.8112775330396476, + "grad_norm": 1.6313227134249062, + "learning_rate": 1.3853732486420413e-05, + "loss": 0.8346991539001465, + "step": 2302 + }, + { + "epoch": 0.8116299559471366, + "grad_norm": 1.6254363131857819, + "learning_rate": 1.3848355463008344e-05, + "loss": 0.5493819117546082, + "step": 2303 + }, + { + "epoch": 0.8119823788546255, + "grad_norm": 1.566621350016491, + "learning_rate": 1.3842977133199363e-05, + "loss": 0.7474828958511353, + "step": 2304 + }, + { + "epoch": 0.8123348017621146, + "grad_norm": 1.6648296076023164, + "learning_rate": 1.3837597498819242e-05, + "loss": 0.6599621772766113, + "step": 2305 + }, + { + "epoch": 0.8126872246696035, + "grad_norm": 1.5217466732352583, + "learning_rate": 1.38322165616942e-05, + "loss": 0.6751214861869812, + "step": 2306 + }, + { + "epoch": 0.8130396475770925, + "grad_norm": 1.720054765999457, + "learning_rate": 1.3826834323650899e-05, + "loss": 0.7450453042984009, + "step": 2307 + }, + { + "epoch": 0.8133920704845815, + "grad_norm": 1.4739637914592345, + "learning_rate": 1.382145078651644e-05, + "loss": 0.7015345692634583, + "step": 2308 + }, + { + "epoch": 0.8137444933920704, + "grad_norm": 1.4921910425897076, + "learning_rate": 1.3816065952118368e-05, + "loss": 0.7161329984664917, + "step": 2309 + }, + { + "epoch": 0.8140969162995595, + "grad_norm": 1.576440929020717, + "learning_rate": 1.3810679822284665e-05, + "loss": 0.771783709526062, + "step": 2310 + }, + { + "epoch": 0.8144493392070484, + "grad_norm": 1.461165164266228, + "learning_rate": 1.3805292398843755e-05, + "loss": 0.6710794568061829, + "step": 2311 + }, + { + "epoch": 0.8148017621145375, + "grad_norm": 1.6256312715940777, + "learning_rate": 
1.3799903683624503e-05, + "loss": 0.6614924669265747, + "step": 2312 + }, + { + "epoch": 0.8151541850220264, + "grad_norm": 1.429649360127197, + "learning_rate": 1.3794513678456203e-05, + "loss": 0.6432225704193115, + "step": 2313 + }, + { + "epoch": 0.8155066079295155, + "grad_norm": 1.233784916709085, + "learning_rate": 1.3789122385168604e-05, + "loss": 0.6228311061859131, + "step": 2314 + }, + { + "epoch": 0.8158590308370044, + "grad_norm": 1.5182036065920572, + "learning_rate": 1.3783729805591875e-05, + "loss": 0.5597498416900635, + "step": 2315 + }, + { + "epoch": 0.8162114537444934, + "grad_norm": 1.954667780900904, + "learning_rate": 1.3778335941556629e-05, + "loss": 0.7651177048683167, + "step": 2316 + }, + { + "epoch": 0.8165638766519824, + "grad_norm": 1.3053642347729657, + "learning_rate": 1.3772940794893916e-05, + "loss": 0.5482406616210938, + "step": 2317 + }, + { + "epoch": 0.8169162995594713, + "grad_norm": 1.4432389735878668, + "learning_rate": 1.3767544367435229e-05, + "loss": 0.767236590385437, + "step": 2318 + }, + { + "epoch": 0.8172687224669604, + "grad_norm": 1.7071036751428772, + "learning_rate": 1.3762146661012471e-05, + "loss": 0.705253541469574, + "step": 2319 + }, + { + "epoch": 0.8176211453744493, + "grad_norm": 1.4969645559129943, + "learning_rate": 1.3756747677458008e-05, + "loss": 0.7800463438034058, + "step": 2320 + }, + { + "epoch": 0.8179735682819383, + "grad_norm": 1.6172262621918039, + "learning_rate": 1.3751347418604623e-05, + "loss": 0.7615088224411011, + "step": 2321 + }, + { + "epoch": 0.8183259911894273, + "grad_norm": 1.6932314886464006, + "learning_rate": 1.3745945886285536e-05, + "loss": 0.8004297614097595, + "step": 2322 + }, + { + "epoch": 0.8186784140969163, + "grad_norm": 1.605867375121777, + "learning_rate": 1.3740543082334399e-05, + "loss": 0.6428912281990051, + "step": 2323 + }, + { + "epoch": 0.8190308370044053, + "grad_norm": 1.4147620040703779, + "learning_rate": 1.3735139008585294e-05, + "loss": 
0.6702802777290344, + "step": 2324 + }, + { + "epoch": 0.8193832599118943, + "grad_norm": 1.3127203907182126, + "learning_rate": 1.3729733666872736e-05, + "loss": 0.6003440022468567, + "step": 2325 + }, + { + "epoch": 0.8197356828193832, + "grad_norm": 2.04633486984075, + "learning_rate": 1.3724327059031677e-05, + "loss": 0.8264240622520447, + "step": 2326 + }, + { + "epoch": 0.8200881057268723, + "grad_norm": 1.4037319277657845, + "learning_rate": 1.3718919186897481e-05, + "loss": 0.6974462866783142, + "step": 2327 + }, + { + "epoch": 0.8204405286343612, + "grad_norm": 1.7081986923623933, + "learning_rate": 1.3713510052305962e-05, + "loss": 0.8273947238922119, + "step": 2328 + }, + { + "epoch": 0.8207929515418502, + "grad_norm": 1.5000401588722418, + "learning_rate": 1.3708099657093348e-05, + "loss": 0.6230529546737671, + "step": 2329 + }, + { + "epoch": 0.8211453744493392, + "grad_norm": 1.6377312790274685, + "learning_rate": 1.37026880030963e-05, + "loss": 0.6997084021568298, + "step": 2330 + }, + { + "epoch": 0.8214977973568282, + "grad_norm": 1.582616740422673, + "learning_rate": 1.3697275092151908e-05, + "loss": 0.7212036848068237, + "step": 2331 + }, + { + "epoch": 0.8218502202643172, + "grad_norm": 1.5449017822829925, + "learning_rate": 1.3691860926097685e-05, + "loss": 0.7758737206459045, + "step": 2332 + }, + { + "epoch": 0.8222026431718061, + "grad_norm": 1.7784238395856364, + "learning_rate": 1.368644550677157e-05, + "loss": 0.62369704246521, + "step": 2333 + }, + { + "epoch": 0.8225550660792952, + "grad_norm": 1.6110908974677367, + "learning_rate": 1.3681028836011935e-05, + "loss": 0.8051841855049133, + "step": 2334 + }, + { + "epoch": 0.8229074889867841, + "grad_norm": 1.3626761635443752, + "learning_rate": 1.3675610915657568e-05, + "loss": 0.6087243556976318, + "step": 2335 + }, + { + "epoch": 0.8232599118942732, + "grad_norm": 1.9382202981470131, + "learning_rate": 1.3670191747547685e-05, + "loss": 0.6949581503868103, + "step": 2336 + }, + { + 
"epoch": 0.8236123348017621, + "grad_norm": 1.5451121537596906, + "learning_rate": 1.3664771333521922e-05, + "loss": 0.5621528029441833, + "step": 2337 + }, + { + "epoch": 0.8239647577092511, + "grad_norm": 1.622327701652298, + "learning_rate": 1.3659349675420346e-05, + "loss": 0.8731498718261719, + "step": 2338 + }, + { + "epoch": 0.8243171806167401, + "grad_norm": 1.5570249925953572, + "learning_rate": 1.3653926775083437e-05, + "loss": 0.6997240781784058, + "step": 2339 + }, + { + "epoch": 0.824669603524229, + "grad_norm": 1.6562463291138314, + "learning_rate": 1.3648502634352104e-05, + "loss": 0.8061426877975464, + "step": 2340 + }, + { + "epoch": 0.8250220264317181, + "grad_norm": 1.7061312576253802, + "learning_rate": 1.3643077255067667e-05, + "loss": 0.6186845302581787, + "step": 2341 + }, + { + "epoch": 0.825374449339207, + "grad_norm": 1.6605971928200247, + "learning_rate": 1.3637650639071884e-05, + "loss": 0.8098937273025513, + "step": 2342 + }, + { + "epoch": 0.825726872246696, + "grad_norm": 1.6091516027269386, + "learning_rate": 1.3632222788206916e-05, + "loss": 0.5810271501541138, + "step": 2343 + }, + { + "epoch": 0.826079295154185, + "grad_norm": 1.4965459276387059, + "learning_rate": 1.3626793704315348e-05, + "loss": 0.48309600353240967, + "step": 2344 + }, + { + "epoch": 0.826431718061674, + "grad_norm": 1.4326274242229946, + "learning_rate": 1.3621363389240188e-05, + "loss": 0.7366980314254761, + "step": 2345 + }, + { + "epoch": 0.826784140969163, + "grad_norm": 1.571199172280502, + "learning_rate": 1.3615931844824859e-05, + "loss": 0.6572252511978149, + "step": 2346 + }, + { + "epoch": 0.827136563876652, + "grad_norm": 1.3078300281358257, + "learning_rate": 1.3610499072913204e-05, + "loss": 0.6776653528213501, + "step": 2347 + }, + { + "epoch": 0.827488986784141, + "grad_norm": 1.772641440888185, + "learning_rate": 1.3605065075349473e-05, + "loss": 0.6536053419113159, + "step": 2348 + }, + { + "epoch": 0.82784140969163, + "grad_norm": 
1.600184025362065, + "learning_rate": 1.3599629853978342e-05, + "loss": 0.7000117301940918, + "step": 2349 + }, + { + "epoch": 0.8281938325991189, + "grad_norm": 1.5533713409132957, + "learning_rate": 1.3594193410644902e-05, + "loss": 0.6480045318603516, + "step": 2350 + }, + { + "epoch": 0.8285462555066079, + "grad_norm": 1.5474076871693587, + "learning_rate": 1.3588755747194656e-05, + "loss": 0.6428179740905762, + "step": 2351 + }, + { + "epoch": 0.8288986784140969, + "grad_norm": 1.3886734182652174, + "learning_rate": 1.3583316865473517e-05, + "loss": 0.618633508682251, + "step": 2352 + }, + { + "epoch": 0.8292511013215859, + "grad_norm": 1.5946423674864716, + "learning_rate": 1.357787676732782e-05, + "loss": 0.7289671897888184, + "step": 2353 + }, + { + "epoch": 0.8296035242290749, + "grad_norm": 1.687058159970245, + "learning_rate": 1.3572435454604307e-05, + "loss": 0.6969538927078247, + "step": 2354 + }, + { + "epoch": 0.8299559471365638, + "grad_norm": 1.565248379514886, + "learning_rate": 1.3566992929150137e-05, + "loss": 0.8490859270095825, + "step": 2355 + }, + { + "epoch": 0.8303083700440529, + "grad_norm": 1.532906793366292, + "learning_rate": 1.3561549192812877e-05, + "loss": 0.6883271336555481, + "step": 2356 + }, + { + "epoch": 0.8306607929515418, + "grad_norm": 1.3151000902691472, + "learning_rate": 1.3556104247440504e-05, + "loss": 0.68092280626297, + "step": 2357 + }, + { + "epoch": 0.8310132158590309, + "grad_norm": 1.2591886658215548, + "learning_rate": 1.3550658094881413e-05, + "loss": 0.7077454924583435, + "step": 2358 + }, + { + "epoch": 0.8313656387665198, + "grad_norm": 1.5452673483096302, + "learning_rate": 1.3545210736984393e-05, + "loss": 0.7364591360092163, + "step": 2359 + }, + { + "epoch": 0.8317180616740089, + "grad_norm": 1.4999509926023873, + "learning_rate": 1.3539762175598666e-05, + "loss": 0.8047930002212524, + "step": 2360 + }, + { + "epoch": 0.8320704845814978, + "grad_norm": 1.4862380654794773, + "learning_rate": 
1.3534312412573836e-05, + "loss": 0.7717781066894531, + "step": 2361 + }, + { + "epoch": 0.8324229074889867, + "grad_norm": 1.7032828917925678, + "learning_rate": 1.3528861449759938e-05, + "loss": 0.7228613495826721, + "step": 2362 + }, + { + "epoch": 0.8327753303964758, + "grad_norm": 1.5752771060390574, + "learning_rate": 1.3523409289007399e-05, + "loss": 0.8025436401367188, + "step": 2363 + }, + { + "epoch": 0.8331277533039647, + "grad_norm": 1.5214524176303228, + "learning_rate": 1.3517955932167057e-05, + "loss": 0.6653664112091064, + "step": 2364 + }, + { + "epoch": 0.8334801762114538, + "grad_norm": 1.4409217046848606, + "learning_rate": 1.3512501381090158e-05, + "loss": 0.709527313709259, + "step": 2365 + }, + { + "epoch": 0.8338325991189427, + "grad_norm": 1.4678807653581447, + "learning_rate": 1.3507045637628355e-05, + "loss": 0.7317520380020142, + "step": 2366 + }, + { + "epoch": 0.8341850220264317, + "grad_norm": 1.4520344718636113, + "learning_rate": 1.3501588703633703e-05, + "loss": 0.734069287776947, + "step": 2367 + }, + { + "epoch": 0.8345374449339207, + "grad_norm": 1.355050784601881, + "learning_rate": 1.349613058095866e-05, + "loss": 0.5950552225112915, + "step": 2368 + }, + { + "epoch": 0.8348898678414097, + "grad_norm": 1.3916802158941735, + "learning_rate": 1.3490671271456084e-05, + "loss": 0.5958857536315918, + "step": 2369 + }, + { + "epoch": 0.8352422907488987, + "grad_norm": 1.319860830071963, + "learning_rate": 1.348521077697925e-05, + "loss": 0.7094449996948242, + "step": 2370 + }, + { + "epoch": 0.8355947136563877, + "grad_norm": 1.283824481194398, + "learning_rate": 1.3479749099381818e-05, + "loss": 0.6260385513305664, + "step": 2371 + }, + { + "epoch": 0.8359471365638766, + "grad_norm": 1.3546760632082742, + "learning_rate": 1.3474286240517862e-05, + "loss": 0.65608811378479, + "step": 2372 + }, + { + "epoch": 0.8362995594713656, + "grad_norm": 1.5902013950729095, + "learning_rate": 1.346882220224185e-05, + "loss": 0.6942586898803711, 
+ "step": 2373 + }, + { + "epoch": 0.8366519823788546, + "grad_norm": 1.5432700710308092, + "learning_rate": 1.3463356986408653e-05, + "loss": 0.6831374168395996, + "step": 2374 + }, + { + "epoch": 0.8370044052863436, + "grad_norm": 1.2453712902306997, + "learning_rate": 1.3457890594873546e-05, + "loss": 0.6363790035247803, + "step": 2375 + }, + { + "epoch": 0.8373568281938326, + "grad_norm": 1.4407831477600082, + "learning_rate": 1.3452423029492194e-05, + "loss": 0.698935866355896, + "step": 2376 + }, + { + "epoch": 0.8377092511013216, + "grad_norm": 1.6516160077651472, + "learning_rate": 1.3446954292120667e-05, + "loss": 0.8569005727767944, + "step": 2377 + }, + { + "epoch": 0.8380616740088106, + "grad_norm": 1.4963554673760426, + "learning_rate": 1.3441484384615428e-05, + "loss": 0.8461613655090332, + "step": 2378 + }, + { + "epoch": 0.8384140969162995, + "grad_norm": 1.635336062215313, + "learning_rate": 1.343601330883335e-05, + "loss": 0.7481078505516052, + "step": 2379 + }, + { + "epoch": 0.8387665198237886, + "grad_norm": 1.1164155853725835, + "learning_rate": 1.343054106663168e-05, + "loss": 0.5632544755935669, + "step": 2380 + }, + { + "epoch": 0.8391189427312775, + "grad_norm": 1.2387886339726162, + "learning_rate": 1.3425067659868084e-05, + "loss": 0.528980016708374, + "step": 2381 + }, + { + "epoch": 0.8394713656387666, + "grad_norm": 1.2987181937645196, + "learning_rate": 1.341959309040061e-05, + "loss": 0.5520849227905273, + "step": 2382 + }, + { + "epoch": 0.8398237885462555, + "grad_norm": 1.1709661282123542, + "learning_rate": 1.34141173600877e-05, + "loss": 0.569744348526001, + "step": 2383 + }, + { + "epoch": 0.8401762114537445, + "grad_norm": 1.1526596958180186, + "learning_rate": 1.3408640470788202e-05, + "loss": 0.595065712928772, + "step": 2384 + }, + { + "epoch": 0.8405286343612335, + "grad_norm": 1.716530250506247, + "learning_rate": 1.3403162424361342e-05, + "loss": 0.6993277072906494, + "step": 2385 + }, + { + "epoch": 0.8408810572687224, 
+ "grad_norm": 1.467497517918387, + "learning_rate": 1.3397683222666748e-05, + "loss": 0.6183342933654785, + "step": 2386 + }, + { + "epoch": 0.8412334801762115, + "grad_norm": 1.5660447986557493, + "learning_rate": 1.339220286756444e-05, + "loss": 0.7280797362327576, + "step": 2387 + }, + { + "epoch": 0.8415859030837004, + "grad_norm": 1.5538390945999534, + "learning_rate": 1.3386721360914829e-05, + "loss": 0.7377837896347046, + "step": 2388 + }, + { + "epoch": 0.8419383259911895, + "grad_norm": 1.3658202604001934, + "learning_rate": 1.3381238704578718e-05, + "loss": 0.7202758193016052, + "step": 2389 + }, + { + "epoch": 0.8422907488986784, + "grad_norm": 1.4864419338323784, + "learning_rate": 1.3375754900417291e-05, + "loss": 0.5899994969367981, + "step": 2390 + }, + { + "epoch": 0.8426431718061674, + "grad_norm": 1.6545749228929092, + "learning_rate": 1.3370269950292133e-05, + "loss": 0.8128558993339539, + "step": 2391 + }, + { + "epoch": 0.8429955947136564, + "grad_norm": 1.4863580222240895, + "learning_rate": 1.3364783856065213e-05, + "loss": 0.8222962617874146, + "step": 2392 + }, + { + "epoch": 0.8433480176211454, + "grad_norm": 1.5392010225603865, + "learning_rate": 1.3359296619598894e-05, + "loss": 0.7898896932601929, + "step": 2393 + }, + { + "epoch": 0.8437004405286344, + "grad_norm": 1.59106154269148, + "learning_rate": 1.3353808242755912e-05, + "loss": 0.6596726179122925, + "step": 2394 + }, + { + "epoch": 0.8440528634361234, + "grad_norm": 1.6652244607977948, + "learning_rate": 1.3348318727399411e-05, + "loss": 0.8073080778121948, + "step": 2395 + }, + { + "epoch": 0.8444052863436123, + "grad_norm": 1.582055504815832, + "learning_rate": 1.3342828075392902e-05, + "loss": 0.6640043258666992, + "step": 2396 + }, + { + "epoch": 0.8447577092511013, + "grad_norm": 1.415789065826391, + "learning_rate": 1.3337336288600297e-05, + "loss": 0.6067632436752319, + "step": 2397 + }, + { + "epoch": 0.8451101321585903, + "grad_norm": 1.308177796408265, + 
"learning_rate": 1.3331843368885882e-05, + "loss": 0.6891398429870605, + "step": 2398 + }, + { + "epoch": 0.8454625550660793, + "grad_norm": 1.276250238749864, + "learning_rate": 1.3326349318114335e-05, + "loss": 0.6007423996925354, + "step": 2399 + }, + { + "epoch": 0.8458149779735683, + "grad_norm": 1.6159836309404996, + "learning_rate": 1.3320854138150712e-05, + "loss": 0.7314017415046692, + "step": 2400 + }, + { + "epoch": 0.8461674008810572, + "grad_norm": 1.5060027308979995, + "learning_rate": 1.3315357830860461e-05, + "loss": 0.7352335453033447, + "step": 2401 + }, + { + "epoch": 0.8465198237885463, + "grad_norm": 1.3629774951204896, + "learning_rate": 1.3309860398109402e-05, + "loss": 0.6546785831451416, + "step": 2402 + }, + { + "epoch": 0.8468722466960352, + "grad_norm": 1.4629106252693242, + "learning_rate": 1.3304361841763746e-05, + "loss": 0.590252697467804, + "step": 2403 + }, + { + "epoch": 0.8472246696035243, + "grad_norm": 1.5501476697602834, + "learning_rate": 1.3298862163690078e-05, + "loss": 0.6864089369773865, + "step": 2404 + }, + { + "epoch": 0.8475770925110132, + "grad_norm": 1.452376737172979, + "learning_rate": 1.3293361365755373e-05, + "loss": 0.7818390130996704, + "step": 2405 + }, + { + "epoch": 0.8479295154185023, + "grad_norm": 1.9084475381981967, + "learning_rate": 1.3287859449826977e-05, + "loss": 0.7461166381835938, + "step": 2406 + }, + { + "epoch": 0.8482819383259912, + "grad_norm": 1.7337796671611372, + "learning_rate": 1.3282356417772618e-05, + "loss": 0.7519750595092773, + "step": 2407 + }, + { + "epoch": 0.8486343612334801, + "grad_norm": 1.445619912428175, + "learning_rate": 1.3276852271460406e-05, + "loss": 0.7041791081428528, + "step": 2408 + }, + { + "epoch": 0.8489867841409692, + "grad_norm": 1.3131157575910486, + "learning_rate": 1.327134701275883e-05, + "loss": 0.5649428367614746, + "step": 2409 + }, + { + "epoch": 0.8493392070484581, + "grad_norm": 1.838398891045019, + "learning_rate": 1.3265840643536746e-05, + 
"loss": 0.6607545614242554, + "step": 2410 + }, + { + "epoch": 0.8496916299559472, + "grad_norm": 1.590568626194504, + "learning_rate": 1.3260333165663406e-05, + "loss": 0.7393547892570496, + "step": 2411 + }, + { + "epoch": 0.8500440528634361, + "grad_norm": 1.660269046740627, + "learning_rate": 1.325482458100842e-05, + "loss": 0.6550742387771606, + "step": 2412 + }, + { + "epoch": 0.8503964757709251, + "grad_norm": 1.3409806360783354, + "learning_rate": 1.324931489144178e-05, + "loss": 0.5104576349258423, + "step": 2413 + }, + { + "epoch": 0.8507488986784141, + "grad_norm": 1.7056036938051933, + "learning_rate": 1.3243804098833859e-05, + "loss": 0.7679733037948608, + "step": 2414 + }, + { + "epoch": 0.8511013215859031, + "grad_norm": 1.3058704920771766, + "learning_rate": 1.3238292205055397e-05, + "loss": 0.6516377925872803, + "step": 2415 + }, + { + "epoch": 0.8514537444933921, + "grad_norm": 1.4749751578789572, + "learning_rate": 1.3232779211977509e-05, + "loss": 0.8509281277656555, + "step": 2416 + }, + { + "epoch": 0.8518061674008811, + "grad_norm": 1.6532741255389543, + "learning_rate": 1.3227265121471691e-05, + "loss": 0.5643317103385925, + "step": 2417 + }, + { + "epoch": 0.85215859030837, + "grad_norm": 1.4681710603298503, + "learning_rate": 1.3221749935409798e-05, + "loss": 0.5294302105903625, + "step": 2418 + }, + { + "epoch": 0.852511013215859, + "grad_norm": 1.4914498870655002, + "learning_rate": 1.3216233655664067e-05, + "loss": 0.6301594972610474, + "step": 2419 + }, + { + "epoch": 0.852863436123348, + "grad_norm": 1.399957922496421, + "learning_rate": 1.32107162841071e-05, + "loss": 0.6930294036865234, + "step": 2420 + }, + { + "epoch": 0.853215859030837, + "grad_norm": 1.4069779391578274, + "learning_rate": 1.3205197822611876e-05, + "loss": 0.6266883611679077, + "step": 2421 + }, + { + "epoch": 0.853568281938326, + "grad_norm": 1.7817063662748283, + "learning_rate": 1.3199678273051743e-05, + "loss": 0.7789868116378784, + "step": 2422 + }, + { + 
"epoch": 0.853920704845815, + "grad_norm": 1.3387299141459739, + "learning_rate": 1.3194157637300416e-05, + "loss": 0.7148274779319763, + "step": 2423 + }, + { + "epoch": 0.854273127753304, + "grad_norm": 1.4757263125304436, + "learning_rate": 1.3188635917231972e-05, + "loss": 0.550403356552124, + "step": 2424 + }, + { + "epoch": 0.8546255506607929, + "grad_norm": 1.563076871593329, + "learning_rate": 1.3183113114720872e-05, + "loss": 0.6650338768959045, + "step": 2425 + }, + { + "epoch": 0.854977973568282, + "grad_norm": 1.569123753374588, + "learning_rate": 1.317758923164193e-05, + "loss": 0.7774436473846436, + "step": 2426 + }, + { + "epoch": 0.8553303964757709, + "grad_norm": 1.407079429107656, + "learning_rate": 1.3172064269870335e-05, + "loss": 0.6192025542259216, + "step": 2427 + }, + { + "epoch": 0.85568281938326, + "grad_norm": 1.6230407627498116, + "learning_rate": 1.3166538231281635e-05, + "loss": 0.6758309602737427, + "step": 2428 + }, + { + "epoch": 0.8560352422907489, + "grad_norm": 1.6026256588862147, + "learning_rate": 1.3161011117751756e-05, + "loss": 0.7311116456985474, + "step": 2429 + }, + { + "epoch": 0.8563876651982378, + "grad_norm": 1.797024553793142, + "learning_rate": 1.3155482931156977e-05, + "loss": 0.7525666952133179, + "step": 2430 + }, + { + "epoch": 0.8567400881057269, + "grad_norm": 1.7067244433524313, + "learning_rate": 1.3149953673373945e-05, + "loss": 0.6903671026229858, + "step": 2431 + }, + { + "epoch": 0.8570925110132158, + "grad_norm": 1.2833360218942749, + "learning_rate": 1.314442334627967e-05, + "loss": 0.6036638021469116, + "step": 2432 + }, + { + "epoch": 0.8574449339207049, + "grad_norm": 1.6354054518430503, + "learning_rate": 1.3138891951751526e-05, + "loss": 0.6490209698677063, + "step": 2433 + }, + { + "epoch": 0.8577973568281938, + "grad_norm": 1.6970156912379664, + "learning_rate": 1.3133359491667252e-05, + "loss": 0.692024290561676, + "step": 2434 + }, + { + "epoch": 0.8581497797356828, + "grad_norm": 
1.4031255607051936, + "learning_rate": 1.3127825967904944e-05, + "loss": 0.6977943181991577, + "step": 2435 + }, + { + "epoch": 0.8585022026431718, + "grad_norm": 1.3842045822286646, + "learning_rate": 1.312229138234306e-05, + "loss": 0.625649094581604, + "step": 2436 + }, + { + "epoch": 0.8588546255506608, + "grad_norm": 1.5910466082409926, + "learning_rate": 1.3116755736860422e-05, + "loss": 0.671939492225647, + "step": 2437 + }, + { + "epoch": 0.8592070484581498, + "grad_norm": 1.3856883940296008, + "learning_rate": 1.3111219033336211e-05, + "loss": 0.700029194355011, + "step": 2438 + }, + { + "epoch": 0.8595594713656388, + "grad_norm": 1.3907118477619378, + "learning_rate": 1.3105681273649959e-05, + "loss": 0.6339718699455261, + "step": 2439 + }, + { + "epoch": 0.8599118942731278, + "grad_norm": 1.306943148235595, + "learning_rate": 1.3100142459681569e-05, + "loss": 0.7105488777160645, + "step": 2440 + }, + { + "epoch": 0.8602643171806167, + "grad_norm": 1.4503861250177865, + "learning_rate": 1.3094602593311294e-05, + "loss": 0.616797924041748, + "step": 2441 + }, + { + "epoch": 0.8606167400881057, + "grad_norm": 1.5110286813274958, + "learning_rate": 1.3089061676419746e-05, + "loss": 0.7167524099349976, + "step": 2442 + }, + { + "epoch": 0.8609691629955947, + "grad_norm": 1.5215961993133658, + "learning_rate": 1.3083519710887895e-05, + "loss": 0.5499090552330017, + "step": 2443 + }, + { + "epoch": 0.8613215859030837, + "grad_norm": 1.4623789546240658, + "learning_rate": 1.3077976698597064e-05, + "loss": 0.5764151811599731, + "step": 2444 + }, + { + "epoch": 0.8616740088105727, + "grad_norm": 1.438510619597336, + "learning_rate": 1.3072432641428931e-05, + "loss": 0.7171419858932495, + "step": 2445 + }, + { + "epoch": 0.8620264317180617, + "grad_norm": 1.3023250448197168, + "learning_rate": 1.3066887541265539e-05, + "loss": 0.7546026706695557, + "step": 2446 + }, + { + "epoch": 0.8623788546255506, + "grad_norm": 1.2250371592811133, + "learning_rate": 
1.306134139998927e-05, + "loss": 0.5884296298027039, + "step": 2447 + }, + { + "epoch": 0.8627312775330397, + "grad_norm": 1.3135127283076564, + "learning_rate": 1.3055794219482867e-05, + "loss": 0.6877926588058472, + "step": 2448 + }, + { + "epoch": 0.8630837004405286, + "grad_norm": 1.5935068741769265, + "learning_rate": 1.3050246001629425e-05, + "loss": 0.598037838935852, + "step": 2449 + }, + { + "epoch": 0.8634361233480177, + "grad_norm": 1.4128431939298278, + "learning_rate": 1.3044696748312395e-05, + "loss": 0.6560795307159424, + "step": 2450 + }, + { + "epoch": 0.8637885462555066, + "grad_norm": 1.5856094022002207, + "learning_rate": 1.3039146461415575e-05, + "loss": 0.7130829691886902, + "step": 2451 + }, + { + "epoch": 0.8641409691629955, + "grad_norm": 1.9167144031452974, + "learning_rate": 1.303359514282311e-05, + "loss": 0.7402251958847046, + "step": 2452 + }, + { + "epoch": 0.8644933920704846, + "grad_norm": 1.4143817039312587, + "learning_rate": 1.3028042794419502e-05, + "loss": 0.6610683798789978, + "step": 2453 + }, + { + "epoch": 0.8648458149779735, + "grad_norm": 1.6544654323663863, + "learning_rate": 1.3022489418089606e-05, + "loss": 0.84892737865448, + "step": 2454 + }, + { + "epoch": 0.8651982378854626, + "grad_norm": 1.689285386487206, + "learning_rate": 1.3016935015718612e-05, + "loss": 0.7285948991775513, + "step": 2455 + }, + { + "epoch": 0.8655506607929515, + "grad_norm": 1.46262615014944, + "learning_rate": 1.3011379589192074e-05, + "loss": 0.6800004839897156, + "step": 2456 + }, + { + "epoch": 0.8659030837004406, + "grad_norm": 1.492659523558787, + "learning_rate": 1.3005823140395878e-05, + "loss": 0.618618369102478, + "step": 2457 + }, + { + "epoch": 0.8662555066079295, + "grad_norm": 1.8084387802865425, + "learning_rate": 1.3000265671216278e-05, + "loss": 0.7657757997512817, + "step": 2458 + }, + { + "epoch": 0.8666079295154185, + "grad_norm": 1.5490708834885107, + "learning_rate": 1.2994707183539848e-05, + "loss": 0.7814151644706726, 
+ "step": 2459 + }, + { + "epoch": 0.8669603524229075, + "grad_norm": 1.2899412950022648, + "learning_rate": 1.2989147679253531e-05, + "loss": 0.6494930982589722, + "step": 2460 + }, + { + "epoch": 0.8673127753303965, + "grad_norm": 1.5543724658760723, + "learning_rate": 1.2983587160244602e-05, + "loss": 0.6498425006866455, + "step": 2461 + }, + { + "epoch": 0.8676651982378855, + "grad_norm": 1.5210228165977844, + "learning_rate": 1.2978025628400684e-05, + "loss": 0.635313093662262, + "step": 2462 + }, + { + "epoch": 0.8680176211453744, + "grad_norm": 1.500755936886382, + "learning_rate": 1.2972463085609744e-05, + "loss": 0.6892971992492676, + "step": 2463 + }, + { + "epoch": 0.8683700440528634, + "grad_norm": 1.3872566957567176, + "learning_rate": 1.2966899533760095e-05, + "loss": 0.691922128200531, + "step": 2464 + }, + { + "epoch": 0.8687224669603524, + "grad_norm": 1.773327696286038, + "learning_rate": 1.2961334974740386e-05, + "loss": 0.5764378309249878, + "step": 2465 + }, + { + "epoch": 0.8690748898678414, + "grad_norm": 1.6231464224655543, + "learning_rate": 1.2955769410439616e-05, + "loss": 0.8193005919456482, + "step": 2466 + }, + { + "epoch": 0.8694273127753304, + "grad_norm": 1.4243504226778951, + "learning_rate": 1.2950202842747115e-05, + "loss": 0.6141501665115356, + "step": 2467 + }, + { + "epoch": 0.8697797356828194, + "grad_norm": 1.5061592811010869, + "learning_rate": 1.2944635273552565e-05, + "loss": 0.7464454174041748, + "step": 2468 + }, + { + "epoch": 0.8701321585903083, + "grad_norm": 1.3349759192393535, + "learning_rate": 1.293906670474598e-05, + "loss": 0.5970025062561035, + "step": 2469 + }, + { + "epoch": 0.8704845814977974, + "grad_norm": 1.6022434524431073, + "learning_rate": 1.2933497138217714e-05, + "loss": 0.7247673273086548, + "step": 2470 + }, + { + "epoch": 0.8708370044052863, + "grad_norm": 1.535051650641408, + "learning_rate": 1.2927926575858463e-05, + "loss": 0.746272087097168, + "step": 2471 + }, + { + "epoch": 
0.8711894273127754, + "grad_norm": 1.5072596947359789, + "learning_rate": 1.2922355019559265e-05, + "loss": 0.6918776035308838, + "step": 2472 + }, + { + "epoch": 0.8715418502202643, + "grad_norm": 1.553343209452483, + "learning_rate": 1.2916782471211478e-05, + "loss": 0.6056039929389954, + "step": 2473 + }, + { + "epoch": 0.8718942731277532, + "grad_norm": 1.3670048649799473, + "learning_rate": 1.2911208932706821e-05, + "loss": 0.6699481010437012, + "step": 2474 + }, + { + "epoch": 0.8722466960352423, + "grad_norm": 1.4719810242076543, + "learning_rate": 1.2905634405937327e-05, + "loss": 0.5141814947128296, + "step": 2475 + }, + { + "epoch": 0.8725991189427312, + "grad_norm": 1.5819338229003952, + "learning_rate": 1.2900058892795383e-05, + "loss": 0.7521284818649292, + "step": 2476 + }, + { + "epoch": 0.8729515418502203, + "grad_norm": 2.2082732494247916, + "learning_rate": 1.2894482395173695e-05, + "loss": 0.6878937482833862, + "step": 2477 + }, + { + "epoch": 0.8733039647577092, + "grad_norm": 1.3942904192465777, + "learning_rate": 1.2888904914965317e-05, + "loss": 0.5963379144668579, + "step": 2478 + }, + { + "epoch": 0.8736563876651983, + "grad_norm": 1.7634340153188761, + "learning_rate": 1.2883326454063623e-05, + "loss": 0.7572320103645325, + "step": 2479 + }, + { + "epoch": 0.8740088105726872, + "grad_norm": 1.399026210420982, + "learning_rate": 1.2877747014362334e-05, + "loss": 0.7047982215881348, + "step": 2480 + }, + { + "epoch": 0.8743612334801762, + "grad_norm": 2.0588397887454715, + "learning_rate": 1.2872166597755488e-05, + "loss": 0.6449024677276611, + "step": 2481 + }, + { + "epoch": 0.8747136563876652, + "grad_norm": 1.6446468607591163, + "learning_rate": 1.2866585206137469e-05, + "loss": 0.7590922117233276, + "step": 2482 + }, + { + "epoch": 0.8750660792951542, + "grad_norm": 1.6164965426300901, + "learning_rate": 1.2861002841402983e-05, + "loss": 0.7534210085868835, + "step": 2483 + }, + { + "epoch": 0.8754185022026432, + "grad_norm": 
1.9198456186069754, + "learning_rate": 1.2855419505447073e-05, + "loss": 0.7091225385665894, + "step": 2484 + }, + { + "epoch": 0.8757709251101321, + "grad_norm": 1.5347710098555305, + "learning_rate": 1.2849835200165104e-05, + "loss": 0.7578933835029602, + "step": 2485 + }, + { + "epoch": 0.8761233480176212, + "grad_norm": 1.3282869408675961, + "learning_rate": 1.2844249927452771e-05, + "loss": 0.5938349962234497, + "step": 2486 + }, + { + "epoch": 0.8764757709251101, + "grad_norm": 1.5090052513716286, + "learning_rate": 1.2838663689206108e-05, + "loss": 0.5726315379142761, + "step": 2487 + }, + { + "epoch": 0.8768281938325991, + "grad_norm": 1.450396836473225, + "learning_rate": 1.2833076487321465e-05, + "loss": 0.8181554079055786, + "step": 2488 + }, + { + "epoch": 0.8771806167400881, + "grad_norm": 1.71919397348368, + "learning_rate": 1.2827488323695522e-05, + "loss": 0.7465275526046753, + "step": 2489 + }, + { + "epoch": 0.8775330396475771, + "grad_norm": 1.2623461784182488, + "learning_rate": 1.2821899200225288e-05, + "loss": 0.6083456873893738, + "step": 2490 + }, + { + "epoch": 0.877885462555066, + "grad_norm": 1.4922167619772364, + "learning_rate": 1.2816309118808095e-05, + "loss": 0.6393307447433472, + "step": 2491 + }, + { + "epoch": 0.8782378854625551, + "grad_norm": 1.3846980777960398, + "learning_rate": 1.2810718081341604e-05, + "loss": 0.6562504768371582, + "step": 2492 + }, + { + "epoch": 0.878590308370044, + "grad_norm": 1.5590691123255283, + "learning_rate": 1.2805126089723798e-05, + "loss": 0.6737300753593445, + "step": 2493 + }, + { + "epoch": 0.8789427312775331, + "grad_norm": 1.7724399876158112, + "learning_rate": 1.2799533145852982e-05, + "loss": 0.6246815919876099, + "step": 2494 + }, + { + "epoch": 0.879295154185022, + "grad_norm": 1.7718655540042538, + "learning_rate": 1.2793939251627788e-05, + "loss": 0.7499577403068542, + "step": 2495 + }, + { + "epoch": 0.8796475770925111, + "grad_norm": 1.6628095797742937, + "learning_rate": 
1.2788344408947171e-05, + "loss": 0.7645655870437622, + "step": 2496 + }, + { + "epoch": 0.88, + "grad_norm": 1.732888201165417, + "learning_rate": 1.27827486197104e-05, + "loss": 0.7407524585723877, + "step": 2497 + }, + { + "epoch": 0.8803524229074889, + "grad_norm": 1.590151572985607, + "learning_rate": 1.2777151885817078e-05, + "loss": 0.6401108503341675, + "step": 2498 + }, + { + "epoch": 0.880704845814978, + "grad_norm": 1.5984459598023502, + "learning_rate": 1.2771554209167116e-05, + "loss": 0.8332269191741943, + "step": 2499 + }, + { + "epoch": 0.8810572687224669, + "grad_norm": 1.61859187638703, + "learning_rate": 1.2765955591660757e-05, + "loss": 0.7677830457687378, + "step": 2500 + }, + { + "epoch": 0.881409691629956, + "grad_norm": 1.4420535275594295, + "learning_rate": 1.2760356035198553e-05, + "loss": 0.8532943725585938, + "step": 2501 + }, + { + "epoch": 0.8817621145374449, + "grad_norm": 1.3662949943021319, + "learning_rate": 1.2754755541681384e-05, + "loss": 0.6287009716033936, + "step": 2502 + }, + { + "epoch": 0.882114537444934, + "grad_norm": 1.38981570117233, + "learning_rate": 1.2749154113010432e-05, + "loss": 0.7039133310317993, + "step": 2503 + }, + { + "epoch": 0.8824669603524229, + "grad_norm": 1.6518390089780828, + "learning_rate": 1.2743551751087222e-05, + "loss": 0.6959357857704163, + "step": 2504 + }, + { + "epoch": 0.8828193832599119, + "grad_norm": 1.3554006828606007, + "learning_rate": 1.2737948457813571e-05, + "loss": 0.6862938404083252, + "step": 2505 + }, + { + "epoch": 0.8831718061674009, + "grad_norm": 1.6773466383223146, + "learning_rate": 1.273234423509163e-05, + "loss": 0.6903352737426758, + "step": 2506 + }, + { + "epoch": 0.8835242290748899, + "grad_norm": 1.374322606051121, + "learning_rate": 1.2726739084823851e-05, + "loss": 0.7226145267486572, + "step": 2507 + }, + { + "epoch": 0.8838766519823789, + "grad_norm": 1.4091144718113782, + "learning_rate": 1.2721133008913015e-05, + "loss": 0.7865043878555298, + "step": 2508 + 
}, + { + "epoch": 0.8842290748898678, + "grad_norm": 1.4501170174913356, + "learning_rate": 1.2715526009262209e-05, + "loss": 0.6594572067260742, + "step": 2509 + }, + { + "epoch": 0.8845814977973568, + "grad_norm": 1.3500042347590218, + "learning_rate": 1.270991808777483e-05, + "loss": 0.5967481136322021, + "step": 2510 + }, + { + "epoch": 0.8849339207048458, + "grad_norm": 1.3600104271689806, + "learning_rate": 1.2704309246354599e-05, + "loss": 0.7843632698059082, + "step": 2511 + }, + { + "epoch": 0.8852863436123348, + "grad_norm": 1.3543191802484777, + "learning_rate": 1.2698699486905538e-05, + "loss": 0.7475506067276001, + "step": 2512 + }, + { + "epoch": 0.8856387665198238, + "grad_norm": 1.4881501151953718, + "learning_rate": 1.2693088811331987e-05, + "loss": 0.8082534670829773, + "step": 2513 + }, + { + "epoch": 0.8859911894273128, + "grad_norm": 1.6899694353159702, + "learning_rate": 1.2687477221538598e-05, + "loss": 0.7421785593032837, + "step": 2514 + }, + { + "epoch": 0.8863436123348017, + "grad_norm": 1.295151070825849, + "learning_rate": 1.2681864719430328e-05, + "loss": 0.6268718242645264, + "step": 2515 + }, + { + "epoch": 0.8866960352422908, + "grad_norm": 1.595396389533138, + "learning_rate": 1.2676251306912448e-05, + "loss": 0.7285459041595459, + "step": 2516 + }, + { + "epoch": 0.8870484581497797, + "grad_norm": 1.4826705601530517, + "learning_rate": 1.2670636985890542e-05, + "loss": 0.6132184267044067, + "step": 2517 + }, + { + "epoch": 0.8874008810572688, + "grad_norm": 1.4018565352445778, + "learning_rate": 1.2665021758270488e-05, + "loss": 0.5550754070281982, + "step": 2518 + }, + { + "epoch": 0.8877533039647577, + "grad_norm": 1.3628132273232696, + "learning_rate": 1.2659405625958488e-05, + "loss": 0.5357390642166138, + "step": 2519 + }, + { + "epoch": 0.8881057268722466, + "grad_norm": 1.4153066703364516, + "learning_rate": 1.2653788590861039e-05, + "loss": 0.5858328342437744, + "step": 2520 + }, + { + "epoch": 0.8884581497797357, + 
"grad_norm": 1.731815068535558, + "learning_rate": 1.2648170654884955e-05, + "loss": 0.7109283208847046, + "step": 2521 + }, + { + "epoch": 0.8888105726872246, + "grad_norm": 1.9753429482306435, + "learning_rate": 1.2642551819937348e-05, + "loss": 0.808137834072113, + "step": 2522 + }, + { + "epoch": 0.8891629955947137, + "grad_norm": 1.6385693606484741, + "learning_rate": 1.2636932087925637e-05, + "loss": 0.587998628616333, + "step": 2523 + }, + { + "epoch": 0.8895154185022026, + "grad_norm": 1.4234526769499198, + "learning_rate": 1.2631311460757545e-05, + "loss": 0.5555537343025208, + "step": 2524 + }, + { + "epoch": 0.8898678414096917, + "grad_norm": 1.4118650122814267, + "learning_rate": 1.2625689940341102e-05, + "loss": 0.641632080078125, + "step": 2525 + }, + { + "epoch": 0.8902202643171806, + "grad_norm": 1.5401015682174186, + "learning_rate": 1.262006752858464e-05, + "loss": 0.7005184888839722, + "step": 2526 + }, + { + "epoch": 0.8905726872246696, + "grad_norm": 1.272518513643159, + "learning_rate": 1.2614444227396792e-05, + "loss": 0.6907261610031128, + "step": 2527 + }, + { + "epoch": 0.8909251101321586, + "grad_norm": 1.4162379009723582, + "learning_rate": 1.2608820038686492e-05, + "loss": 0.5757718086242676, + "step": 2528 + }, + { + "epoch": 0.8912775330396476, + "grad_norm": 1.888252337049927, + "learning_rate": 1.2603194964362979e-05, + "loss": 0.6462569832801819, + "step": 2529 + }, + { + "epoch": 0.8916299559471366, + "grad_norm": 2.6509089623338586, + "learning_rate": 1.2597569006335787e-05, + "loss": 0.7028999328613281, + "step": 2530 + }, + { + "epoch": 0.8919823788546255, + "grad_norm": 1.3325876541370223, + "learning_rate": 1.2591942166514763e-05, + "loss": 0.5789325833320618, + "step": 2531 + }, + { + "epoch": 0.8923348017621145, + "grad_norm": 1.5373223041612576, + "learning_rate": 1.258631444681003e-05, + "loss": 0.6545255184173584, + "step": 2532 + }, + { + "epoch": 0.8926872246696035, + "grad_norm": 1.560686991488605, + "learning_rate": 
1.258068584913204e-05, + "loss": 0.7227469682693481, + "step": 2533 + }, + { + "epoch": 0.8930396475770925, + "grad_norm": 1.3545909427052794, + "learning_rate": 1.2575056375391513e-05, + "loss": 0.5985771417617798, + "step": 2534 + }, + { + "epoch": 0.8933920704845815, + "grad_norm": 1.5422643503857134, + "learning_rate": 1.2569426027499485e-05, + "loss": 0.6705960035324097, + "step": 2535 + }, + { + "epoch": 0.8937444933920705, + "grad_norm": 1.5427105799340322, + "learning_rate": 1.2563794807367284e-05, + "loss": 0.6662027835845947, + "step": 2536 + }, + { + "epoch": 0.8940969162995595, + "grad_norm": 1.5270286613671318, + "learning_rate": 1.2558162716906537e-05, + "loss": 0.7742453813552856, + "step": 2537 + }, + { + "epoch": 0.8944493392070485, + "grad_norm": 1.628032718158035, + "learning_rate": 1.255252975802916e-05, + "loss": 0.6124528050422668, + "step": 2538 + }, + { + "epoch": 0.8948017621145374, + "grad_norm": 1.455711423520218, + "learning_rate": 1.2546895932647365e-05, + "loss": 0.5728615522384644, + "step": 2539 + }, + { + "epoch": 0.8951541850220265, + "grad_norm": 1.5737389396802581, + "learning_rate": 1.2541261242673665e-05, + "loss": 0.6347167491912842, + "step": 2540 + }, + { + "epoch": 0.8955066079295154, + "grad_norm": 1.62324317727844, + "learning_rate": 1.2535625690020861e-05, + "loss": 0.6350656747817993, + "step": 2541 + }, + { + "epoch": 0.8958590308370044, + "grad_norm": 1.674339310689998, + "learning_rate": 1.2529989276602043e-05, + "loss": 0.7538303732872009, + "step": 2542 + }, + { + "epoch": 0.8962114537444934, + "grad_norm": 1.5900983527544528, + "learning_rate": 1.2524352004330607e-05, + "loss": 0.8154318928718567, + "step": 2543 + }, + { + "epoch": 0.8965638766519823, + "grad_norm": 1.4033932104877718, + "learning_rate": 1.2518713875120222e-05, + "loss": 0.5313037633895874, + "step": 2544 + }, + { + "epoch": 0.8969162995594714, + "grad_norm": 1.3069539051845793, + "learning_rate": 1.2513074890884864e-05, + "loss": 
0.740921139717102, + "step": 2545 + }, + { + "epoch": 0.8972687224669603, + "grad_norm": 1.593785966579892, + "learning_rate": 1.250743505353879e-05, + "loss": 0.6079888343811035, + "step": 2546 + }, + { + "epoch": 0.8976211453744494, + "grad_norm": 1.266024042192646, + "learning_rate": 1.2501794364996553e-05, + "loss": 0.46736663579940796, + "step": 2547 + }, + { + "epoch": 0.8979735682819383, + "grad_norm": 1.5066472302506413, + "learning_rate": 1.2496152827172982e-05, + "loss": 0.5670880079269409, + "step": 2548 + }, + { + "epoch": 0.8983259911894274, + "grad_norm": 1.4991563073413907, + "learning_rate": 1.2490510441983212e-05, + "loss": 0.7845931649208069, + "step": 2549 + }, + { + "epoch": 0.8986784140969163, + "grad_norm": 1.5458127280177445, + "learning_rate": 1.2484867211342653e-05, + "loss": 0.5625143647193909, + "step": 2550 + }, + { + "epoch": 0.8990308370044053, + "grad_norm": 1.5409896244330605, + "learning_rate": 1.2479223137167011e-05, + "loss": 0.6631217002868652, + "step": 2551 + }, + { + "epoch": 0.8993832599118943, + "grad_norm": 1.6071757454969378, + "learning_rate": 1.247357822137227e-05, + "loss": 0.6588548421859741, + "step": 2552 + }, + { + "epoch": 0.8997356828193832, + "grad_norm": 1.4192601474848106, + "learning_rate": 1.24679324658747e-05, + "loss": 0.8046029806137085, + "step": 2553 + }, + { + "epoch": 0.9000881057268723, + "grad_norm": 1.6272051463241026, + "learning_rate": 1.2462285872590862e-05, + "loss": 0.6651894450187683, + "step": 2554 + }, + { + "epoch": 0.9004405286343612, + "grad_norm": 1.5179002680249722, + "learning_rate": 1.2456638443437605e-05, + "loss": 0.5888474583625793, + "step": 2555 + }, + { + "epoch": 0.9007929515418502, + "grad_norm": 1.7319345866859506, + "learning_rate": 1.2450990180332045e-05, + "loss": 0.5915735363960266, + "step": 2556 + }, + { + "epoch": 0.9011453744493392, + "grad_norm": 1.5409991319630119, + "learning_rate": 1.24453410851916e-05, + "loss": 0.6830431222915649, + "step": 2557 + }, + { + 
"epoch": 0.9014977973568282, + "grad_norm": 1.3954767744454935, + "learning_rate": 1.2439691159933955e-05, + "loss": 0.6812379956245422, + "step": 2558 + }, + { + "epoch": 0.9018502202643172, + "grad_norm": 1.3481753587360845, + "learning_rate": 1.2434040406477092e-05, + "loss": 0.6887152791023254, + "step": 2559 + }, + { + "epoch": 0.9022026431718062, + "grad_norm": 1.495436388275929, + "learning_rate": 1.2428388826739254e-05, + "loss": 0.677071213722229, + "step": 2560 + }, + { + "epoch": 0.9025550660792951, + "grad_norm": 1.5809198519920526, + "learning_rate": 1.242273642263899e-05, + "loss": 0.6635652780532837, + "step": 2561 + }, + { + "epoch": 0.9029074889867842, + "grad_norm": 1.7455357614962055, + "learning_rate": 1.2417083196095105e-05, + "loss": 0.7543712854385376, + "step": 2562 + }, + { + "epoch": 0.9032599118942731, + "grad_norm": 1.743758273604275, + "learning_rate": 1.2411429149026701e-05, + "loss": 0.6219073534011841, + "step": 2563 + }, + { + "epoch": 0.9036123348017621, + "grad_norm": 1.360518097358955, + "learning_rate": 1.2405774283353144e-05, + "loss": 0.6576533317565918, + "step": 2564 + }, + { + "epoch": 0.9039647577092511, + "grad_norm": 1.3683846685040542, + "learning_rate": 1.240011860099409e-05, + "loss": 0.6458585262298584, + "step": 2565 + }, + { + "epoch": 0.90431718061674, + "grad_norm": 1.5753618523282886, + "learning_rate": 1.2394462103869464e-05, + "loss": 0.6943198442459106, + "step": 2566 + }, + { + "epoch": 0.9046696035242291, + "grad_norm": 1.5425443594991994, + "learning_rate": 1.2388804793899473e-05, + "loss": 0.6684235334396362, + "step": 2567 + }, + { + "epoch": 0.905022026431718, + "grad_norm": 1.4432793187881665, + "learning_rate": 1.2383146673004598e-05, + "loss": 0.6707017421722412, + "step": 2568 + }, + { + "epoch": 0.9053744493392071, + "grad_norm": 1.4610510830510222, + "learning_rate": 1.2377487743105593e-05, + "loss": 0.6009544134140015, + "step": 2569 + }, + { + "epoch": 0.905726872246696, + "grad_norm": 
1.3343070463381261, + "learning_rate": 1.2371828006123488e-05, + "loss": 0.57770836353302, + "step": 2570 + }, + { + "epoch": 0.9060792951541851, + "grad_norm": 1.50423514822828, + "learning_rate": 1.236616746397959e-05, + "loss": 0.6146866083145142, + "step": 2571 + }, + { + "epoch": 0.906431718061674, + "grad_norm": 1.4060902038910876, + "learning_rate": 1.2360506118595476e-05, + "loss": 0.6374951601028442, + "step": 2572 + }, + { + "epoch": 0.906784140969163, + "grad_norm": 1.5006132241656203, + "learning_rate": 1.2354843971892998e-05, + "loss": 0.6933800578117371, + "step": 2573 + }, + { + "epoch": 0.907136563876652, + "grad_norm": 1.6402374081466708, + "learning_rate": 1.2349181025794278e-05, + "loss": 0.857126772403717, + "step": 2574 + }, + { + "epoch": 0.9074889867841409, + "grad_norm": 1.7970464713795387, + "learning_rate": 1.2343517282221704e-05, + "loss": 0.7316192388534546, + "step": 2575 + }, + { + "epoch": 0.90784140969163, + "grad_norm": 1.7338748475900745, + "learning_rate": 1.2337852743097947e-05, + "loss": 0.7916824817657471, + "step": 2576 + }, + { + "epoch": 0.9081938325991189, + "grad_norm": 1.342845056559204, + "learning_rate": 1.2332187410345941e-05, + "loss": 0.6437021493911743, + "step": 2577 + }, + { + "epoch": 0.908546255506608, + "grad_norm": 1.547322536503476, + "learning_rate": 1.2326521285888892e-05, + "loss": 0.8788109421730042, + "step": 2578 + }, + { + "epoch": 0.9088986784140969, + "grad_norm": 1.4382005842040866, + "learning_rate": 1.2320854371650268e-05, + "loss": 0.704395055770874, + "step": 2579 + }, + { + "epoch": 0.9092511013215859, + "grad_norm": 1.410037340911335, + "learning_rate": 1.2315186669553814e-05, + "loss": 0.6579844951629639, + "step": 2580 + }, + { + "epoch": 0.9096035242290749, + "grad_norm": 1.3089054036910626, + "learning_rate": 1.2309518181523537e-05, + "loss": 0.6329941749572754, + "step": 2581 + }, + { + "epoch": 0.9099559471365639, + "grad_norm": 1.746183595307062, + "learning_rate": 
1.2303848909483711e-05, + "loss": 0.8868603706359863, + "step": 2582 + }, + { + "epoch": 0.9103083700440529, + "grad_norm": 1.4531546458491524, + "learning_rate": 1.2298178855358875e-05, + "loss": 0.6402688026428223, + "step": 2583 + }, + { + "epoch": 0.9106607929515419, + "grad_norm": 1.3289180353613772, + "learning_rate": 1.2292508021073846e-05, + "loss": 0.8017194271087646, + "step": 2584 + }, + { + "epoch": 0.9110132158590308, + "grad_norm": 1.6340808373356166, + "learning_rate": 1.2286836408553687e-05, + "loss": 0.7396517992019653, + "step": 2585 + }, + { + "epoch": 0.9113656387665198, + "grad_norm": 1.5443847526543046, + "learning_rate": 1.2281164019723737e-05, + "loss": 0.6123272776603699, + "step": 2586 + }, + { + "epoch": 0.9117180616740088, + "grad_norm": 1.464544186162697, + "learning_rate": 1.2275490856509591e-05, + "loss": 0.7675807476043701, + "step": 2587 + }, + { + "epoch": 0.9120704845814978, + "grad_norm": 1.67164115622116, + "learning_rate": 1.2269816920837121e-05, + "loss": 0.6814998388290405, + "step": 2588 + }, + { + "epoch": 0.9124229074889868, + "grad_norm": 1.3228366401729674, + "learning_rate": 1.2264142214632441e-05, + "loss": 0.6290348768234253, + "step": 2589 + }, + { + "epoch": 0.9127753303964757, + "grad_norm": 1.5676260945728981, + "learning_rate": 1.2258466739821946e-05, + "loss": 0.6752464175224304, + "step": 2590 + }, + { + "epoch": 0.9131277533039648, + "grad_norm": 1.3388236473063337, + "learning_rate": 1.2252790498332275e-05, + "loss": 0.6153687238693237, + "step": 2591 + }, + { + "epoch": 0.9134801762114537, + "grad_norm": 1.5346187118504635, + "learning_rate": 1.2247113492090344e-05, + "loss": 0.5952479839324951, + "step": 2592 + }, + { + "epoch": 0.9138325991189428, + "grad_norm": 1.4457638395568853, + "learning_rate": 1.2241435723023309e-05, + "loss": 0.5457659959793091, + "step": 2593 + }, + { + "epoch": 0.9141850220264317, + "grad_norm": 1.5389040689398128, + "learning_rate": 1.2235757193058607e-05, + "loss": 
0.7373491525650024, + "step": 2594 + }, + { + "epoch": 0.9145374449339208, + "grad_norm": 1.3149945847764668, + "learning_rate": 1.2230077904123914e-05, + "loss": 0.6564488410949707, + "step": 2595 + }, + { + "epoch": 0.9148898678414097, + "grad_norm": 1.8716233271125673, + "learning_rate": 1.2224397858147176e-05, + "loss": 0.6790947914123535, + "step": 2596 + }, + { + "epoch": 0.9152422907488986, + "grad_norm": 1.6467277287942856, + "learning_rate": 1.2218717057056592e-05, + "loss": 0.8304486274719238, + "step": 2597 + }, + { + "epoch": 0.9155947136563877, + "grad_norm": 1.7018746535629268, + "learning_rate": 1.2213035502780616e-05, + "loss": 0.7452701330184937, + "step": 2598 + }, + { + "epoch": 0.9159471365638766, + "grad_norm": 1.270448247487427, + "learning_rate": 1.2207353197247957e-05, + "loss": 0.572200357913971, + "step": 2599 + }, + { + "epoch": 0.9162995594713657, + "grad_norm": 1.574291214704138, + "learning_rate": 1.2201670142387587e-05, + "loss": 0.7142342925071716, + "step": 2600 + }, + { + "epoch": 0.9166519823788546, + "grad_norm": 1.367606009894927, + "learning_rate": 1.219598634012872e-05, + "loss": 0.9390528202056885, + "step": 2601 + }, + { + "epoch": 0.9170044052863436, + "grad_norm": 1.6870829349403977, + "learning_rate": 1.2190301792400832e-05, + "loss": 0.6897540092468262, + "step": 2602 + }, + { + "epoch": 0.9173568281938326, + "grad_norm": 1.5631074773710765, + "learning_rate": 1.2184616501133649e-05, + "loss": 0.7309582233428955, + "step": 2603 + }, + { + "epoch": 0.9177092511013216, + "grad_norm": 1.4956685909345118, + "learning_rate": 1.2178930468257154e-05, + "loss": 0.7692370414733887, + "step": 2604 + }, + { + "epoch": 0.9180616740088106, + "grad_norm": 1.6160577913139176, + "learning_rate": 1.2173243695701575e-05, + "loss": 0.7650456428527832, + "step": 2605 + }, + { + "epoch": 0.9184140969162996, + "grad_norm": 1.4419682356133905, + "learning_rate": 1.2167556185397396e-05, + "loss": 0.6000699996948242, + "step": 2606 + }, + { + 
"epoch": 0.9187665198237885, + "grad_norm": 1.368037173998054, + "learning_rate": 1.2161867939275344e-05, + "loss": 0.6227651834487915, + "step": 2607 + }, + { + "epoch": 0.9191189427312776, + "grad_norm": 1.3507337866227296, + "learning_rate": 1.2156178959266414e-05, + "loss": 0.6554160118103027, + "step": 2608 + }, + { + "epoch": 0.9194713656387665, + "grad_norm": 1.4986959017577084, + "learning_rate": 1.2150489247301826e-05, + "loss": 0.5360773801803589, + "step": 2609 + }, + { + "epoch": 0.9198237885462555, + "grad_norm": 1.3546990782009203, + "learning_rate": 1.2144798805313065e-05, + "loss": 0.7184062004089355, + "step": 2610 + }, + { + "epoch": 0.9201762114537445, + "grad_norm": 1.6293146255106934, + "learning_rate": 1.2139107635231857e-05, + "loss": 0.646910548210144, + "step": 2611 + }, + { + "epoch": 0.9205286343612334, + "grad_norm": 1.449047238736513, + "learning_rate": 1.2133415738990179e-05, + "loss": 0.7794413566589355, + "step": 2612 + }, + { + "epoch": 0.9208810572687225, + "grad_norm": 1.5173448374489182, + "learning_rate": 1.2127723118520254e-05, + "loss": 0.5904654860496521, + "step": 2613 + }, + { + "epoch": 0.9212334801762114, + "grad_norm": 1.6062827687776695, + "learning_rate": 1.2122029775754545e-05, + "loss": 0.5526635646820068, + "step": 2614 + }, + { + "epoch": 0.9215859030837005, + "grad_norm": 1.584080412995617, + "learning_rate": 1.2116335712625766e-05, + "loss": 0.6832528710365295, + "step": 2615 + }, + { + "epoch": 0.9219383259911894, + "grad_norm": 1.5962919739796952, + "learning_rate": 1.211064093106688e-05, + "loss": 0.5858304500579834, + "step": 2616 + }, + { + "epoch": 0.9222907488986785, + "grad_norm": 1.6542154949587857, + "learning_rate": 1.2104945433011079e-05, + "loss": 0.7383478879928589, + "step": 2617 + }, + { + "epoch": 0.9226431718061674, + "grad_norm": 1.4197774198085091, + "learning_rate": 1.2099249220391815e-05, + "loss": 0.6466768980026245, + "step": 2618 + }, + { + "epoch": 0.9229955947136564, + "grad_norm": 
1.6780588288371647, + "learning_rate": 1.209355229514277e-05, + "loss": 0.5681238174438477, + "step": 2619 + }, + { + "epoch": 0.9233480176211454, + "grad_norm": 1.4473210287022626, + "learning_rate": 1.2087854659197874e-05, + "loss": 0.5726606249809265, + "step": 2620 + }, + { + "epoch": 0.9237004405286343, + "grad_norm": 1.5671254030487451, + "learning_rate": 1.2082156314491298e-05, + "loss": 0.6643342971801758, + "step": 2621 + }, + { + "epoch": 0.9240528634361234, + "grad_norm": 1.6891696074210503, + "learning_rate": 1.2076457262957454e-05, + "loss": 0.5408967733383179, + "step": 2622 + }, + { + "epoch": 0.9244052863436123, + "grad_norm": 1.503887173232949, + "learning_rate": 1.207075750653099e-05, + "loss": 0.706169843673706, + "step": 2623 + }, + { + "epoch": 0.9247577092511013, + "grad_norm": 1.7934999117659478, + "learning_rate": 1.2065057047146797e-05, + "loss": 0.7973969578742981, + "step": 2624 + }, + { + "epoch": 0.9251101321585903, + "grad_norm": 1.4120942109312036, + "learning_rate": 1.2059355886740002e-05, + "loss": 0.6907010674476624, + "step": 2625 + }, + { + "epoch": 0.9254625550660793, + "grad_norm": 1.8378017160561377, + "learning_rate": 1.2053654027245977e-05, + "loss": 0.8174253702163696, + "step": 2626 + }, + { + "epoch": 0.9258149779735683, + "grad_norm": 1.8873519737119473, + "learning_rate": 1.204795147060032e-05, + "loss": 0.60319983959198, + "step": 2627 + }, + { + "epoch": 0.9261674008810573, + "grad_norm": 2.916318649806586, + "learning_rate": 1.204224821873887e-05, + "loss": 0.718228816986084, + "step": 2628 + }, + { + "epoch": 0.9265198237885462, + "grad_norm": 1.5801609410641386, + "learning_rate": 1.2036544273597708e-05, + "loss": 0.7385132312774658, + "step": 2629 + }, + { + "epoch": 0.9268722466960353, + "grad_norm": 1.5320403236251587, + "learning_rate": 1.203083963711315e-05, + "loss": 0.7700635194778442, + "step": 2630 + }, + { + "epoch": 0.9272246696035242, + "grad_norm": 1.4381703720368488, + "learning_rate": 
1.2025134311221732e-05, + "loss": 0.8767666816711426, + "step": 2631 + }, + { + "epoch": 0.9275770925110132, + "grad_norm": 1.4534975042510074, + "learning_rate": 1.2019428297860241e-05, + "loss": 0.6517986059188843, + "step": 2632 + }, + { + "epoch": 0.9279295154185022, + "grad_norm": 1.3295910752440807, + "learning_rate": 1.2013721598965688e-05, + "loss": 0.5967941284179688, + "step": 2633 + }, + { + "epoch": 0.9282819383259912, + "grad_norm": 2.085302745009741, + "learning_rate": 1.2008014216475327e-05, + "loss": 0.7480533123016357, + "step": 2634 + }, + { + "epoch": 0.9286343612334802, + "grad_norm": 1.415633444981562, + "learning_rate": 1.2002306152326626e-05, + "loss": 0.8020488023757935, + "step": 2635 + }, + { + "epoch": 0.9289867841409691, + "grad_norm": 1.235581839334599, + "learning_rate": 1.1996597408457302e-05, + "loss": 0.5535889863967896, + "step": 2636 + }, + { + "epoch": 0.9293392070484582, + "grad_norm": 1.5093780754929471, + "learning_rate": 1.1990887986805295e-05, + "loss": 0.6888864040374756, + "step": 2637 + }, + { + "epoch": 0.9296916299559471, + "grad_norm": 1.761723253773031, + "learning_rate": 1.1985177889308777e-05, + "loss": 0.7723515033721924, + "step": 2638 + }, + { + "epoch": 0.9300440528634362, + "grad_norm": 2.4386861549294476, + "learning_rate": 1.1979467117906143e-05, + "loss": 0.6929488182067871, + "step": 2639 + }, + { + "epoch": 0.9303964757709251, + "grad_norm": 1.7413716913523498, + "learning_rate": 1.1973755674536027e-05, + "loss": 0.7025216221809387, + "step": 2640 + }, + { + "epoch": 0.9307488986784141, + "grad_norm": 1.5278537581621425, + "learning_rate": 1.1968043561137287e-05, + "loss": 0.6618740558624268, + "step": 2641 + }, + { + "epoch": 0.9311013215859031, + "grad_norm": 1.3720349025623486, + "learning_rate": 1.1962330779649002e-05, + "loss": 0.5308352708816528, + "step": 2642 + }, + { + "epoch": 0.931453744493392, + "grad_norm": 1.6043152610659495, + "learning_rate": 1.1956617332010488e-05, + "loss": 
0.6559470891952515, + "step": 2643 + }, + { + "epoch": 0.9318061674008811, + "grad_norm": 1.5758989244918422, + "learning_rate": 1.1950903220161286e-05, + "loss": 0.6572221517562866, + "step": 2644 + }, + { + "epoch": 0.93215859030837, + "grad_norm": 1.7357943090474917, + "learning_rate": 1.194518844604115e-05, + "loss": 0.7854161262512207, + "step": 2645 + }, + { + "epoch": 0.932511013215859, + "grad_norm": 1.555855365183626, + "learning_rate": 1.1939473011590075e-05, + "loss": 0.6471760869026184, + "step": 2646 + }, + { + "epoch": 0.932863436123348, + "grad_norm": 1.5672890328663938, + "learning_rate": 1.1933756918748271e-05, + "loss": 0.6261184215545654, + "step": 2647 + }, + { + "epoch": 0.933215859030837, + "grad_norm": 1.425764950800843, + "learning_rate": 1.1928040169456176e-05, + "loss": 0.6876180171966553, + "step": 2648 + }, + { + "epoch": 0.933568281938326, + "grad_norm": 1.6203483271740744, + "learning_rate": 1.1922322765654446e-05, + "loss": 0.6782447099685669, + "step": 2649 + }, + { + "epoch": 0.933920704845815, + "grad_norm": 1.640471126849017, + "learning_rate": 1.1916604709283958e-05, + "loss": 0.6085894107818604, + "step": 2650 + }, + { + "epoch": 0.934273127753304, + "grad_norm": 1.6964969219798813, + "learning_rate": 1.1910886002285822e-05, + "loss": 0.6940577030181885, + "step": 2651 + }, + { + "epoch": 0.934625550660793, + "grad_norm": 1.4704189591593113, + "learning_rate": 1.1905166646601356e-05, + "loss": 0.8204144239425659, + "step": 2652 + }, + { + "epoch": 0.9349779735682819, + "grad_norm": 1.389489538033466, + "learning_rate": 1.1899446644172106e-05, + "loss": 0.6184309720993042, + "step": 2653 + }, + { + "epoch": 0.9353303964757709, + "grad_norm": 2.1507675107714306, + "learning_rate": 1.1893725996939831e-05, + "loss": 0.7499250173568726, + "step": 2654 + }, + { + "epoch": 0.9356828193832599, + "grad_norm": 1.739709417281562, + "learning_rate": 1.1888004706846519e-05, + "loss": 0.7021058797836304, + "step": 2655 + }, + { + "epoch": 
0.9360352422907489, + "grad_norm": 1.4311959050457856, + "learning_rate": 1.188228277583436e-05, + "loss": 0.6005666255950928, + "step": 2656 + }, + { + "epoch": 0.9363876651982379, + "grad_norm": 1.4910024814198868, + "learning_rate": 1.1876560205845782e-05, + "loss": 0.6572481393814087, + "step": 2657 + }, + { + "epoch": 0.9367400881057268, + "grad_norm": 1.5258435486694566, + "learning_rate": 1.187083699882341e-05, + "loss": 0.7402434349060059, + "step": 2658 + }, + { + "epoch": 0.9370925110132159, + "grad_norm": 1.4352893489445113, + "learning_rate": 1.1865113156710106e-05, + "loss": 0.6693596243858337, + "step": 2659 + }, + { + "epoch": 0.9374449339207048, + "grad_norm": 1.6704808140330663, + "learning_rate": 1.1859388681448925e-05, + "loss": 0.7708617448806763, + "step": 2660 + }, + { + "epoch": 0.9377973568281939, + "grad_norm": 1.4245143913781195, + "learning_rate": 1.1853663574983154e-05, + "loss": 0.5871701836585999, + "step": 2661 + }, + { + "epoch": 0.9381497797356828, + "grad_norm": 1.505716027406483, + "learning_rate": 1.1847937839256287e-05, + "loss": 0.6492994427680969, + "step": 2662 + }, + { + "epoch": 0.9385022026431719, + "grad_norm": 1.3908643684674444, + "learning_rate": 1.1842211476212038e-05, + "loss": 0.6803429126739502, + "step": 2663 + }, + { + "epoch": 0.9388546255506608, + "grad_norm": 1.5017846140199234, + "learning_rate": 1.1836484487794318e-05, + "loss": 0.5602244734764099, + "step": 2664 + }, + { + "epoch": 0.9392070484581497, + "grad_norm": 1.2797636855685697, + "learning_rate": 1.183075687594727e-05, + "loss": 0.6562157869338989, + "step": 2665 + }, + { + "epoch": 0.9395594713656388, + "grad_norm": 1.4855818018568143, + "learning_rate": 1.182502864261524e-05, + "loss": 0.71474289894104, + "step": 2666 + }, + { + "epoch": 0.9399118942731277, + "grad_norm": 1.5995143445420303, + "learning_rate": 1.1819299789742782e-05, + "loss": 0.7130062580108643, + "step": 2667 + }, + { + "epoch": 0.9402643171806168, + "grad_norm": 
1.645740195320987, + "learning_rate": 1.1813570319274663e-05, + "loss": 0.788813054561615, + "step": 2668 + }, + { + "epoch": 0.9406167400881057, + "grad_norm": 1.965041520497338, + "learning_rate": 1.1807840233155863e-05, + "loss": 0.6485022306442261, + "step": 2669 + }, + { + "epoch": 0.9409691629955947, + "grad_norm": 1.6399057690578631, + "learning_rate": 1.1802109533331562e-05, + "loss": 0.4491521418094635, + "step": 2670 + }, + { + "epoch": 0.9413215859030837, + "grad_norm": 1.6744760497066637, + "learning_rate": 1.1796378221747162e-05, + "loss": 0.6073683500289917, + "step": 2671 + }, + { + "epoch": 0.9416740088105727, + "grad_norm": 1.859395754773969, + "learning_rate": 1.179064630034826e-05, + "loss": 0.5942971706390381, + "step": 2672 + }, + { + "epoch": 0.9420264317180617, + "grad_norm": 1.4303169952284007, + "learning_rate": 1.1784913771080667e-05, + "loss": 0.7295013666152954, + "step": 2673 + }, + { + "epoch": 0.9423788546255507, + "grad_norm": 1.8192026049611665, + "learning_rate": 1.1779180635890394e-05, + "loss": 0.7347372770309448, + "step": 2674 + }, + { + "epoch": 0.9427312775330396, + "grad_norm": 1.5350977995485566, + "learning_rate": 1.1773446896723668e-05, + "loss": 0.5591942667961121, + "step": 2675 + }, + { + "epoch": 0.9430837004405286, + "grad_norm": 1.5036340589436215, + "learning_rate": 1.1767712555526911e-05, + "loss": 0.822568953037262, + "step": 2676 + }, + { + "epoch": 0.9434361233480176, + "grad_norm": 1.4619836017557306, + "learning_rate": 1.1761977614246757e-05, + "loss": 0.649920642375946, + "step": 2677 + }, + { + "epoch": 0.9437885462555066, + "grad_norm": 1.4884584586985279, + "learning_rate": 1.1756242074830036e-05, + "loss": 0.6298861503601074, + "step": 2678 + }, + { + "epoch": 0.9441409691629956, + "grad_norm": 1.6194483495779424, + "learning_rate": 1.1750505939223787e-05, + "loss": 0.81938636302948, + "step": 2679 + }, + { + "epoch": 0.9444933920704845, + "grad_norm": 1.4751430048371623, + "learning_rate": 
1.1744769209375248e-05, + "loss": 0.6627225875854492, + "step": 2680 + }, + { + "epoch": 0.9448458149779736, + "grad_norm": 1.310837287475738, + "learning_rate": 1.1739031887231864e-05, + "loss": 0.6563318371772766, + "step": 2681 + }, + { + "epoch": 0.9451982378854625, + "grad_norm": 1.3782616320804129, + "learning_rate": 1.1733293974741273e-05, + "loss": 0.5702694654464722, + "step": 2682 + }, + { + "epoch": 0.9455506607929516, + "grad_norm": 1.5543579440741437, + "learning_rate": 1.1727555473851321e-05, + "loss": 0.685553789138794, + "step": 2683 + }, + { + "epoch": 0.9459030837004405, + "grad_norm": 1.2085432227797441, + "learning_rate": 1.172181638651005e-05, + "loss": 0.6092622876167297, + "step": 2684 + }, + { + "epoch": 0.9462555066079296, + "grad_norm": 2.0946243925185013, + "learning_rate": 1.1716076714665701e-05, + "loss": 0.6650614738464355, + "step": 2685 + }, + { + "epoch": 0.9466079295154185, + "grad_norm": 1.6479809864443196, + "learning_rate": 1.171033646026671e-05, + "loss": 0.7665754556655884, + "step": 2686 + }, + { + "epoch": 0.9469603524229074, + "grad_norm": 1.3199886923676785, + "learning_rate": 1.1704595625261722e-05, + "loss": 0.6365277171134949, + "step": 2687 + }, + { + "epoch": 0.9473127753303965, + "grad_norm": 1.4825934002405374, + "learning_rate": 1.1698854211599565e-05, + "loss": 0.6622267961502075, + "step": 2688 + }, + { + "epoch": 0.9476651982378854, + "grad_norm": 1.4519347010464663, + "learning_rate": 1.1693112221229278e-05, + "loss": 0.6636145710945129, + "step": 2689 + }, + { + "epoch": 0.9480176211453745, + "grad_norm": 1.3381328445735352, + "learning_rate": 1.168736965610008e-05, + "loss": 0.6943212747573853, + "step": 2690 + }, + { + "epoch": 0.9483700440528634, + "grad_norm": 1.5439836232478343, + "learning_rate": 1.1681626518161397e-05, + "loss": 0.7479512691497803, + "step": 2691 + }, + { + "epoch": 0.9487224669603525, + "grad_norm": 1.5424571304173897, + "learning_rate": 1.1675882809362846e-05, + "loss": 
0.7227041721343994, + "step": 2692 + }, + { + "epoch": 0.9490748898678414, + "grad_norm": 1.3855049912904343, + "learning_rate": 1.1670138531654238e-05, + "loss": 0.7366166114807129, + "step": 2693 + }, + { + "epoch": 0.9494273127753304, + "grad_norm": 1.634945701470733, + "learning_rate": 1.1664393686985571e-05, + "loss": 0.8634493350982666, + "step": 2694 + }, + { + "epoch": 0.9497797356828194, + "grad_norm": 1.3102748532201536, + "learning_rate": 1.165864827730705e-05, + "loss": 0.5802862048149109, + "step": 2695 + }, + { + "epoch": 0.9501321585903084, + "grad_norm": 1.571840947668404, + "learning_rate": 1.1652902304569053e-05, + "loss": 0.5931085348129272, + "step": 2696 + }, + { + "epoch": 0.9504845814977974, + "grad_norm": 1.7175179856841813, + "learning_rate": 1.164715577072217e-05, + "loss": 0.7684508562088013, + "step": 2697 + }, + { + "epoch": 0.9508370044052863, + "grad_norm": 1.6094834386500196, + "learning_rate": 1.1641408677717158e-05, + "loss": 0.94246906042099, + "step": 2698 + }, + { + "epoch": 0.9511894273127753, + "grad_norm": 1.3999360216133725, + "learning_rate": 1.1635661027504985e-05, + "loss": 0.7072316408157349, + "step": 2699 + }, + { + "epoch": 0.9515418502202643, + "grad_norm": 1.5926279454886292, + "learning_rate": 1.16299128220368e-05, + "loss": 0.5872572064399719, + "step": 2700 + }, + { + "epoch": 0.9518942731277533, + "grad_norm": 1.4987885212929257, + "learning_rate": 1.1624164063263931e-05, + "loss": 0.6549060344696045, + "step": 2701 + }, + { + "epoch": 0.9522466960352423, + "grad_norm": 1.6773153304869155, + "learning_rate": 1.161841475313791e-05, + "loss": 0.7338137626647949, + "step": 2702 + }, + { + "epoch": 0.9525991189427313, + "grad_norm": 1.6523970676343225, + "learning_rate": 1.161266489361045e-05, + "loss": 0.6942911148071289, + "step": 2703 + }, + { + "epoch": 0.9529515418502202, + "grad_norm": 2.037450532351288, + "learning_rate": 1.1606914486633444e-05, + "loss": 0.674375057220459, + "step": 2704 + }, + { + "epoch": 
0.9533039647577093, + "grad_norm": 1.6450610385875453, + "learning_rate": 1.160116353415898e-05, + "loss": 0.6790377497673035, + "step": 2705 + }, + { + "epoch": 0.9536563876651982, + "grad_norm": 1.6724856793361191, + "learning_rate": 1.1595412038139326e-05, + "loss": 0.5902142524719238, + "step": 2706 + }, + { + "epoch": 0.9540088105726873, + "grad_norm": 1.4286047469499437, + "learning_rate": 1.1589660000526937e-05, + "loss": 0.7034019231796265, + "step": 2707 + }, + { + "epoch": 0.9543612334801762, + "grad_norm": 3.1062423334867106, + "learning_rate": 1.158390742327445e-05, + "loss": 0.6986846923828125, + "step": 2708 + }, + { + "epoch": 0.9547136563876651, + "grad_norm": 1.8367783325674814, + "learning_rate": 1.1578154308334683e-05, + "loss": 0.6972544193267822, + "step": 2709 + }, + { + "epoch": 0.9550660792951542, + "grad_norm": 1.3370474194561557, + "learning_rate": 1.1572400657660646e-05, + "loss": 0.6312702298164368, + "step": 2710 + }, + { + "epoch": 0.9554185022026431, + "grad_norm": 1.7161015062577845, + "learning_rate": 1.1566646473205518e-05, + "loss": 0.7584360241889954, + "step": 2711 + }, + { + "epoch": 0.9557709251101322, + "grad_norm": 1.256436023255263, + "learning_rate": 1.156089175692267e-05, + "loss": 0.700894296169281, + "step": 2712 + }, + { + "epoch": 0.9561233480176211, + "grad_norm": 1.3257581819044393, + "learning_rate": 1.1555136510765645e-05, + "loss": 0.5637902617454529, + "step": 2713 + }, + { + "epoch": 0.9564757709251102, + "grad_norm": 1.388319575976614, + "learning_rate": 1.1549380736688173e-05, + "loss": 0.4537314772605896, + "step": 2714 + }, + { + "epoch": 0.9568281938325991, + "grad_norm": 1.8324279373886256, + "learning_rate": 1.1543624436644161e-05, + "loss": 0.7880423069000244, + "step": 2715 + }, + { + "epoch": 0.9571806167400881, + "grad_norm": 1.6310441104063826, + "learning_rate": 1.1537867612587692e-05, + "loss": 0.7314344644546509, + "step": 2716 + }, + { + "epoch": 0.9575330396475771, + "grad_norm": 
1.7810937354544796, + "learning_rate": 1.1532110266473026e-05, + "loss": 0.9550024271011353, + "step": 2717 + }, + { + "epoch": 0.9578854625550661, + "grad_norm": 1.3474455317445524, + "learning_rate": 1.152635240025461e-05, + "loss": 0.6482470035552979, + "step": 2718 + }, + { + "epoch": 0.9582378854625551, + "grad_norm": 1.6637520992254753, + "learning_rate": 1.152059401588705e-05, + "loss": 0.6347365975379944, + "step": 2719 + }, + { + "epoch": 0.958590308370044, + "grad_norm": 1.469780222161662, + "learning_rate": 1.151483511532515e-05, + "loss": 0.7214993238449097, + "step": 2720 + }, + { + "epoch": 0.958942731277533, + "grad_norm": 1.4597118679681749, + "learning_rate": 1.1509075700523869e-05, + "loss": 0.6255312561988831, + "step": 2721 + }, + { + "epoch": 0.959295154185022, + "grad_norm": 1.4735593911126945, + "learning_rate": 1.1503315773438352e-05, + "loss": 0.6152437925338745, + "step": 2722 + }, + { + "epoch": 0.959647577092511, + "grad_norm": 1.8178378627357112, + "learning_rate": 1.1497555336023916e-05, + "loss": 0.6565401554107666, + "step": 2723 + }, + { + "epoch": 0.96, + "grad_norm": 1.5268947365741583, + "learning_rate": 1.1491794390236047e-05, + "loss": 0.796178936958313, + "step": 2724 + }, + { + "epoch": 0.960352422907489, + "grad_norm": 1.4289859748860345, + "learning_rate": 1.1486032938030409e-05, + "loss": 0.6243436336517334, + "step": 2725 + }, + { + "epoch": 0.960704845814978, + "grad_norm": 3.1702620206811036, + "learning_rate": 1.148027098136284e-05, + "loss": 0.6043159365653992, + "step": 2726 + }, + { + "epoch": 0.961057268722467, + "grad_norm": 2.2643023721896554, + "learning_rate": 1.1474508522189334e-05, + "loss": 0.7268002033233643, + "step": 2727 + }, + { + "epoch": 0.9614096916299559, + "grad_norm": 1.6105062692265093, + "learning_rate": 1.1468745562466076e-05, + "loss": 0.6156840324401855, + "step": 2728 + }, + { + "epoch": 0.961762114537445, + "grad_norm": 1.3602355982897767, + "learning_rate": 1.1462982104149409e-05, + 
"loss": 0.8415796756744385, + "step": 2729 + }, + { + "epoch": 0.9621145374449339, + "grad_norm": 1.7603646172978014, + "learning_rate": 1.145721814919585e-05, + "loss": 0.5983521342277527, + "step": 2730 + }, + { + "epoch": 0.962466960352423, + "grad_norm": 1.6358592349658665, + "learning_rate": 1.1451453699562077e-05, + "loss": 0.6144511699676514, + "step": 2731 + }, + { + "epoch": 0.9628193832599119, + "grad_norm": 1.66844617820458, + "learning_rate": 1.1445688757204942e-05, + "loss": 0.6449630260467529, + "step": 2732 + }, + { + "epoch": 0.9631718061674008, + "grad_norm": 1.5343236560799753, + "learning_rate": 1.1439923324081465e-05, + "loss": 0.7321716547012329, + "step": 2733 + }, + { + "epoch": 0.9635242290748899, + "grad_norm": 1.9877317345810759, + "learning_rate": 1.1434157402148838e-05, + "loss": 0.8354923129081726, + "step": 2734 + }, + { + "epoch": 0.9638766519823788, + "grad_norm": 1.3653549857555707, + "learning_rate": 1.14283909933644e-05, + "loss": 0.728820264339447, + "step": 2735 + }, + { + "epoch": 0.9642290748898679, + "grad_norm": 1.4013626479373464, + "learning_rate": 1.1422624099685675e-05, + "loss": 0.6683202981948853, + "step": 2736 + }, + { + "epoch": 0.9645814977973568, + "grad_norm": 1.6203635868462385, + "learning_rate": 1.141685672307034e-05, + "loss": 0.7159590125083923, + "step": 2737 + }, + { + "epoch": 0.9649339207048458, + "grad_norm": 1.9197883933040156, + "learning_rate": 1.1411088865476245e-05, + "loss": 0.8269981145858765, + "step": 2738 + }, + { + "epoch": 0.9652863436123348, + "grad_norm": 1.7561037821195844, + "learning_rate": 1.1405320528861393e-05, + "loss": 0.6993168592453003, + "step": 2739 + }, + { + "epoch": 0.9656387665198238, + "grad_norm": 1.4700171152077626, + "learning_rate": 1.1399551715183956e-05, + "loss": 0.6296184062957764, + "step": 2740 + }, + { + "epoch": 0.9659911894273128, + "grad_norm": 1.5505746175576802, + "learning_rate": 1.1393782426402267e-05, + "loss": 0.670283317565918, + "step": 2741 + }, + { 
+ "epoch": 0.9663436123348018, + "grad_norm": 1.6125051339337373, + "learning_rate": 1.1388012664474824e-05, + "loss": 0.9248946905136108, + "step": 2742 + }, + { + "epoch": 0.9666960352422908, + "grad_norm": 1.7027770081175677, + "learning_rate": 1.1382242431360272e-05, + "loss": 0.7965992093086243, + "step": 2743 + }, + { + "epoch": 0.9670484581497797, + "grad_norm": 1.6413263453773168, + "learning_rate": 1.1376471729017435e-05, + "loss": 0.632454514503479, + "step": 2744 + }, + { + "epoch": 0.9674008810572687, + "grad_norm": 1.4364322830343181, + "learning_rate": 1.1370700559405283e-05, + "loss": 0.6463649272918701, + "step": 2745 + }, + { + "epoch": 0.9677533039647577, + "grad_norm": 1.5890798975591325, + "learning_rate": 1.1364928924482952e-05, + "loss": 0.5864677429199219, + "step": 2746 + }, + { + "epoch": 0.9681057268722467, + "grad_norm": 1.5090045708209912, + "learning_rate": 1.1359156826209726e-05, + "loss": 0.6313967108726501, + "step": 2747 + }, + { + "epoch": 0.9684581497797357, + "grad_norm": 1.2634359711899723, + "learning_rate": 1.1353384266545056e-05, + "loss": 0.5736903548240662, + "step": 2748 + }, + { + "epoch": 0.9688105726872247, + "grad_norm": 1.3956693120918684, + "learning_rate": 1.1347611247448544e-05, + "loss": 0.672286868095398, + "step": 2749 + }, + { + "epoch": 0.9691629955947136, + "grad_norm": 1.7905269273993527, + "learning_rate": 1.1341837770879957e-05, + "loss": 0.7181379795074463, + "step": 2750 + }, + { + "epoch": 0.9695154185022027, + "grad_norm": 1.3192307426609728, + "learning_rate": 1.1336063838799204e-05, + "loss": 0.6160816550254822, + "step": 2751 + }, + { + "epoch": 0.9698678414096916, + "grad_norm": 1.3858752821091025, + "learning_rate": 1.1330289453166361e-05, + "loss": 0.737337589263916, + "step": 2752 + }, + { + "epoch": 0.9702202643171807, + "grad_norm": 1.4067461052680075, + "learning_rate": 1.1324514615941644e-05, + "loss": 0.6752150058746338, + "step": 2753 + }, + { + "epoch": 0.9705726872246696, + "grad_norm": 
1.502210352579975, + "learning_rate": 1.1318739329085438e-05, + "loss": 0.6917784214019775, + "step": 2754 + }, + { + "epoch": 0.9709251101321585, + "grad_norm": 1.873477988490531, + "learning_rate": 1.131296359455827e-05, + "loss": 0.7863353490829468, + "step": 2755 + }, + { + "epoch": 0.9712775330396476, + "grad_norm": 1.338648959960645, + "learning_rate": 1.1307187414320823e-05, + "loss": 0.6236519813537598, + "step": 2756 + }, + { + "epoch": 0.9716299559471365, + "grad_norm": 1.443196389025093, + "learning_rate": 1.130141079033393e-05, + "loss": 0.6957560181617737, + "step": 2757 + }, + { + "epoch": 0.9719823788546256, + "grad_norm": 1.6687230505642796, + "learning_rate": 1.1295633724558574e-05, + "loss": 0.6460270881652832, + "step": 2758 + }, + { + "epoch": 0.9723348017621145, + "grad_norm": 1.4575621917812085, + "learning_rate": 1.1289856218955892e-05, + "loss": 0.7352741956710815, + "step": 2759 + }, + { + "epoch": 0.9726872246696036, + "grad_norm": 1.7999835448567072, + "learning_rate": 1.1284078275487165e-05, + "loss": 0.6285911798477173, + "step": 2760 + }, + { + "epoch": 0.9730396475770925, + "grad_norm": 1.4280819376163427, + "learning_rate": 1.1278299896113823e-05, + "loss": 0.6577984094619751, + "step": 2761 + }, + { + "epoch": 0.9733920704845815, + "grad_norm": 1.4424142490511096, + "learning_rate": 1.1272521082797452e-05, + "loss": 0.6445770859718323, + "step": 2762 + }, + { + "epoch": 0.9737444933920705, + "grad_norm": 1.3911141072298185, + "learning_rate": 1.1266741837499773e-05, + "loss": 0.557687520980835, + "step": 2763 + }, + { + "epoch": 0.9740969162995595, + "grad_norm": 1.559776829553993, + "learning_rate": 1.1260962162182664e-05, + "loss": 0.6117650866508484, + "step": 2764 + }, + { + "epoch": 0.9744493392070485, + "grad_norm": 1.4751836492364416, + "learning_rate": 1.1255182058808143e-05, + "loss": 0.6498113870620728, + "step": 2765 + }, + { + "epoch": 0.9748017621145374, + "grad_norm": 1.9707928584824135, + "learning_rate": 
1.1249401529338375e-05, + "loss": 0.8738062381744385, + "step": 2766 + }, + { + "epoch": 0.9751541850220264, + "grad_norm": 1.6389865398372674, + "learning_rate": 1.1243620575735672e-05, + "loss": 0.551408052444458, + "step": 2767 + }, + { + "epoch": 0.9755066079295154, + "grad_norm": 1.645802380531443, + "learning_rate": 1.1237839199962488e-05, + "loss": 0.7197355031967163, + "step": 2768 + }, + { + "epoch": 0.9758590308370044, + "grad_norm": 1.5393826706252047, + "learning_rate": 1.1232057403981415e-05, + "loss": 0.5704015493392944, + "step": 2769 + }, + { + "epoch": 0.9762114537444934, + "grad_norm": 1.373872634740153, + "learning_rate": 1.1226275189755199e-05, + "loss": 0.603929877281189, + "step": 2770 + }, + { + "epoch": 0.9765638766519824, + "grad_norm": 1.731229349756288, + "learning_rate": 1.1220492559246719e-05, + "loss": 0.8652673363685608, + "step": 2771 + }, + { + "epoch": 0.9769162995594713, + "grad_norm": 1.5891679358388853, + "learning_rate": 1.1214709514418998e-05, + "loss": 0.6827684044837952, + "step": 2772 + }, + { + "epoch": 0.9772687224669604, + "grad_norm": 1.3323036683469254, + "learning_rate": 1.1208926057235197e-05, + "loss": 0.5584808588027954, + "step": 2773 + }, + { + "epoch": 0.9776211453744493, + "grad_norm": 1.5495557729443614, + "learning_rate": 1.1203142189658627e-05, + "loss": 0.7242820262908936, + "step": 2774 + }, + { + "epoch": 0.9779735682819384, + "grad_norm": 1.3489108616226997, + "learning_rate": 1.1197357913652725e-05, + "loss": 0.5299571752548218, + "step": 2775 + }, + { + "epoch": 0.9783259911894273, + "grad_norm": 1.8541326435971137, + "learning_rate": 1.1191573231181074e-05, + "loss": 0.69478440284729, + "step": 2776 + }, + { + "epoch": 0.9786784140969162, + "grad_norm": 1.540885425711554, + "learning_rate": 1.1185788144207394e-05, + "loss": 0.6997090578079224, + "step": 2777 + }, + { + "epoch": 0.9790308370044053, + "grad_norm": 1.422432956680528, + "learning_rate": 1.1180002654695543e-05, + "loss": 
0.6882679462432861, + "step": 2778 + }, + { + "epoch": 0.9793832599118942, + "grad_norm": 1.5811365233101125, + "learning_rate": 1.1174216764609514e-05, + "loss": 0.6434916257858276, + "step": 2779 + }, + { + "epoch": 0.9797356828193833, + "grad_norm": 1.5811226707061032, + "learning_rate": 1.1168430475913437e-05, + "loss": 0.6614376902580261, + "step": 2780 + }, + { + "epoch": 0.9800881057268722, + "grad_norm": 1.380437766979243, + "learning_rate": 1.1162643790571574e-05, + "loss": 0.6440471410751343, + "step": 2781 + }, + { + "epoch": 0.9804405286343613, + "grad_norm": 1.6997398594970703, + "learning_rate": 1.1156856710548327e-05, + "loss": 0.6493573188781738, + "step": 2782 + }, + { + "epoch": 0.9807929515418502, + "grad_norm": 1.5246321952125226, + "learning_rate": 1.1151069237808231e-05, + "loss": 0.660174548625946, + "step": 2783 + }, + { + "epoch": 0.9811453744493392, + "grad_norm": 1.7392611870715098, + "learning_rate": 1.1145281374315953e-05, + "loss": 0.8041812181472778, + "step": 2784 + }, + { + "epoch": 0.9814977973568282, + "grad_norm": 1.3479949919135392, + "learning_rate": 1.1139493122036289e-05, + "loss": 0.4758625030517578, + "step": 2785 + }, + { + "epoch": 0.9818502202643172, + "grad_norm": 1.6334305751982239, + "learning_rate": 1.113370448293417e-05, + "loss": 0.6482613682746887, + "step": 2786 + }, + { + "epoch": 0.9822026431718062, + "grad_norm": 1.475447708954463, + "learning_rate": 1.1127915458974665e-05, + "loss": 0.6911569237709045, + "step": 2787 + }, + { + "epoch": 0.9825550660792951, + "grad_norm": 1.362340888945518, + "learning_rate": 1.1122126052122963e-05, + "loss": 0.6851824522018433, + "step": 2788 + }, + { + "epoch": 0.9829074889867842, + "grad_norm": 1.5792587066367831, + "learning_rate": 1.111633626434439e-05, + "loss": 0.6405081748962402, + "step": 2789 + }, + { + "epoch": 0.9832599118942731, + "grad_norm": 1.5781550908818451, + "learning_rate": 1.1110546097604391e-05, + "loss": 0.7064476013183594, + "step": 2790 + }, + { + 
"epoch": 0.9836123348017621, + "grad_norm": 1.4647903320195184, + "learning_rate": 1.1104755553868559e-05, + "loss": 0.641350269317627, + "step": 2791 + }, + { + "epoch": 0.9839647577092511, + "grad_norm": 1.4142953897430577, + "learning_rate": 1.1098964635102597e-05, + "loss": 0.748977780342102, + "step": 2792 + }, + { + "epoch": 0.9843171806167401, + "grad_norm": 1.3989289975006294, + "learning_rate": 1.1093173343272342e-05, + "loss": 0.6033440828323364, + "step": 2793 + }, + { + "epoch": 0.984669603524229, + "grad_norm": 1.2877663440814373, + "learning_rate": 1.1087381680343754e-05, + "loss": 0.5684633255004883, + "step": 2794 + }, + { + "epoch": 0.9850220264317181, + "grad_norm": 1.5189384787980884, + "learning_rate": 1.1081589648282928e-05, + "loss": 0.7041289210319519, + "step": 2795 + }, + { + "epoch": 0.985374449339207, + "grad_norm": 1.5616342989862266, + "learning_rate": 1.1075797249056079e-05, + "loss": 0.7189786434173584, + "step": 2796 + }, + { + "epoch": 0.9857268722466961, + "grad_norm": 1.534620191791425, + "learning_rate": 1.1070004484629543e-05, + "loss": 0.5114344358444214, + "step": 2797 + }, + { + "epoch": 0.986079295154185, + "grad_norm": 1.6541092784437663, + "learning_rate": 1.1064211356969782e-05, + "loss": 0.5897136926651001, + "step": 2798 + }, + { + "epoch": 0.986431718061674, + "grad_norm": 1.5980123151797752, + "learning_rate": 1.1058417868043387e-05, + "loss": 0.8490760326385498, + "step": 2799 + }, + { + "epoch": 0.986784140969163, + "grad_norm": 1.5100542298165633, + "learning_rate": 1.1052624019817065e-05, + "loss": 0.6392524242401123, + "step": 2800 + }, + { + "epoch": 0.9871365638766519, + "grad_norm": 1.5630522519900902, + "learning_rate": 1.104682981425765e-05, + "loss": 0.7267303466796875, + "step": 2801 + }, + { + "epoch": 0.987488986784141, + "grad_norm": 1.5413815660334662, + "learning_rate": 1.1041035253332087e-05, + "loss": 0.6622469425201416, + "step": 2802 + }, + { + "epoch": 0.9878414096916299, + "grad_norm": 
1.4547931829788883, + "learning_rate": 1.1035240339007454e-05, + "loss": 0.643883466720581, + "step": 2803 + }, + { + "epoch": 0.988193832599119, + "grad_norm": 1.4919310534649226, + "learning_rate": 1.1029445073250945e-05, + "loss": 0.6281142234802246, + "step": 2804 + }, + { + "epoch": 0.9885462555066079, + "grad_norm": 1.606048707782168, + "learning_rate": 1.1023649458029873e-05, + "loss": 0.6356241703033447, + "step": 2805 + }, + { + "epoch": 0.988898678414097, + "grad_norm": 1.7018688321982895, + "learning_rate": 1.1017853495311664e-05, + "loss": 0.8118115663528442, + "step": 2806 + }, + { + "epoch": 0.9892511013215859, + "grad_norm": 1.4779776881835476, + "learning_rate": 1.1012057187063872e-05, + "loss": 0.7673395276069641, + "step": 2807 + }, + { + "epoch": 0.9896035242290749, + "grad_norm": 1.5158382122898324, + "learning_rate": 1.1006260535254159e-05, + "loss": 0.6617262959480286, + "step": 2808 + }, + { + "epoch": 0.9899559471365639, + "grad_norm": 1.7342419352159402, + "learning_rate": 1.1000463541850315e-05, + "loss": 0.537519097328186, + "step": 2809 + }, + { + "epoch": 0.9903083700440528, + "grad_norm": 1.8093297060046025, + "learning_rate": 1.0994666208820229e-05, + "loss": 0.6281024813652039, + "step": 2810 + }, + { + "epoch": 0.9906607929515419, + "grad_norm": 1.4111971416204439, + "learning_rate": 1.0988868538131922e-05, + "loss": 0.7189136743545532, + "step": 2811 + }, + { + "epoch": 0.9910132158590308, + "grad_norm": 1.3844162550962045, + "learning_rate": 1.098307053175352e-05, + "loss": 0.622093677520752, + "step": 2812 + }, + { + "epoch": 0.9913656387665198, + "grad_norm": 1.4032650881900075, + "learning_rate": 1.0977272191653272e-05, + "loss": 0.6774802207946777, + "step": 2813 + }, + { + "epoch": 0.9917180616740088, + "grad_norm": 1.490303383982121, + "learning_rate": 1.0971473519799523e-05, + "loss": 0.5999646186828613, + "step": 2814 + }, + { + "epoch": 0.9920704845814978, + "grad_norm": 1.3508886274303966, + "learning_rate": 
1.096567451816075e-05, + "loss": 0.6450619697570801, + "step": 2815 + }, + { + "epoch": 0.9924229074889868, + "grad_norm": 1.8693455627252262, + "learning_rate": 1.0959875188705529e-05, + "loss": 0.693134069442749, + "step": 2816 + }, + { + "epoch": 0.9927753303964758, + "grad_norm": 1.744167199385734, + "learning_rate": 1.0954075533402557e-05, + "loss": 0.8968616724014282, + "step": 2817 + }, + { + "epoch": 0.9931277533039647, + "grad_norm": 1.5750441805034816, + "learning_rate": 1.0948275554220632e-05, + "loss": 0.6114391088485718, + "step": 2818 + }, + { + "epoch": 0.9934801762114538, + "grad_norm": 1.3761860122661305, + "learning_rate": 1.0942475253128667e-05, + "loss": 0.7583796977996826, + "step": 2819 + }, + { + "epoch": 0.9938325991189427, + "grad_norm": 2.0494911253957735, + "learning_rate": 1.0936674632095683e-05, + "loss": 0.5683549046516418, + "step": 2820 + }, + { + "epoch": 0.9941850220264317, + "grad_norm": 1.4100630352107084, + "learning_rate": 1.0930873693090815e-05, + "loss": 0.5664689540863037, + "step": 2821 + }, + { + "epoch": 0.9945374449339207, + "grad_norm": 1.1859055454278844, + "learning_rate": 1.0925072438083296e-05, + "loss": 0.5799476504325867, + "step": 2822 + }, + { + "epoch": 0.9948898678414096, + "grad_norm": 1.4558284543811444, + "learning_rate": 1.0919270869042475e-05, + "loss": 0.6879112720489502, + "step": 2823 + }, + { + "epoch": 0.9952422907488987, + "grad_norm": 1.3673096151886848, + "learning_rate": 1.09134689879378e-05, + "loss": 0.6348927021026611, + "step": 2824 + }, + { + "epoch": 0.9955947136563876, + "grad_norm": 1.5301215006310536, + "learning_rate": 1.0907666796738839e-05, + "loss": 0.55754554271698, + "step": 2825 + }, + { + "epoch": 0.9959471365638767, + "grad_norm": 1.6611255848189581, + "learning_rate": 1.090186429741524e-05, + "loss": 0.6664899587631226, + "step": 2826 + }, + { + "epoch": 0.9962995594713656, + "grad_norm": 1.3580224067934683, + "learning_rate": 1.0896061491936782e-05, + "loss": 
0.6521929502487183, + "step": 2827 + }, + { + "epoch": 0.9966519823788547, + "grad_norm": 1.4217882734660863, + "learning_rate": 1.0890258382273333e-05, + "loss": 0.542471170425415, + "step": 2828 + }, + { + "epoch": 0.9970044052863436, + "grad_norm": 1.3242120868836005, + "learning_rate": 1.0884454970394871e-05, + "loss": 0.60117506980896, + "step": 2829 + }, + { + "epoch": 0.9973568281938326, + "grad_norm": 1.5563969946549858, + "learning_rate": 1.0878651258271471e-05, + "loss": 0.6783676147460938, + "step": 2830 + }, + { + "epoch": 0.9977092511013216, + "grad_norm": 1.4867095260992749, + "learning_rate": 1.0872847247873315e-05, + "loss": 0.7080766558647156, + "step": 2831 + }, + { + "epoch": 0.9980616740088105, + "grad_norm": 1.7595047000981443, + "learning_rate": 1.0867042941170677e-05, + "loss": 0.9228106141090393, + "step": 2832 + }, + { + "epoch": 0.9984140969162996, + "grad_norm": 1.749212162747955, + "learning_rate": 1.086123834013395e-05, + "loss": 0.7601282596588135, + "step": 2833 + }, + { + "epoch": 0.9987665198237885, + "grad_norm": 1.388473564306277, + "learning_rate": 1.0855433446733607e-05, + "loss": 0.7101393342018127, + "step": 2834 + }, + { + "epoch": 0.9991189427312775, + "grad_norm": 1.426665891638417, + "learning_rate": 1.084962826294023e-05, + "loss": 0.5006242394447327, + "step": 2835 + }, + { + "epoch": 0.9994713656387665, + "grad_norm": 1.6063601330711992, + "learning_rate": 1.08438227907245e-05, + "loss": 0.7270148992538452, + "step": 2836 + }, + { + "epoch": 0.9998237885462555, + "grad_norm": 1.5770914971205114, + "learning_rate": 1.0838017032057194e-05, + "loss": 0.7252628803253174, + "step": 2837 + }, + { + "epoch": 1.0, + "grad_norm": 2.9062070384731578, + "learning_rate": 1.0832210988909187e-05, + "loss": 0.4579252004623413, + "step": 2838 + }, + { + "epoch": 1.000352422907489, + "grad_norm": 1.410073366222354, + "learning_rate": 1.0826404663251446e-05, + "loss": 0.635676920413971, + "step": 2839 + }, + { + "epoch": 
1.0007048458149779, + "grad_norm": 1.5085425099131595, + "learning_rate": 1.0820598057055039e-05, + "loss": 0.6083015203475952, + "step": 2840 + }, + { + "epoch": 1.001057268722467, + "grad_norm": 1.2571881093552235, + "learning_rate": 1.0814791172291132e-05, + "loss": 0.5641704797744751, + "step": 2841 + }, + { + "epoch": 1.001409691629956, + "grad_norm": 1.448254627835315, + "learning_rate": 1.0808984010930981e-05, + "loss": 0.7668559551239014, + "step": 2842 + }, + { + "epoch": 1.001762114537445, + "grad_norm": 1.7836674103878665, + "learning_rate": 1.0803176574945933e-05, + "loss": 0.5205796957015991, + "step": 2843 + }, + { + "epoch": 1.0021145374449338, + "grad_norm": 1.2460568970106132, + "learning_rate": 1.0797368866307431e-05, + "loss": 0.6771252155303955, + "step": 2844 + }, + { + "epoch": 1.002466960352423, + "grad_norm": 1.3246167691239887, + "learning_rate": 1.0791560886987016e-05, + "loss": 0.6101677417755127, + "step": 2845 + }, + { + "epoch": 1.002819383259912, + "grad_norm": 1.683370422985012, + "learning_rate": 1.0785752638956315e-05, + "loss": 0.5651522874832153, + "step": 2846 + }, + { + "epoch": 1.0031718061674009, + "grad_norm": 1.3543139981801942, + "learning_rate": 1.0779944124187048e-05, + "loss": 0.6814571619033813, + "step": 2847 + }, + { + "epoch": 1.0035242290748898, + "grad_norm": 1.5579116379809095, + "learning_rate": 1.0774135344651023e-05, + "loss": 0.6786171197891235, + "step": 2848 + }, + { + "epoch": 1.003876651982379, + "grad_norm": 1.341282658364188, + "learning_rate": 1.0768326302320136e-05, + "loss": 0.5244907736778259, + "step": 2849 + }, + { + "epoch": 1.004229074889868, + "grad_norm": 1.5100504884551087, + "learning_rate": 1.0762516999166383e-05, + "loss": 0.6368712186813354, + "step": 2850 + }, + { + "epoch": 1.0045814977973568, + "grad_norm": 1.3929085404961679, + "learning_rate": 1.0756707437161841e-05, + "loss": 0.6389411687850952, + "step": 2851 + }, + { + "epoch": 1.0049339207048458, + "grad_norm": 1.796913818431425, 
+ "learning_rate": 1.0750897618278675e-05, + "loss": 0.6257550716400146, + "step": 2852 + }, + { + "epoch": 1.0052863436123347, + "grad_norm": 1.384078231158131, + "learning_rate": 1.0745087544489132e-05, + "loss": 0.49478042125701904, + "step": 2853 + }, + { + "epoch": 1.0056387665198239, + "grad_norm": 1.3713236142324383, + "learning_rate": 1.0739277217765558e-05, + "loss": 0.6350952386856079, + "step": 2854 + }, + { + "epoch": 1.0059911894273128, + "grad_norm": 1.4287669419061304, + "learning_rate": 1.0733466640080374e-05, + "loss": 0.6057480573654175, + "step": 2855 + }, + { + "epoch": 1.0063436123348017, + "grad_norm": 1.5646694084149986, + "learning_rate": 1.0727655813406094e-05, + "loss": 0.5545427799224854, + "step": 2856 + }, + { + "epoch": 1.0066960352422907, + "grad_norm": 1.371726691889951, + "learning_rate": 1.0721844739715311e-05, + "loss": 0.55484938621521, + "step": 2857 + }, + { + "epoch": 1.0070484581497798, + "grad_norm": 1.6325523903522516, + "learning_rate": 1.0716033420980703e-05, + "loss": 0.6889834403991699, + "step": 2858 + }, + { + "epoch": 1.0074008810572688, + "grad_norm": 1.928061303452338, + "learning_rate": 1.0710221859175031e-05, + "loss": 0.7259023189544678, + "step": 2859 + }, + { + "epoch": 1.0077533039647577, + "grad_norm": 1.7213820381224034, + "learning_rate": 1.0704410056271144e-05, + "loss": 0.6200032234191895, + "step": 2860 + }, + { + "epoch": 1.0081057268722466, + "grad_norm": 1.2488919699208767, + "learning_rate": 1.069859801424196e-05, + "loss": 0.5357909202575684, + "step": 2861 + }, + { + "epoch": 1.0084581497797356, + "grad_norm": 1.462725629247434, + "learning_rate": 1.0692785735060495e-05, + "loss": 0.8121966123580933, + "step": 2862 + }, + { + "epoch": 1.0088105726872247, + "grad_norm": 1.5047486906511685, + "learning_rate": 1.0686973220699834e-05, + "loss": 0.5698819160461426, + "step": 2863 + }, + { + "epoch": 1.0091629955947137, + "grad_norm": 1.3352019656375154, + "learning_rate": 1.0681160473133144e-05, + 
"loss": 0.6598206162452698, + "step": 2864 + }, + { + "epoch": 1.0095154185022026, + "grad_norm": 1.571854196128042, + "learning_rate": 1.0675347494333667e-05, + "loss": 0.7574363946914673, + "step": 2865 + }, + { + "epoch": 1.0098678414096915, + "grad_norm": 2.0265508752029007, + "learning_rate": 1.0669534286274737e-05, + "loss": 0.6749663949012756, + "step": 2866 + }, + { + "epoch": 1.0102202643171807, + "grad_norm": 1.5445692097493786, + "learning_rate": 1.0663720850929753e-05, + "loss": 0.5932409763336182, + "step": 2867 + }, + { + "epoch": 1.0105726872246696, + "grad_norm": 1.4883467064779885, + "learning_rate": 1.0657907190272197e-05, + "loss": 0.7070773839950562, + "step": 2868 + }, + { + "epoch": 1.0109251101321586, + "grad_norm": 1.6639794076635466, + "learning_rate": 1.0652093306275621e-05, + "loss": 0.531635582447052, + "step": 2869 + }, + { + "epoch": 1.0112775330396475, + "grad_norm": 1.5967103256398283, + "learning_rate": 1.0646279200913665e-05, + "loss": 0.5966447591781616, + "step": 2870 + }, + { + "epoch": 1.0116299559471367, + "grad_norm": 1.5047477869564347, + "learning_rate": 1.0640464876160033e-05, + "loss": 0.6308450698852539, + "step": 2871 + }, + { + "epoch": 1.0119823788546256, + "grad_norm": 1.6938927429813924, + "learning_rate": 1.0634650333988508e-05, + "loss": 0.6477035284042358, + "step": 2872 + }, + { + "epoch": 1.0123348017621145, + "grad_norm": 1.4725648899614407, + "learning_rate": 1.0628835576372942e-05, + "loss": 0.5856079459190369, + "step": 2873 + }, + { + "epoch": 1.0126872246696035, + "grad_norm": 1.6415031005435194, + "learning_rate": 1.062302060528727e-05, + "loss": 0.733691930770874, + "step": 2874 + }, + { + "epoch": 1.0130396475770924, + "grad_norm": 1.6528326658043055, + "learning_rate": 1.0617205422705495e-05, + "loss": 0.6020156145095825, + "step": 2875 + }, + { + "epoch": 1.0133920704845816, + "grad_norm": 1.5978613503890422, + "learning_rate": 1.0611390030601685e-05, + "loss": 0.4980982542037964, + "step": 2876 + }, 
+ { + "epoch": 1.0137444933920705, + "grad_norm": 1.5178573200522583, + "learning_rate": 1.0605574430949983e-05, + "loss": 0.6498349905014038, + "step": 2877 + }, + { + "epoch": 1.0140969162995594, + "grad_norm": 1.7318519084472541, + "learning_rate": 1.0599758625724612e-05, + "loss": 0.6456383466720581, + "step": 2878 + }, + { + "epoch": 1.0144493392070484, + "grad_norm": 1.7056738628689527, + "learning_rate": 1.059394261689985e-05, + "loss": 0.6047386527061462, + "step": 2879 + }, + { + "epoch": 1.0148017621145375, + "grad_norm": 1.6633316847391189, + "learning_rate": 1.0588126406450056e-05, + "loss": 0.641674816608429, + "step": 2880 + }, + { + "epoch": 1.0151541850220265, + "grad_norm": 1.549495353719679, + "learning_rate": 1.0582309996349648e-05, + "loss": 0.6157702207565308, + "step": 2881 + }, + { + "epoch": 1.0155066079295154, + "grad_norm": 1.614686141937513, + "learning_rate": 1.057649338857312e-05, + "loss": 0.6004809737205505, + "step": 2882 + }, + { + "epoch": 1.0158590308370044, + "grad_norm": 1.460588924951717, + "learning_rate": 1.0570676585095028e-05, + "loss": 0.5534430742263794, + "step": 2883 + }, + { + "epoch": 1.0162114537444933, + "grad_norm": 2.0058626486485367, + "learning_rate": 1.0564859587889997e-05, + "loss": 0.7781813144683838, + "step": 2884 + }, + { + "epoch": 1.0165638766519824, + "grad_norm": 1.9228872779765243, + "learning_rate": 1.0559042398932713e-05, + "loss": 0.6949760913848877, + "step": 2885 + }, + { + "epoch": 1.0169162995594714, + "grad_norm": 1.51396598780538, + "learning_rate": 1.0553225020197932e-05, + "loss": 0.5718453526496887, + "step": 2886 + }, + { + "epoch": 1.0172687224669603, + "grad_norm": 1.7835909963123882, + "learning_rate": 1.0547407453660471e-05, + "loss": 0.6689345836639404, + "step": 2887 + }, + { + "epoch": 1.0176211453744493, + "grad_norm": 1.5559332596209525, + "learning_rate": 1.0541589701295222e-05, + "loss": 0.6615442037582397, + "step": 2888 + }, + { + "epoch": 1.0179735682819384, + "grad_norm": 
1.4810070180145358, + "learning_rate": 1.0535771765077121e-05, + "loss": 0.6458337306976318, + "step": 2889 + }, + { + "epoch": 1.0183259911894273, + "grad_norm": 1.4770072284014752, + "learning_rate": 1.052995364698118e-05, + "loss": 0.5330519676208496, + "step": 2890 + }, + { + "epoch": 1.0186784140969163, + "grad_norm": 1.4780636522187705, + "learning_rate": 1.0524135348982467e-05, + "loss": 0.6219571232795715, + "step": 2891 + }, + { + "epoch": 1.0190308370044052, + "grad_norm": 1.4624191661889683, + "learning_rate": 1.0518316873056118e-05, + "loss": 0.6731684803962708, + "step": 2892 + }, + { + "epoch": 1.0193832599118944, + "grad_norm": 1.614741871357758, + "learning_rate": 1.0512498221177319e-05, + "loss": 0.6126813888549805, + "step": 2893 + }, + { + "epoch": 1.0197356828193833, + "grad_norm": 1.4895494518265573, + "learning_rate": 1.0506679395321325e-05, + "loss": 0.5796904563903809, + "step": 2894 + }, + { + "epoch": 1.0200881057268723, + "grad_norm": 1.5545739969005041, + "learning_rate": 1.050086039746344e-05, + "loss": 0.5765914916992188, + "step": 2895 + }, + { + "epoch": 1.0204405286343612, + "grad_norm": 1.3710954206781227, + "learning_rate": 1.0495041229579043e-05, + "loss": 0.4798969328403473, + "step": 2896 + }, + { + "epoch": 1.0207929515418501, + "grad_norm": 1.551476741605498, + "learning_rate": 1.0489221893643553e-05, + "loss": 0.673927366733551, + "step": 2897 + }, + { + "epoch": 1.0211453744493393, + "grad_norm": 1.6211129054938926, + "learning_rate": 1.0483402391632453e-05, + "loss": 0.5681431293487549, + "step": 2898 + }, + { + "epoch": 1.0214977973568282, + "grad_norm": 1.3128793329209902, + "learning_rate": 1.0477582725521287e-05, + "loss": 0.6156354546546936, + "step": 2899 + }, + { + "epoch": 1.0218502202643172, + "grad_norm": 1.4369078255379546, + "learning_rate": 1.0471762897285652e-05, + "loss": 0.6569045782089233, + "step": 2900 + }, + { + "epoch": 1.022202643171806, + "grad_norm": 1.4293089736412674, + "learning_rate": 
1.046594290890119e-05, + "loss": 0.6125048995018005, + "step": 2901 + }, + { + "epoch": 1.0225550660792952, + "grad_norm": 1.6465466140905431, + "learning_rate": 1.0460122762343614e-05, + "loss": 0.604046106338501, + "step": 2902 + }, + { + "epoch": 1.0229074889867842, + "grad_norm": 1.5461286198100506, + "learning_rate": 1.0454302459588677e-05, + "loss": 0.4569816589355469, + "step": 2903 + }, + { + "epoch": 1.0232599118942731, + "grad_norm": 1.6187784923192434, + "learning_rate": 1.0448482002612194e-05, + "loss": 0.5764607787132263, + "step": 2904 + }, + { + "epoch": 1.023612334801762, + "grad_norm": 1.503585291483294, + "learning_rate": 1.044266139339003e-05, + "loss": 0.5859626531600952, + "step": 2905 + }, + { + "epoch": 1.023964757709251, + "grad_norm": 1.6642769825669268, + "learning_rate": 1.04368406338981e-05, + "loss": 0.7326341271400452, + "step": 2906 + }, + { + "epoch": 1.0243171806167402, + "grad_norm": 1.613324765385094, + "learning_rate": 1.0431019726112366e-05, + "loss": 0.6355161070823669, + "step": 2907 + }, + { + "epoch": 1.024669603524229, + "grad_norm": 1.5833367942965741, + "learning_rate": 1.0425198672008851e-05, + "loss": 0.6990653872489929, + "step": 2908 + }, + { + "epoch": 1.025022026431718, + "grad_norm": 2.3098262824716542, + "learning_rate": 1.0419377473563621e-05, + "loss": 0.631952166557312, + "step": 2909 + }, + { + "epoch": 1.025374449339207, + "grad_norm": 1.4397039525414863, + "learning_rate": 1.041355613275279e-05, + "loss": 0.4872596561908722, + "step": 2910 + }, + { + "epoch": 1.0257268722466961, + "grad_norm": 1.5222931253330352, + "learning_rate": 1.0407734651552522e-05, + "loss": 0.5334043502807617, + "step": 2911 + }, + { + "epoch": 1.026079295154185, + "grad_norm": 1.5817730675020623, + "learning_rate": 1.0401913031939026e-05, + "loss": 0.5971134305000305, + "step": 2912 + }, + { + "epoch": 1.026431718061674, + "grad_norm": 1.7562208471394358, + "learning_rate": 1.0396091275888567e-05, + "loss": 0.6527851819992065, + 
"step": 2913 + }, + { + "epoch": 1.026784140969163, + "grad_norm": 1.5387477454353993, + "learning_rate": 1.0390269385377444e-05, + "loss": 0.4515818953514099, + "step": 2914 + }, + { + "epoch": 1.027136563876652, + "grad_norm": 1.4624804092376522, + "learning_rate": 1.0384447362382013e-05, + "loss": 0.530797004699707, + "step": 2915 + }, + { + "epoch": 1.027488986784141, + "grad_norm": 1.4915704465108583, + "learning_rate": 1.0378625208878666e-05, + "loss": 0.5477641224861145, + "step": 2916 + }, + { + "epoch": 1.02784140969163, + "grad_norm": 1.6025052451883606, + "learning_rate": 1.0372802926843843e-05, + "loss": 0.6390479207038879, + "step": 2917 + }, + { + "epoch": 1.028193832599119, + "grad_norm": 1.5706073153963707, + "learning_rate": 1.0366980518254028e-05, + "loss": 0.610755443572998, + "step": 2918 + }, + { + "epoch": 1.0285462555066078, + "grad_norm": 1.4805888577219812, + "learning_rate": 1.036115798508575e-05, + "loss": 0.5427766442298889, + "step": 2919 + }, + { + "epoch": 1.028898678414097, + "grad_norm": 1.4610582929917253, + "learning_rate": 1.0355335329315573e-05, + "loss": 0.621055006980896, + "step": 2920 + }, + { + "epoch": 1.029251101321586, + "grad_norm": 1.7760527372961, + "learning_rate": 1.0349512552920114e-05, + "loss": 0.6098253726959229, + "step": 2921 + }, + { + "epoch": 1.0296035242290749, + "grad_norm": 1.8967300437588117, + "learning_rate": 1.0343689657876017e-05, + "loss": 0.591664731502533, + "step": 2922 + }, + { + "epoch": 1.0299559471365638, + "grad_norm": 1.616730113059231, + "learning_rate": 1.033786664615998e-05, + "loss": 0.6531485915184021, + "step": 2923 + }, + { + "epoch": 1.030308370044053, + "grad_norm": 1.5937698715448299, + "learning_rate": 1.0332043519748727e-05, + "loss": 0.6933655738830566, + "step": 2924 + }, + { + "epoch": 1.030660792951542, + "grad_norm": 1.5987643686429562, + "learning_rate": 1.0326220280619036e-05, + "loss": 0.6512705087661743, + "step": 2925 + }, + { + "epoch": 1.0310132158590308, + 
"grad_norm": 1.829250792437923, + "learning_rate": 1.0320396930747712e-05, + "loss": 0.5671502947807312, + "step": 2926 + }, + { + "epoch": 1.0313656387665198, + "grad_norm": 1.6239123058071627, + "learning_rate": 1.0314573472111601e-05, + "loss": 0.6795192360877991, + "step": 2927 + }, + { + "epoch": 1.0317180616740087, + "grad_norm": 1.5985127083182307, + "learning_rate": 1.0308749906687585e-05, + "loss": 0.6357578039169312, + "step": 2928 + }, + { + "epoch": 1.0320704845814979, + "grad_norm": 1.6982196546251649, + "learning_rate": 1.0302926236452588e-05, + "loss": 0.7009944915771484, + "step": 2929 + }, + { + "epoch": 1.0324229074889868, + "grad_norm": 1.4806960711115318, + "learning_rate": 1.0297102463383557e-05, + "loss": 0.4685679078102112, + "step": 2930 + }, + { + "epoch": 1.0327753303964757, + "grad_norm": 1.5429925693746163, + "learning_rate": 1.0291278589457488e-05, + "loss": 0.6359078884124756, + "step": 2931 + }, + { + "epoch": 1.0331277533039647, + "grad_norm": 1.8631741910761805, + "learning_rate": 1.0285454616651398e-05, + "loss": 0.6606266498565674, + "step": 2932 + }, + { + "epoch": 1.0334801762114538, + "grad_norm": 1.7076039728900445, + "learning_rate": 1.0279630546942353e-05, + "loss": 0.5405932664871216, + "step": 2933 + }, + { + "epoch": 1.0338325991189428, + "grad_norm": 1.4934491606364382, + "learning_rate": 1.0273806382307443e-05, + "loss": 0.8072758316993713, + "step": 2934 + }, + { + "epoch": 1.0341850220264317, + "grad_norm": 1.5899951805886359, + "learning_rate": 1.0267982124723783e-05, + "loss": 0.6923058032989502, + "step": 2935 + }, + { + "epoch": 1.0345374449339206, + "grad_norm": 1.7156977270346485, + "learning_rate": 1.0262157776168533e-05, + "loss": 0.5577275156974792, + "step": 2936 + }, + { + "epoch": 1.0348898678414098, + "grad_norm": 1.6363417924911698, + "learning_rate": 1.0256333338618875e-05, + "loss": 0.6780786514282227, + "step": 2937 + }, + { + "epoch": 1.0352422907488987, + "grad_norm": 1.6093019454005904, + 
"learning_rate": 1.0250508814052029e-05, + "loss": 0.6966040134429932, + "step": 2938 + }, + { + "epoch": 1.0355947136563877, + "grad_norm": 1.4912092272159942, + "learning_rate": 1.0244684204445237e-05, + "loss": 0.5726339817047119, + "step": 2939 + }, + { + "epoch": 1.0359471365638766, + "grad_norm": 1.372791278777169, + "learning_rate": 1.0238859511775768e-05, + "loss": 0.64924156665802, + "step": 2940 + }, + { + "epoch": 1.0362995594713655, + "grad_norm": 1.5498611273448277, + "learning_rate": 1.0233034738020933e-05, + "loss": 0.49121707677841187, + "step": 2941 + }, + { + "epoch": 1.0366519823788547, + "grad_norm": 1.4698297870867278, + "learning_rate": 1.0227209885158053e-05, + "loss": 0.5505814552307129, + "step": 2942 + }, + { + "epoch": 1.0370044052863436, + "grad_norm": 1.658171020881214, + "learning_rate": 1.022138495516449e-05, + "loss": 0.7429872751235962, + "step": 2943 + }, + { + "epoch": 1.0373568281938326, + "grad_norm": 1.5946562373848934, + "learning_rate": 1.0215559950017624e-05, + "loss": 0.6492434740066528, + "step": 2944 + }, + { + "epoch": 1.0377092511013215, + "grad_norm": 1.5139165780476451, + "learning_rate": 1.0209734871694865e-05, + "loss": 0.5418736338615417, + "step": 2945 + }, + { + "epoch": 1.0380616740088107, + "grad_norm": 1.676058492453494, + "learning_rate": 1.0203909722173644e-05, + "loss": 0.6252620220184326, + "step": 2946 + }, + { + "epoch": 1.0384140969162996, + "grad_norm": 1.4699238771485563, + "learning_rate": 1.0198084503431416e-05, + "loss": 0.5124455690383911, + "step": 2947 + }, + { + "epoch": 1.0387665198237885, + "grad_norm": 1.4358343290990208, + "learning_rate": 1.0192259217445663e-05, + "loss": 0.5729688405990601, + "step": 2948 + }, + { + "epoch": 1.0391189427312775, + "grad_norm": 1.8222711908460536, + "learning_rate": 1.0186433866193893e-05, + "loss": 0.5891536474227905, + "step": 2949 + }, + { + "epoch": 1.0394713656387666, + "grad_norm": 1.7110443983801997, + "learning_rate": 1.0180608451653626e-05, + 
"loss": 0.774397075176239, + "step": 2950 + }, + { + "epoch": 1.0398237885462556, + "grad_norm": 1.4480826912481708, + "learning_rate": 1.0174782975802408e-05, + "loss": 0.5987098813056946, + "step": 2951 + }, + { + "epoch": 1.0401762114537445, + "grad_norm": 1.634577600554869, + "learning_rate": 1.016895744061781e-05, + "loss": 0.5334598422050476, + "step": 2952 + }, + { + "epoch": 1.0405286343612334, + "grad_norm": 1.7236175912347957, + "learning_rate": 1.0163131848077421e-05, + "loss": 0.5946340560913086, + "step": 2953 + }, + { + "epoch": 1.0408810572687224, + "grad_norm": 1.601606630295311, + "learning_rate": 1.0157306200158847e-05, + "loss": 0.5780941247940063, + "step": 2954 + }, + { + "epoch": 1.0412334801762115, + "grad_norm": 1.6785528445522104, + "learning_rate": 1.0151480498839712e-05, + "loss": 0.6348963975906372, + "step": 2955 + }, + { + "epoch": 1.0415859030837005, + "grad_norm": 1.717999985242494, + "learning_rate": 1.014565474609766e-05, + "loss": 0.6868102550506592, + "step": 2956 + }, + { + "epoch": 1.0419383259911894, + "grad_norm": 1.6612318546166622, + "learning_rate": 1.0139828943910358e-05, + "loss": 0.6507548689842224, + "step": 2957 + }, + { + "epoch": 1.0422907488986783, + "grad_norm": 1.7617270521903845, + "learning_rate": 1.0134003094255478e-05, + "loss": 0.6358312964439392, + "step": 2958 + }, + { + "epoch": 1.0426431718061675, + "grad_norm": 1.5725895362844704, + "learning_rate": 1.0128177199110723e-05, + "loss": 0.7530224919319153, + "step": 2959 + }, + { + "epoch": 1.0429955947136564, + "grad_norm": 1.5496338862557548, + "learning_rate": 1.012235126045379e-05, + "loss": 0.545819878578186, + "step": 2960 + }, + { + "epoch": 1.0433480176211454, + "grad_norm": 1.5828250584633938, + "learning_rate": 1.011652528026242e-05, + "loss": 0.6626788377761841, + "step": 2961 + }, + { + "epoch": 1.0437004405286343, + "grad_norm": 1.6913571400986156, + "learning_rate": 1.0110699260514336e-05, + "loss": 0.6407896280288696, + "step": 2962 + }, + { 
+ "epoch": 1.0440528634361232, + "grad_norm": 1.4558906354554821, + "learning_rate": 1.0104873203187307e-05, + "loss": 0.5633673667907715, + "step": 2963 + }, + { + "epoch": 1.0444052863436124, + "grad_norm": 1.6991226564822444, + "learning_rate": 1.0099047110259081e-05, + "loss": 0.5356892943382263, + "step": 2964 + }, + { + "epoch": 1.0447577092511013, + "grad_norm": 1.6571256461175092, + "learning_rate": 1.0093220983707448e-05, + "loss": 0.5527205467224121, + "step": 2965 + }, + { + "epoch": 1.0451101321585903, + "grad_norm": 1.5928434384321621, + "learning_rate": 1.008739482551019e-05, + "loss": 0.6148320436477661, + "step": 2966 + }, + { + "epoch": 1.0454625550660792, + "grad_norm": 1.8604930696261837, + "learning_rate": 1.0081568637645111e-05, + "loss": 0.5713976621627808, + "step": 2967 + }, + { + "epoch": 1.0458149779735684, + "grad_norm": 1.4811105317563769, + "learning_rate": 1.0075742422090015e-05, + "loss": 0.5836226940155029, + "step": 2968 + }, + { + "epoch": 1.0461674008810573, + "grad_norm": 1.829134506733255, + "learning_rate": 1.0069916180822727e-05, + "loss": 0.6452749371528625, + "step": 2969 + }, + { + "epoch": 1.0465198237885462, + "grad_norm": 1.507975881410604, + "learning_rate": 1.006408991582107e-05, + "loss": 0.5468501448631287, + "step": 2970 + }, + { + "epoch": 1.0468722466960352, + "grad_norm": 1.6217984014708016, + "learning_rate": 1.0058263629062883e-05, + "loss": 0.5195704698562622, + "step": 2971 + }, + { + "epoch": 1.0472246696035241, + "grad_norm": 1.603914403857505, + "learning_rate": 1.0052437322526003e-05, + "loss": 0.5144641995429993, + "step": 2972 + }, + { + "epoch": 1.0475770925110133, + "grad_norm": 1.767647834896278, + "learning_rate": 1.004661099818829e-05, + "loss": 0.7258927822113037, + "step": 2973 + }, + { + "epoch": 1.0479295154185022, + "grad_norm": 1.8920163745404244, + "learning_rate": 1.004078465802759e-05, + "loss": 0.6108053922653198, + "step": 2974 + }, + { + "epoch": 1.0482819383259911, + "grad_norm": 
1.5703096539855212, + "learning_rate": 1.0034958304021766e-05, + "loss": 0.612535834312439, + "step": 2975 + }, + { + "epoch": 1.04863436123348, + "grad_norm": 1.6902304674604145, + "learning_rate": 1.0029131938148686e-05, + "loss": 0.7272380590438843, + "step": 2976 + }, + { + "epoch": 1.0489867841409692, + "grad_norm": 1.4306480582223446, + "learning_rate": 1.0023305562386222e-05, + "loss": 0.4748264253139496, + "step": 2977 + }, + { + "epoch": 1.0493392070484582, + "grad_norm": 1.7625234188194432, + "learning_rate": 1.0017479178712245e-05, + "loss": 0.6686758399009705, + "step": 2978 + }, + { + "epoch": 1.0496916299559471, + "grad_norm": 1.6796969203533192, + "learning_rate": 1.0011652789104631e-05, + "loss": 0.5003838539123535, + "step": 2979 + }, + { + "epoch": 1.050044052863436, + "grad_norm": 1.7305572983583226, + "learning_rate": 1.0005826395541257e-05, + "loss": 0.6210055351257324, + "step": 2980 + }, + { + "epoch": 1.0503964757709252, + "grad_norm": 1.6943397299052507, + "learning_rate": 1e-05, + "loss": 0.6160269975662231, + "step": 2981 + }, + { + "epoch": 1.0507488986784141, + "grad_norm": 1.6249468093767248, + "learning_rate": 9.994173604458748e-06, + "loss": 0.6432052850723267, + "step": 2982 + }, + { + "epoch": 1.051101321585903, + "grad_norm": 1.6764234439374022, + "learning_rate": 9.988347210895372e-06, + "loss": 0.588628888130188, + "step": 2983 + }, + { + "epoch": 1.051453744493392, + "grad_norm": 1.5595740377523009, + "learning_rate": 9.982520821287758e-06, + "loss": 0.6694320440292358, + "step": 2984 + }, + { + "epoch": 1.051806167400881, + "grad_norm": 1.7276474901524372, + "learning_rate": 9.976694437613778e-06, + "loss": 0.8591301441192627, + "step": 2985 + }, + { + "epoch": 1.0521585903083701, + "grad_norm": 1.6697380234108412, + "learning_rate": 9.970868061851315e-06, + "loss": 0.6000436544418335, + "step": 2986 + }, + { + "epoch": 1.052511013215859, + "grad_norm": 1.5357275356358564, + "learning_rate": 9.965041695978239e-06, + "loss": 
0.624568521976471, + "step": 2987 + }, + { + "epoch": 1.052863436123348, + "grad_norm": 1.4223866897031825, + "learning_rate": 9.959215341972414e-06, + "loss": 0.6173535585403442, + "step": 2988 + }, + { + "epoch": 1.053215859030837, + "grad_norm": 1.7069399452687213, + "learning_rate": 9.953389001811716e-06, + "loss": 0.5991729497909546, + "step": 2989 + }, + { + "epoch": 1.053568281938326, + "grad_norm": 1.782972390393551, + "learning_rate": 9.947562677473999e-06, + "loss": 0.570953905582428, + "step": 2990 + }, + { + "epoch": 1.053920704845815, + "grad_norm": 1.7332305108715658, + "learning_rate": 9.941736370937119e-06, + "loss": 0.6079390048980713, + "step": 2991 + }, + { + "epoch": 1.054273127753304, + "grad_norm": 2.110617001097567, + "learning_rate": 9.935910084178934e-06, + "loss": 0.599539577960968, + "step": 2992 + }, + { + "epoch": 1.0546255506607929, + "grad_norm": 1.5854202353385896, + "learning_rate": 9.930083819177273e-06, + "loss": 0.6736180186271667, + "step": 2993 + }, + { + "epoch": 1.054977973568282, + "grad_norm": 1.6240153775210555, + "learning_rate": 9.924257577909987e-06, + "loss": 0.6953197717666626, + "step": 2994 + }, + { + "epoch": 1.055330396475771, + "grad_norm": 1.8737137053755175, + "learning_rate": 9.918431362354892e-06, + "loss": 0.6670099496841431, + "step": 2995 + }, + { + "epoch": 1.05568281938326, + "grad_norm": 1.844007753613641, + "learning_rate": 9.912605174489811e-06, + "loss": 0.5829994678497314, + "step": 2996 + }, + { + "epoch": 1.0560352422907489, + "grad_norm": 1.9198236703913207, + "learning_rate": 9.906779016292554e-06, + "loss": 0.5926212072372437, + "step": 2997 + }, + { + "epoch": 1.0563876651982378, + "grad_norm": 1.4868752944824364, + "learning_rate": 9.900952889740922e-06, + "loss": 0.6085237860679626, + "step": 2998 + }, + { + "epoch": 1.056740088105727, + "grad_norm": 1.8046049827658854, + "learning_rate": 9.895126796812698e-06, + "loss": 0.5348918437957764, + "step": 2999 + }, + { + "epoch": 
1.0570925110132159, + "grad_norm": 1.79509807280399, + "learning_rate": 9.889300739485666e-06, + "loss": 0.6325811743736267, + "step": 3000 + }, + { + "epoch": 1.0574449339207048, + "grad_norm": 1.6006099839795653, + "learning_rate": 9.883474719737582e-06, + "loss": 0.6262463927268982, + "step": 3001 + }, + { + "epoch": 1.0577973568281938, + "grad_norm": 1.5914788157951554, + "learning_rate": 9.877648739546213e-06, + "loss": 0.5863393545150757, + "step": 3002 + }, + { + "epoch": 1.058149779735683, + "grad_norm": 2.0254476885032924, + "learning_rate": 9.871822800889284e-06, + "loss": 0.6200219392776489, + "step": 3003 + }, + { + "epoch": 1.0585022026431719, + "grad_norm": 1.6216300774961065, + "learning_rate": 9.865996905744523e-06, + "loss": 0.6994227170944214, + "step": 3004 + }, + { + "epoch": 1.0588546255506608, + "grad_norm": 1.735404014120002, + "learning_rate": 9.860171056089646e-06, + "loss": 0.6458406448364258, + "step": 3005 + }, + { + "epoch": 1.0592070484581497, + "grad_norm": 1.6209915560634427, + "learning_rate": 9.854345253902342e-06, + "loss": 0.6814782619476318, + "step": 3006 + }, + { + "epoch": 1.0595594713656387, + "grad_norm": 1.455508358080935, + "learning_rate": 9.84851950116029e-06, + "loss": 0.521275520324707, + "step": 3007 + }, + { + "epoch": 1.0599118942731278, + "grad_norm": 1.486020788258086, + "learning_rate": 9.84269379984116e-06, + "loss": 0.5541207790374756, + "step": 3008 + }, + { + "epoch": 1.0602643171806168, + "grad_norm": 1.7060435970959642, + "learning_rate": 9.836868151922579e-06, + "loss": 0.578704833984375, + "step": 3009 + }, + { + "epoch": 1.0606167400881057, + "grad_norm": 1.5220368339292814, + "learning_rate": 9.831042559382193e-06, + "loss": 0.6280980706214905, + "step": 3010 + }, + { + "epoch": 1.0609691629955946, + "grad_norm": 1.8314917502019485, + "learning_rate": 9.825217024197595e-06, + "loss": 0.6059408783912659, + "step": 3011 + }, + { + "epoch": 1.0613215859030838, + "grad_norm": 1.6362891327789773, + 
"learning_rate": 9.819391548346377e-06, + "loss": 0.6375449299812317, + "step": 3012 + }, + { + "epoch": 1.0616740088105727, + "grad_norm": 2.503364134053993, + "learning_rate": 9.81356613380611e-06, + "loss": 0.5959592461585999, + "step": 3013 + }, + { + "epoch": 1.0620264317180617, + "grad_norm": 1.735073300438408, + "learning_rate": 9.807740782554337e-06, + "loss": 0.7636409401893616, + "step": 3014 + }, + { + "epoch": 1.0623788546255506, + "grad_norm": 2.2227407713805722, + "learning_rate": 9.801915496568586e-06, + "loss": 0.6136656999588013, + "step": 3015 + }, + { + "epoch": 1.0627312775330395, + "grad_norm": 1.7360474444382674, + "learning_rate": 9.796090277826361e-06, + "loss": 0.4659839868545532, + "step": 3016 + }, + { + "epoch": 1.0630837004405287, + "grad_norm": 1.699131973967987, + "learning_rate": 9.790265128305137e-06, + "loss": 0.6053155660629272, + "step": 3017 + }, + { + "epoch": 1.0634361233480176, + "grad_norm": 1.698457126583602, + "learning_rate": 9.78444004998238e-06, + "loss": 0.6885203123092651, + "step": 3018 + }, + { + "epoch": 1.0637885462555066, + "grad_norm": 1.5620062631250171, + "learning_rate": 9.778615044835513e-06, + "loss": 0.4985584616661072, + "step": 3019 + }, + { + "epoch": 1.0641409691629955, + "grad_norm": 1.699890122838272, + "learning_rate": 9.772790114841948e-06, + "loss": 0.5782307386398315, + "step": 3020 + }, + { + "epoch": 1.0644933920704847, + "grad_norm": 1.7427928970766464, + "learning_rate": 9.766965261979072e-06, + "loss": 0.5819451212882996, + "step": 3021 + }, + { + "epoch": 1.0648458149779736, + "grad_norm": 1.9531302264016444, + "learning_rate": 9.761140488224232e-06, + "loss": 0.7316779494285583, + "step": 3022 + }, + { + "epoch": 1.0651982378854625, + "grad_norm": 2.4211241065200633, + "learning_rate": 9.755315795554766e-06, + "loss": 0.5986718535423279, + "step": 3023 + }, + { + "epoch": 1.0655506607929515, + "grad_norm": 1.5565361520380023, + "learning_rate": 9.749491185947977e-06, + "loss": 
0.5052427053451538, + "step": 3024 + }, + { + "epoch": 1.0659030837004406, + "grad_norm": 1.658020296029534, + "learning_rate": 9.743666661381123e-06, + "loss": 0.7370901107788086, + "step": 3025 + }, + { + "epoch": 1.0662555066079296, + "grad_norm": 1.575987435195716, + "learning_rate": 9.73784222383147e-06, + "loss": 0.6423007249832153, + "step": 3026 + }, + { + "epoch": 1.0666079295154185, + "grad_norm": 1.94896820476588, + "learning_rate": 9.73201787527622e-06, + "loss": 0.5679126977920532, + "step": 3027 + }, + { + "epoch": 1.0669603524229074, + "grad_norm": 2.498602043471406, + "learning_rate": 9.72619361769256e-06, + "loss": 0.5890183448791504, + "step": 3028 + }, + { + "epoch": 1.0673127753303966, + "grad_norm": 1.7647674693242208, + "learning_rate": 9.720369453057648e-06, + "loss": 0.6772822141647339, + "step": 3029 + }, + { + "epoch": 1.0676651982378855, + "grad_norm": 2.109810086892336, + "learning_rate": 9.714545383348602e-06, + "loss": 0.8275488615036011, + "step": 3030 + }, + { + "epoch": 1.0680176211453745, + "grad_norm": 1.6620933678667917, + "learning_rate": 9.708721410542517e-06, + "loss": 0.5369541645050049, + "step": 3031 + }, + { + "epoch": 1.0683700440528634, + "grad_norm": 1.611800532750273, + "learning_rate": 9.70289753661645e-06, + "loss": 0.7173746824264526, + "step": 3032 + }, + { + "epoch": 1.0687224669603523, + "grad_norm": 1.7405771304623092, + "learning_rate": 9.697073763547415e-06, + "loss": 0.597034215927124, + "step": 3033 + }, + { + "epoch": 1.0690748898678415, + "grad_norm": 1.867958529307263, + "learning_rate": 9.691250093312419e-06, + "loss": 0.6680281162261963, + "step": 3034 + }, + { + "epoch": 1.0694273127753304, + "grad_norm": 1.4898600082698874, + "learning_rate": 9.6854265278884e-06, + "loss": 0.6155321002006531, + "step": 3035 + }, + { + "epoch": 1.0697797356828194, + "grad_norm": 2.4613840016445314, + "learning_rate": 9.67960306925229e-06, + "loss": 0.5945199728012085, + "step": 3036 + }, + { + "epoch": 
1.0701321585903083, + "grad_norm": 1.7063166475670735, + "learning_rate": 9.673779719380967e-06, + "loss": 0.6492328643798828, + "step": 3037 + }, + { + "epoch": 1.0704845814977975, + "grad_norm": 1.8638826733925389, + "learning_rate": 9.667956480251273e-06, + "loss": 0.6501325964927673, + "step": 3038 + }, + { + "epoch": 1.0708370044052864, + "grad_norm": 1.4216071761527918, + "learning_rate": 9.662133353840025e-06, + "loss": 0.5956053733825684, + "step": 3039 + }, + { + "epoch": 1.0711894273127753, + "grad_norm": 1.7546711372901296, + "learning_rate": 9.656310342123988e-06, + "loss": 0.5966510772705078, + "step": 3040 + }, + { + "epoch": 1.0715418502202643, + "grad_norm": 1.7715803220306194, + "learning_rate": 9.65048744707989e-06, + "loss": 0.7096615433692932, + "step": 3041 + }, + { + "epoch": 1.0718942731277532, + "grad_norm": 1.5279732385894715, + "learning_rate": 9.644664670684429e-06, + "loss": 0.6697839498519897, + "step": 3042 + }, + { + "epoch": 1.0722466960352424, + "grad_norm": 1.6318262899161158, + "learning_rate": 9.638842014914253e-06, + "loss": 0.6288081407546997, + "step": 3043 + }, + { + "epoch": 1.0725991189427313, + "grad_norm": 1.6830476156095877, + "learning_rate": 9.633019481745973e-06, + "loss": 0.5870436429977417, + "step": 3044 + }, + { + "epoch": 1.0729515418502202, + "grad_norm": 1.4073037692368846, + "learning_rate": 9.62719707315616e-06, + "loss": 0.5540846586227417, + "step": 3045 + }, + { + "epoch": 1.0733039647577092, + "grad_norm": 1.8276869267624827, + "learning_rate": 9.621374791121335e-06, + "loss": 0.6134544014930725, + "step": 3046 + }, + { + "epoch": 1.0736563876651983, + "grad_norm": 1.9310361455307938, + "learning_rate": 9.61555263761799e-06, + "loss": 0.5537046194076538, + "step": 3047 + }, + { + "epoch": 1.0740088105726873, + "grad_norm": 1.5553451953770387, + "learning_rate": 9.60973061462256e-06, + "loss": 0.6423748731613159, + "step": 3048 + }, + { + "epoch": 1.0743612334801762, + "grad_norm": 1.7219317421679232, + 
"learning_rate": 9.603908724111438e-06, + "loss": 0.575737714767456, + "step": 3049 + }, + { + "epoch": 1.0747136563876651, + "grad_norm": 1.7334347992355148, + "learning_rate": 9.598086968060976e-06, + "loss": 0.5326197147369385, + "step": 3050 + }, + { + "epoch": 1.075066079295154, + "grad_norm": 1.5560472770838902, + "learning_rate": 9.592265348447481e-06, + "loss": 0.6533973217010498, + "step": 3051 + }, + { + "epoch": 1.0754185022026432, + "grad_norm": 1.5101678591543142, + "learning_rate": 9.586443867247212e-06, + "loss": 0.5536586046218872, + "step": 3052 + }, + { + "epoch": 1.0757709251101322, + "grad_norm": 1.6611779528904365, + "learning_rate": 9.580622526436382e-06, + "loss": 0.6024892926216125, + "step": 3053 + }, + { + "epoch": 1.076123348017621, + "grad_norm": 1.5423440836231639, + "learning_rate": 9.574801327991148e-06, + "loss": 0.5070478320121765, + "step": 3054 + }, + { + "epoch": 1.07647577092511, + "grad_norm": 1.9040251147858696, + "learning_rate": 9.568980273887637e-06, + "loss": 0.6518458127975464, + "step": 3055 + }, + { + "epoch": 1.0768281938325992, + "grad_norm": 1.8761852451910037, + "learning_rate": 9.563159366101905e-06, + "loss": 0.6120346784591675, + "step": 3056 + }, + { + "epoch": 1.0771806167400881, + "grad_norm": 1.7428937123650154, + "learning_rate": 9.557338606609973e-06, + "loss": 0.6725353598594666, + "step": 3057 + }, + { + "epoch": 1.077533039647577, + "grad_norm": 1.5136863007311347, + "learning_rate": 9.551517997387809e-06, + "loss": 0.5311183333396912, + "step": 3058 + }, + { + "epoch": 1.077885462555066, + "grad_norm": 1.8000300040025692, + "learning_rate": 9.545697540411324e-06, + "loss": 0.5728713274002075, + "step": 3059 + }, + { + "epoch": 1.0782378854625552, + "grad_norm": 1.7991281029512354, + "learning_rate": 9.53987723765639e-06, + "loss": 0.5527676343917847, + "step": 3060 + }, + { + "epoch": 1.078590308370044, + "grad_norm": 1.9177712397501578, + "learning_rate": 9.534057091098813e-06, + "loss": 
0.7529809474945068, + "step": 3061 + }, + { + "epoch": 1.078942731277533, + "grad_norm": 1.6975104946869117, + "learning_rate": 9.528237102714352e-06, + "loss": 0.5485205054283142, + "step": 3062 + }, + { + "epoch": 1.079295154185022, + "grad_norm": 1.8773141561341242, + "learning_rate": 9.522417274478716e-06, + "loss": 0.785184383392334, + "step": 3063 + }, + { + "epoch": 1.079647577092511, + "grad_norm": 1.692195190429073, + "learning_rate": 9.516597608367547e-06, + "loss": 0.5645574331283569, + "step": 3064 + }, + { + "epoch": 1.08, + "grad_norm": 1.6815198266991151, + "learning_rate": 9.51077810635645e-06, + "loss": 0.5878466367721558, + "step": 3065 + }, + { + "epoch": 1.080352422907489, + "grad_norm": 1.7635464385467587, + "learning_rate": 9.504958770420962e-06, + "loss": 0.6610634922981262, + "step": 3066 + }, + { + "epoch": 1.080704845814978, + "grad_norm": 1.8113852263213976, + "learning_rate": 9.49913960253656e-06, + "loss": 0.5928626656532288, + "step": 3067 + }, + { + "epoch": 1.0810572687224669, + "grad_norm": 1.7322633216843277, + "learning_rate": 9.49332060467868e-06, + "loss": 0.7038083672523499, + "step": 3068 + }, + { + "epoch": 1.081409691629956, + "grad_norm": 1.3686406289588096, + "learning_rate": 9.487501778822685e-06, + "loss": 0.5966217517852783, + "step": 3069 + }, + { + "epoch": 1.081762114537445, + "grad_norm": 1.686172060324731, + "learning_rate": 9.481683126943884e-06, + "loss": 0.6594187021255493, + "step": 3070 + }, + { + "epoch": 1.082114537444934, + "grad_norm": 1.4709153501511232, + "learning_rate": 9.475864651017536e-06, + "loss": 0.450161874294281, + "step": 3071 + }, + { + "epoch": 1.0824669603524228, + "grad_norm": 1.9209170149530705, + "learning_rate": 9.470046353018821e-06, + "loss": 0.6459252834320068, + "step": 3072 + }, + { + "epoch": 1.082819383259912, + "grad_norm": 1.5818284678879686, + "learning_rate": 9.464228234922882e-06, + "loss": 0.6505793929100037, + "step": 3073 + }, + { + "epoch": 1.083171806167401, + 
"grad_norm": 1.5944722571395005, + "learning_rate": 9.458410298704781e-06, + "loss": 0.6480910778045654, + "step": 3074 + }, + { + "epoch": 1.0835242290748899, + "grad_norm": 1.530550500951046, + "learning_rate": 9.452592546339527e-06, + "loss": 0.6494983434677124, + "step": 3075 + }, + { + "epoch": 1.0838766519823788, + "grad_norm": 1.560525752678919, + "learning_rate": 9.44677497980207e-06, + "loss": 0.4710897207260132, + "step": 3076 + }, + { + "epoch": 1.0842290748898677, + "grad_norm": 1.5265540562186208, + "learning_rate": 9.440957601067294e-06, + "loss": 0.599402904510498, + "step": 3077 + }, + { + "epoch": 1.084581497797357, + "grad_norm": 1.9340764168188993, + "learning_rate": 9.435140412110006e-06, + "loss": 0.665642499923706, + "step": 3078 + }, + { + "epoch": 1.0849339207048458, + "grad_norm": 1.8868033009058576, + "learning_rate": 9.429323414904975e-06, + "loss": 0.5861828923225403, + "step": 3079 + }, + { + "epoch": 1.0852863436123348, + "grad_norm": 1.581789489047221, + "learning_rate": 9.42350661142688e-06, + "loss": 0.6115351915359497, + "step": 3080 + }, + { + "epoch": 1.0856387665198237, + "grad_norm": 1.6610293276945491, + "learning_rate": 9.417690003650353e-06, + "loss": 0.6627066135406494, + "step": 3081 + }, + { + "epoch": 1.0859911894273129, + "grad_norm": 1.5744692750190625, + "learning_rate": 9.411873593549947e-06, + "loss": 0.6155676245689392, + "step": 3082 + }, + { + "epoch": 1.0863436123348018, + "grad_norm": 1.59429166731528, + "learning_rate": 9.406057383100151e-06, + "loss": 0.5429089069366455, + "step": 3083 + }, + { + "epoch": 1.0866960352422907, + "grad_norm": 1.638763712553269, + "learning_rate": 9.400241374275391e-06, + "loss": 0.5416614413261414, + "step": 3084 + }, + { + "epoch": 1.0870484581497797, + "grad_norm": 1.5652840639245515, + "learning_rate": 9.394425569050018e-06, + "loss": 0.6708710193634033, + "step": 3085 + }, + { + "epoch": 1.0874008810572686, + "grad_norm": 1.6407899201706977, + "learning_rate": 
9.388609969398318e-06, + "loss": 0.588347315788269, + "step": 3086 + }, + { + "epoch": 1.0877533039647578, + "grad_norm": 1.6990356352816562, + "learning_rate": 9.38279457729451e-06, + "loss": 0.4999222755432129, + "step": 3087 + }, + { + "epoch": 1.0881057268722467, + "grad_norm": 1.5508462782114225, + "learning_rate": 9.37697939471273e-06, + "loss": 0.5400034189224243, + "step": 3088 + }, + { + "epoch": 1.0884581497797357, + "grad_norm": 1.6869985582255194, + "learning_rate": 9.37116442362706e-06, + "loss": 0.5583670139312744, + "step": 3089 + }, + { + "epoch": 1.0888105726872246, + "grad_norm": 2.063349590123988, + "learning_rate": 9.365349666011497e-06, + "loss": 0.6863820552825928, + "step": 3090 + }, + { + "epoch": 1.0891629955947137, + "grad_norm": 1.7395123823701124, + "learning_rate": 9.35953512383997e-06, + "loss": 0.6422115564346313, + "step": 3091 + }, + { + "epoch": 1.0895154185022027, + "grad_norm": 1.7254266288951046, + "learning_rate": 9.353720799086337e-06, + "loss": 0.7106888294219971, + "step": 3092 + }, + { + "epoch": 1.0898678414096916, + "grad_norm": 1.7765997338600088, + "learning_rate": 9.347906693724379e-06, + "loss": 0.6070472002029419, + "step": 3093 + }, + { + "epoch": 1.0902202643171806, + "grad_norm": 2.653468303504809, + "learning_rate": 9.342092809727807e-06, + "loss": 0.577377200126648, + "step": 3094 + }, + { + "epoch": 1.0905726872246695, + "grad_norm": 2.222722693331331, + "learning_rate": 9.336279149070252e-06, + "loss": 0.6249948740005493, + "step": 3095 + }, + { + "epoch": 1.0909251101321586, + "grad_norm": 1.7155188858933852, + "learning_rate": 9.330465713725265e-06, + "loss": 0.5515183210372925, + "step": 3096 + }, + { + "epoch": 1.0912775330396476, + "grad_norm": 1.866411497064146, + "learning_rate": 9.324652505666336e-06, + "loss": 0.6074613332748413, + "step": 3097 + }, + { + "epoch": 1.0916299559471365, + "grad_norm": 1.7632595046666684, + "learning_rate": 9.318839526866863e-06, + "loss": 0.6520178318023682, + "step": 
3098 + }, + { + "epoch": 1.0919823788546255, + "grad_norm": 1.4274715968201055, + "learning_rate": 9.31302677930017e-06, + "loss": 0.45863813161849976, + "step": 3099 + }, + { + "epoch": 1.0923348017621146, + "grad_norm": 1.6772052003130429, + "learning_rate": 9.307214264939508e-06, + "loss": 0.610805869102478, + "step": 3100 + }, + { + "epoch": 1.0926872246696036, + "grad_norm": 1.6545163632346178, + "learning_rate": 9.30140198575804e-06, + "loss": 0.5954282283782959, + "step": 3101 + }, + { + "epoch": 1.0930396475770925, + "grad_norm": 1.4805927694864789, + "learning_rate": 9.29558994372886e-06, + "loss": 0.6941400170326233, + "step": 3102 + }, + { + "epoch": 1.0933920704845814, + "grad_norm": 1.4236727289117346, + "learning_rate": 9.289778140824974e-06, + "loss": 0.6723533868789673, + "step": 3103 + }, + { + "epoch": 1.0937444933920706, + "grad_norm": 1.5690147341016918, + "learning_rate": 9.2839665790193e-06, + "loss": 0.49137037992477417, + "step": 3104 + }, + { + "epoch": 1.0940969162995595, + "grad_norm": 1.6112616837583658, + "learning_rate": 9.278155260284692e-06, + "loss": 0.5827045440673828, + "step": 3105 + }, + { + "epoch": 1.0944493392070485, + "grad_norm": 1.7496187485651187, + "learning_rate": 9.272344186593909e-06, + "loss": 0.6391462683677673, + "step": 3106 + }, + { + "epoch": 1.0948017621145374, + "grad_norm": 1.857839078789808, + "learning_rate": 9.266533359919628e-06, + "loss": 0.4994915723800659, + "step": 3107 + }, + { + "epoch": 1.0951541850220265, + "grad_norm": 1.7820549618718244, + "learning_rate": 9.260722782234445e-06, + "loss": 0.6480728387832642, + "step": 3108 + }, + { + "epoch": 1.0955066079295155, + "grad_norm": 1.9724258404436363, + "learning_rate": 9.25491245551087e-06, + "loss": 0.5734057426452637, + "step": 3109 + }, + { + "epoch": 1.0958590308370044, + "grad_norm": 1.5757198230236702, + "learning_rate": 9.249102381721328e-06, + "loss": 0.5650345087051392, + "step": 3110 + }, + { + "epoch": 1.0962114537444934, + "grad_norm": 
1.6196253415823336, + "learning_rate": 9.243292562838164e-06, + "loss": 0.6261975765228271, + "step": 3111 + }, + { + "epoch": 1.0965638766519823, + "grad_norm": 1.6283298345999566, + "learning_rate": 9.237483000833619e-06, + "loss": 0.730735182762146, + "step": 3112 + }, + { + "epoch": 1.0969162995594715, + "grad_norm": 1.614573149399901, + "learning_rate": 9.231673697679867e-06, + "loss": 0.6198948621749878, + "step": 3113 + }, + { + "epoch": 1.0972687224669604, + "grad_norm": 1.526191646446162, + "learning_rate": 9.225864655348982e-06, + "loss": 0.5302865505218506, + "step": 3114 + }, + { + "epoch": 1.0976211453744493, + "grad_norm": 1.6895671377093768, + "learning_rate": 9.220055875812955e-06, + "loss": 0.5995128154754639, + "step": 3115 + }, + { + "epoch": 1.0979735682819383, + "grad_norm": 1.5451580100020488, + "learning_rate": 9.214247361043687e-06, + "loss": 0.3801479935646057, + "step": 3116 + }, + { + "epoch": 1.0983259911894274, + "grad_norm": 1.7467243659333909, + "learning_rate": 9.208439113012984e-06, + "loss": 0.5617209076881409, + "step": 3117 + }, + { + "epoch": 1.0986784140969164, + "grad_norm": 2.3313501330545776, + "learning_rate": 9.202631133692572e-06, + "loss": 0.5233842134475708, + "step": 3118 + }, + { + "epoch": 1.0990308370044053, + "grad_norm": 1.5308784453968334, + "learning_rate": 9.196823425054073e-06, + "loss": 0.5300124883651733, + "step": 3119 + }, + { + "epoch": 1.0993832599118942, + "grad_norm": 1.6766914696070794, + "learning_rate": 9.191015989069024e-06, + "loss": 0.686185359954834, + "step": 3120 + }, + { + "epoch": 1.0997356828193832, + "grad_norm": 4.625699614895419, + "learning_rate": 9.18520882770887e-06, + "loss": 0.6043056845664978, + "step": 3121 + }, + { + "epoch": 1.1000881057268723, + "grad_norm": 1.4445640616396158, + "learning_rate": 9.179401942944961e-06, + "loss": 0.6299905776977539, + "step": 3122 + }, + { + "epoch": 1.1004405286343613, + "grad_norm": 1.639683344548818, + "learning_rate": 9.173595336748557e-06, 
+ "loss": 0.57872474193573, + "step": 3123 + }, + { + "epoch": 1.1007929515418502, + "grad_norm": 1.6533643796746975, + "learning_rate": 9.167789011090818e-06, + "loss": 0.5638746023178101, + "step": 3124 + }, + { + "epoch": 1.1011453744493391, + "grad_norm": 1.9780317067618627, + "learning_rate": 9.161982967942806e-06, + "loss": 0.6150490045547485, + "step": 3125 + }, + { + "epoch": 1.1014977973568283, + "grad_norm": 1.6035565827670604, + "learning_rate": 9.156177209275503e-06, + "loss": 0.547231912612915, + "step": 3126 + }, + { + "epoch": 1.1018502202643172, + "grad_norm": 1.753224578445511, + "learning_rate": 9.150371737059773e-06, + "loss": 0.6999325752258301, + "step": 3127 + }, + { + "epoch": 1.1022026431718062, + "grad_norm": 1.868897492269033, + "learning_rate": 9.144566553266396e-06, + "loss": 0.7175568342208862, + "step": 3128 + }, + { + "epoch": 1.102555066079295, + "grad_norm": 1.6615553040601516, + "learning_rate": 9.138761659866054e-06, + "loss": 0.7308273911476135, + "step": 3129 + }, + { + "epoch": 1.102907488986784, + "grad_norm": 1.6216416819643327, + "learning_rate": 9.132957058829323e-06, + "loss": 0.5951930284500122, + "step": 3130 + }, + { + "epoch": 1.1032599118942732, + "grad_norm": 1.8459198222998503, + "learning_rate": 9.127152752126688e-06, + "loss": 0.5684988498687744, + "step": 3131 + }, + { + "epoch": 1.1036123348017621, + "grad_norm": 1.6778026851292638, + "learning_rate": 9.121348741728532e-06, + "loss": 0.6490764617919922, + "step": 3132 + }, + { + "epoch": 1.103964757709251, + "grad_norm": 1.9759558630482505, + "learning_rate": 9.115545029605129e-06, + "loss": 0.7795257568359375, + "step": 3133 + }, + { + "epoch": 1.10431718061674, + "grad_norm": 1.677150279034534, + "learning_rate": 9.10974161772667e-06, + "loss": 0.5443774461746216, + "step": 3134 + }, + { + "epoch": 1.1046696035242292, + "grad_norm": 1.4979331299176493, + "learning_rate": 9.103938508063223e-06, + "loss": 0.48989373445510864, + "step": 3135 + }, + { + "epoch": 
1.105022026431718, + "grad_norm": 1.7384756252454785, + "learning_rate": 9.098135702584762e-06, + "loss": 0.5628808736801147, + "step": 3136 + }, + { + "epoch": 1.105374449339207, + "grad_norm": 1.7853238397751252, + "learning_rate": 9.092333203261168e-06, + "loss": 0.6549321413040161, + "step": 3137 + }, + { + "epoch": 1.105726872246696, + "grad_norm": 1.6854667721006384, + "learning_rate": 9.0865310120622e-06, + "loss": 0.7353606224060059, + "step": 3138 + }, + { + "epoch": 1.106079295154185, + "grad_norm": 1.4467352618974103, + "learning_rate": 9.080729130957528e-06, + "loss": 0.650668203830719, + "step": 3139 + }, + { + "epoch": 1.106431718061674, + "grad_norm": 1.4313841589857448, + "learning_rate": 9.07492756191671e-06, + "loss": 0.5618860721588135, + "step": 3140 + }, + { + "epoch": 1.106784140969163, + "grad_norm": 1.6263891772619556, + "learning_rate": 9.069126306909187e-06, + "loss": 0.5532773733139038, + "step": 3141 + }, + { + "epoch": 1.107136563876652, + "grad_norm": 1.5761547934103723, + "learning_rate": 9.06332536790432e-06, + "loss": 0.6240289211273193, + "step": 3142 + }, + { + "epoch": 1.1074889867841409, + "grad_norm": 1.6326282131144043, + "learning_rate": 9.057524746871335e-06, + "loss": 0.5952814221382141, + "step": 3143 + }, + { + "epoch": 1.10784140969163, + "grad_norm": 1.7063742447281478, + "learning_rate": 9.051724445779373e-06, + "loss": 0.6011646389961243, + "step": 3144 + }, + { + "epoch": 1.108193832599119, + "grad_norm": 1.54385403751274, + "learning_rate": 9.045924466597448e-06, + "loss": 0.6964641213417053, + "step": 3145 + }, + { + "epoch": 1.108546255506608, + "grad_norm": 1.9798851390043897, + "learning_rate": 9.040124811294473e-06, + "loss": 0.6821622848510742, + "step": 3146 + }, + { + "epoch": 1.1088986784140968, + "grad_norm": 1.569676973352834, + "learning_rate": 9.034325481839253e-06, + "loss": 0.5045080184936523, + "step": 3147 + }, + { + "epoch": 1.109251101321586, + "grad_norm": 1.608921739397865, + "learning_rate": 
9.028526480200482e-06, + "loss": 0.5709735155105591, + "step": 3148 + }, + { + "epoch": 1.109603524229075, + "grad_norm": 1.6331449251948336, + "learning_rate": 9.022727808346731e-06, + "loss": 0.5882325172424316, + "step": 3149 + }, + { + "epoch": 1.1099559471365639, + "grad_norm": 1.6560869042500304, + "learning_rate": 9.016929468246482e-06, + "loss": 0.627426266670227, + "step": 3150 + }, + { + "epoch": 1.1103083700440528, + "grad_norm": 1.5720686051365462, + "learning_rate": 9.011131461868078e-06, + "loss": 0.42419761419296265, + "step": 3151 + }, + { + "epoch": 1.110660792951542, + "grad_norm": 1.487398401726564, + "learning_rate": 9.005333791179775e-06, + "loss": 0.5261023044586182, + "step": 3152 + }, + { + "epoch": 1.111013215859031, + "grad_norm": 1.853640852117203, + "learning_rate": 8.999536458149692e-06, + "loss": 0.6654448509216309, + "step": 3153 + }, + { + "epoch": 1.1113656387665198, + "grad_norm": 1.8252144061899127, + "learning_rate": 8.993739464745843e-06, + "loss": 0.5939514636993408, + "step": 3154 + }, + { + "epoch": 1.1117180616740088, + "grad_norm": 2.120048901517583, + "learning_rate": 8.987942812936133e-06, + "loss": 0.6381959319114685, + "step": 3155 + }, + { + "epoch": 1.1120704845814977, + "grad_norm": 1.5708485505419778, + "learning_rate": 8.982146504688343e-06, + "loss": 0.5474847555160522, + "step": 3156 + }, + { + "epoch": 1.1124229074889869, + "grad_norm": 1.9617265332983251, + "learning_rate": 8.97635054197013e-06, + "loss": 0.6306884288787842, + "step": 3157 + }, + { + "epoch": 1.1127753303964758, + "grad_norm": 1.6582794196349533, + "learning_rate": 8.97055492674906e-06, + "loss": 0.5988807678222656, + "step": 3158 + }, + { + "epoch": 1.1131277533039647, + "grad_norm": 1.4627681911625667, + "learning_rate": 8.964759660992547e-06, + "loss": 0.6316757202148438, + "step": 3159 + }, + { + "epoch": 1.1134801762114537, + "grad_norm": 2.1475966254528265, + "learning_rate": 8.958964746667917e-06, + "loss": 0.6031370162963867, + "step": 
3160 + }, + { + "epoch": 1.1138325991189428, + "grad_norm": 1.631780585948097, + "learning_rate": 8.953170185742357e-06, + "loss": 0.6334977149963379, + "step": 3161 + }, + { + "epoch": 1.1141850220264318, + "grad_norm": 1.7666867258825858, + "learning_rate": 8.947375980182937e-06, + "loss": 0.49237731099128723, + "step": 3162 + }, + { + "epoch": 1.1145374449339207, + "grad_norm": 1.8113939325794732, + "learning_rate": 8.941582131956615e-06, + "loss": 0.7349523305892944, + "step": 3163 + }, + { + "epoch": 1.1148898678414096, + "grad_norm": 1.9764498599764084, + "learning_rate": 8.935788643030218e-06, + "loss": 0.5048422813415527, + "step": 3164 + }, + { + "epoch": 1.1152422907488986, + "grad_norm": 1.90381850621639, + "learning_rate": 8.92999551537046e-06, + "loss": 0.6217244267463684, + "step": 3165 + }, + { + "epoch": 1.1155947136563877, + "grad_norm": 1.6579628905821213, + "learning_rate": 8.924202750943926e-06, + "loss": 0.4949147701263428, + "step": 3166 + }, + { + "epoch": 1.1159471365638767, + "grad_norm": 1.8665150826118222, + "learning_rate": 8.918410351717074e-06, + "loss": 0.5975630283355713, + "step": 3167 + }, + { + "epoch": 1.1162995594713656, + "grad_norm": 1.8627553919144322, + "learning_rate": 8.91261831965625e-06, + "loss": 0.7546026110649109, + "step": 3168 + }, + { + "epoch": 1.1166519823788545, + "grad_norm": 1.8785066059323416, + "learning_rate": 8.906826656727665e-06, + "loss": 0.6238037347793579, + "step": 3169 + }, + { + "epoch": 1.1170044052863437, + "grad_norm": 1.7775910427875068, + "learning_rate": 8.901035364897407e-06, + "loss": 0.617587685585022, + "step": 3170 + }, + { + "epoch": 1.1173568281938326, + "grad_norm": 1.6345696523196545, + "learning_rate": 8.895244446131445e-06, + "loss": 0.4834432005882263, + "step": 3171 + }, + { + "epoch": 1.1177092511013216, + "grad_norm": 1.8061061322305951, + "learning_rate": 8.889453902395608e-06, + "loss": 0.614972710609436, + "step": 3172 + }, + { + "epoch": 1.1180616740088105, + "grad_norm": 
2.26536947887869, + "learning_rate": 8.883663735655612e-06, + "loss": 0.6468379497528076, + "step": 3173 + }, + { + "epoch": 1.1184140969162994, + "grad_norm": 1.8154030785363677, + "learning_rate": 8.877873947877042e-06, + "loss": 0.6372466683387756, + "step": 3174 + }, + { + "epoch": 1.1187665198237886, + "grad_norm": 1.8831907584481906, + "learning_rate": 8.872084541025336e-06, + "loss": 0.6295863389968872, + "step": 3175 + }, + { + "epoch": 1.1191189427312775, + "grad_norm": 1.7211075291863254, + "learning_rate": 8.866295517065831e-06, + "loss": 0.6109524369239807, + "step": 3176 + }, + { + "epoch": 1.1194713656387665, + "grad_norm": 1.6861537948886334, + "learning_rate": 8.860506877963715e-06, + "loss": 0.6724812388420105, + "step": 3177 + }, + { + "epoch": 1.1198237885462554, + "grad_norm": 1.4091706259139964, + "learning_rate": 8.854718625684049e-06, + "loss": 0.6612162590026855, + "step": 3178 + }, + { + "epoch": 1.1201762114537446, + "grad_norm": 1.6332443405139663, + "learning_rate": 8.84893076219177e-06, + "loss": 0.6209636926651001, + "step": 3179 + }, + { + "epoch": 1.1205286343612335, + "grad_norm": 1.7567347030111673, + "learning_rate": 8.843143289451673e-06, + "loss": 0.8548281192779541, + "step": 3180 + }, + { + "epoch": 1.1208810572687224, + "grad_norm": 1.742397796953756, + "learning_rate": 8.837356209428428e-06, + "loss": 0.4621508717536926, + "step": 3181 + }, + { + "epoch": 1.1212334801762114, + "grad_norm": 1.8553184481302196, + "learning_rate": 8.831569524086568e-06, + "loss": 0.5065817832946777, + "step": 3182 + }, + { + "epoch": 1.1215859030837005, + "grad_norm": 1.5532313157641433, + "learning_rate": 8.825783235390488e-06, + "loss": 0.5467691421508789, + "step": 3183 + }, + { + "epoch": 1.1219383259911895, + "grad_norm": 1.3786030341795126, + "learning_rate": 8.81999734530446e-06, + "loss": 0.4938517212867737, + "step": 3184 + }, + { + "epoch": 1.1222907488986784, + "grad_norm": 1.4972934746199023, + "learning_rate": 
8.814211855792609e-06, + "loss": 0.6125702857971191, + "step": 3185 + }, + { + "epoch": 1.1226431718061674, + "grad_norm": 1.427476145591487, + "learning_rate": 8.80842676881893e-06, + "loss": 0.5272841453552246, + "step": 3186 + }, + { + "epoch": 1.1229955947136563, + "grad_norm": 1.8463623605620603, + "learning_rate": 8.802642086347278e-06, + "loss": 0.5595715045928955, + "step": 3187 + }, + { + "epoch": 1.1233480176211454, + "grad_norm": 1.7533827268189746, + "learning_rate": 8.796857810341375e-06, + "loss": 0.7178677916526794, + "step": 3188 + }, + { + "epoch": 1.1237004405286344, + "grad_norm": 2.166791630557212, + "learning_rate": 8.791073942764806e-06, + "loss": 0.6000991463661194, + "step": 3189 + }, + { + "epoch": 1.1240528634361233, + "grad_norm": 1.7926160729471858, + "learning_rate": 8.785290485581008e-06, + "loss": 0.537361741065979, + "step": 3190 + }, + { + "epoch": 1.1244052863436123, + "grad_norm": 1.7666842188914018, + "learning_rate": 8.779507440753286e-06, + "loss": 0.7135556936264038, + "step": 3191 + }, + { + "epoch": 1.1247577092511014, + "grad_norm": 1.7053825384185084, + "learning_rate": 8.773724810244805e-06, + "loss": 0.501063346862793, + "step": 3192 + }, + { + "epoch": 1.1251101321585903, + "grad_norm": 1.679109568038749, + "learning_rate": 8.767942596018587e-06, + "loss": 0.6885302662849426, + "step": 3193 + }, + { + "epoch": 1.1254625550660793, + "grad_norm": 1.321748305255468, + "learning_rate": 8.762160800037516e-06, + "loss": 0.5902360081672668, + "step": 3194 + }, + { + "epoch": 1.1258149779735682, + "grad_norm": 1.687654327550192, + "learning_rate": 8.75637942426433e-06, + "loss": 0.6308953762054443, + "step": 3195 + }, + { + "epoch": 1.1261674008810574, + "grad_norm": 1.8380657710321036, + "learning_rate": 8.750598470661625e-06, + "loss": 0.5710124969482422, + "step": 3196 + }, + { + "epoch": 1.1265198237885463, + "grad_norm": 1.76295044659038, + "learning_rate": 8.744817941191862e-06, + "loss": 0.6110632419586182, + "step": 
3197 + }, + { + "epoch": 1.1268722466960353, + "grad_norm": 1.7274451742305768, + "learning_rate": 8.73903783781734e-06, + "loss": 0.5274624824523926, + "step": 3198 + }, + { + "epoch": 1.1272246696035242, + "grad_norm": 1.549070468504263, + "learning_rate": 8.733258162500228e-06, + "loss": 0.6144713163375854, + "step": 3199 + }, + { + "epoch": 1.1275770925110131, + "grad_norm": 1.8001185698886477, + "learning_rate": 8.727478917202551e-06, + "loss": 0.6404621005058289, + "step": 3200 + }, + { + "epoch": 1.1279295154185023, + "grad_norm": 1.602548541775438, + "learning_rate": 8.721700103886177e-06, + "loss": 0.5693025588989258, + "step": 3201 + }, + { + "epoch": 1.1282819383259912, + "grad_norm": 1.6563446017851289, + "learning_rate": 8.715921724512838e-06, + "loss": 0.5631159543991089, + "step": 3202 + }, + { + "epoch": 1.1286343612334802, + "grad_norm": 1.5785191171510689, + "learning_rate": 8.710143781044113e-06, + "loss": 0.648078441619873, + "step": 3203 + }, + { + "epoch": 1.128986784140969, + "grad_norm": 2.0721270642934666, + "learning_rate": 8.704366275441426e-06, + "loss": 0.6858379244804382, + "step": 3204 + }, + { + "epoch": 1.1293392070484582, + "grad_norm": 1.8203927475030908, + "learning_rate": 8.698589209666074e-06, + "loss": 0.7244000434875488, + "step": 3205 + }, + { + "epoch": 1.1296916299559472, + "grad_norm": 1.7775130777760553, + "learning_rate": 8.692812585679182e-06, + "loss": 0.5918365716934204, + "step": 3206 + }, + { + "epoch": 1.1300440528634361, + "grad_norm": 1.8950041670387165, + "learning_rate": 8.687036405441733e-06, + "loss": 0.6893443465232849, + "step": 3207 + }, + { + "epoch": 1.130396475770925, + "grad_norm": 1.6934464725865028, + "learning_rate": 8.681260670914564e-06, + "loss": 0.729834794998169, + "step": 3208 + }, + { + "epoch": 1.130748898678414, + "grad_norm": 1.9278305082183818, + "learning_rate": 8.675485384058356e-06, + "loss": 0.6525821685791016, + "step": 3209 + }, + { + "epoch": 1.1311013215859032, + "grad_norm": 
1.7892045210081244, + "learning_rate": 8.669710546833642e-06, + "loss": 0.6799874305725098, + "step": 3210 + }, + { + "epoch": 1.131453744493392, + "grad_norm": 1.6216385781826248, + "learning_rate": 8.6639361612008e-06, + "loss": 0.5614932775497437, + "step": 3211 + }, + { + "epoch": 1.131806167400881, + "grad_norm": 1.6912315117870094, + "learning_rate": 8.658162229120045e-06, + "loss": 0.5975101590156555, + "step": 3212 + }, + { + "epoch": 1.13215859030837, + "grad_norm": 1.7352702737909875, + "learning_rate": 8.652388752551458e-06, + "loss": 0.5367887020111084, + "step": 3213 + }, + { + "epoch": 1.1325110132158591, + "grad_norm": 1.360358935584503, + "learning_rate": 8.646615733454949e-06, + "loss": 0.4451865553855896, + "step": 3214 + }, + { + "epoch": 1.132863436123348, + "grad_norm": 1.8983821913108012, + "learning_rate": 8.64084317379028e-06, + "loss": 0.6482576131820679, + "step": 3215 + }, + { + "epoch": 1.133215859030837, + "grad_norm": 1.5858394578763535, + "learning_rate": 8.635071075517053e-06, + "loss": 0.5890318155288696, + "step": 3216 + }, + { + "epoch": 1.133568281938326, + "grad_norm": 1.6567929917802857, + "learning_rate": 8.629299440594719e-06, + "loss": 0.554576575756073, + "step": 3217 + }, + { + "epoch": 1.1339207048458149, + "grad_norm": 1.6966150183280715, + "learning_rate": 8.623528270982567e-06, + "loss": 0.5987116694450378, + "step": 3218 + }, + { + "epoch": 1.134273127753304, + "grad_norm": 1.8696533969224407, + "learning_rate": 8.617757568639731e-06, + "loss": 0.49857625365257263, + "step": 3219 + }, + { + "epoch": 1.134625550660793, + "grad_norm": 1.6960564098429034, + "learning_rate": 8.61198733552518e-06, + "loss": 0.6116641759872437, + "step": 3220 + }, + { + "epoch": 1.134977973568282, + "grad_norm": 1.6619215502907394, + "learning_rate": 8.606217573597738e-06, + "loss": 0.4346674978733063, + "step": 3221 + }, + { + "epoch": 1.1353303964757708, + "grad_norm": 1.6058889875943096, + "learning_rate": 8.600448284816046e-06, + 
"loss": 0.6973283290863037, + "step": 3222 + }, + { + "epoch": 1.13568281938326, + "grad_norm": 1.547791232560021, + "learning_rate": 8.594679471138613e-06, + "loss": 0.5457896590232849, + "step": 3223 + }, + { + "epoch": 1.136035242290749, + "grad_norm": 1.6457593373386994, + "learning_rate": 8.58891113452376e-06, + "loss": 0.4520479440689087, + "step": 3224 + }, + { + "epoch": 1.1363876651982379, + "grad_norm": 1.6501706928794149, + "learning_rate": 8.58314327692966e-06, + "loss": 0.6169587969779968, + "step": 3225 + }, + { + "epoch": 1.1367400881057268, + "grad_norm": 1.729795732302939, + "learning_rate": 8.577375900314327e-06, + "loss": 0.6398670673370361, + "step": 3226 + }, + { + "epoch": 1.1370925110132157, + "grad_norm": 1.6846614829900397, + "learning_rate": 8.571609006635604e-06, + "loss": 0.5772207975387573, + "step": 3227 + }, + { + "epoch": 1.137444933920705, + "grad_norm": 1.5622430074284195, + "learning_rate": 8.565842597851165e-06, + "loss": 0.5561503171920776, + "step": 3228 + }, + { + "epoch": 1.1377973568281938, + "grad_norm": 1.644881271079104, + "learning_rate": 8.560076675918537e-06, + "loss": 0.4702373743057251, + "step": 3229 + }, + { + "epoch": 1.1381497797356828, + "grad_norm": 1.778044829497574, + "learning_rate": 8.554311242795061e-06, + "loss": 0.5967564582824707, + "step": 3230 + }, + { + "epoch": 1.138502202643172, + "grad_norm": 1.782270527802186, + "learning_rate": 8.548546300437928e-06, + "loss": 0.4749453663825989, + "step": 3231 + }, + { + "epoch": 1.1388546255506609, + "grad_norm": 2.2009062727733046, + "learning_rate": 8.542781850804155e-06, + "loss": 0.6939869523048401, + "step": 3232 + }, + { + "epoch": 1.1392070484581498, + "grad_norm": 1.4327701228186707, + "learning_rate": 8.537017895850593e-06, + "loss": 0.5618892908096313, + "step": 3233 + }, + { + "epoch": 1.1395594713656387, + "grad_norm": 1.6784618730938181, + "learning_rate": 8.531254437533925e-06, + "loss": 0.6627654433250427, + "step": 3234 + }, + { + "epoch": 
1.1399118942731277, + "grad_norm": 1.770712809653697, + "learning_rate": 8.525491477810671e-06, + "loss": 0.6365151405334473, + "step": 3235 + }, + { + "epoch": 1.1402643171806168, + "grad_norm": 1.6623213186798471, + "learning_rate": 8.519729018637164e-06, + "loss": 0.5207303762435913, + "step": 3236 + }, + { + "epoch": 1.1406167400881058, + "grad_norm": 1.8240600257881658, + "learning_rate": 8.513967061969594e-06, + "loss": 0.7469059228897095, + "step": 3237 + }, + { + "epoch": 1.1409691629955947, + "grad_norm": 1.7786802310337648, + "learning_rate": 8.508205609763955e-06, + "loss": 0.5778630971908569, + "step": 3238 + }, + { + "epoch": 1.1413215859030836, + "grad_norm": 1.756406665695002, + "learning_rate": 8.502444663976089e-06, + "loss": 0.5447480082511902, + "step": 3239 + }, + { + "epoch": 1.1416740088105728, + "grad_norm": 1.628690443424602, + "learning_rate": 8.496684226561653e-06, + "loss": 0.6002986431121826, + "step": 3240 + }, + { + "epoch": 1.1420264317180617, + "grad_norm": 1.7257255594282812, + "learning_rate": 8.490924299476133e-06, + "loss": 0.7627072930335999, + "step": 3241 + }, + { + "epoch": 1.1423788546255507, + "grad_norm": 1.725113553289998, + "learning_rate": 8.485164884674854e-06, + "loss": 0.6406078338623047, + "step": 3242 + }, + { + "epoch": 1.1427312775330396, + "grad_norm": 2.110533369358698, + "learning_rate": 8.479405984112949e-06, + "loss": 0.47047436237335205, + "step": 3243 + }, + { + "epoch": 1.1430837004405285, + "grad_norm": 2.0564519486525903, + "learning_rate": 8.473647599745393e-06, + "loss": 0.6702529191970825, + "step": 3244 + }, + { + "epoch": 1.1434361233480177, + "grad_norm": 2.1168699536348488, + "learning_rate": 8.467889733526977e-06, + "loss": 0.6570258140563965, + "step": 3245 + }, + { + "epoch": 1.1437885462555066, + "grad_norm": 11.021488641985083, + "learning_rate": 8.462132387412312e-06, + "loss": 0.6248423457145691, + "step": 3246 + }, + { + "epoch": 1.1441409691629956, + "grad_norm": 1.6339128666105858, + 
"learning_rate": 8.456375563355842e-06, + "loss": 0.7377427816390991, + "step": 3247 + }, + { + "epoch": 1.1444933920704845, + "grad_norm": 1.8159484011485405, + "learning_rate": 8.45061926331183e-06, + "loss": 0.6469020843505859, + "step": 3248 + }, + { + "epoch": 1.1448458149779737, + "grad_norm": 1.81461416151687, + "learning_rate": 8.444863489234356e-06, + "loss": 0.6417430639266968, + "step": 3249 + }, + { + "epoch": 1.1451982378854626, + "grad_norm": 1.7715952211280361, + "learning_rate": 8.439108243077335e-06, + "loss": 0.5447275638580322, + "step": 3250 + }, + { + "epoch": 1.1455506607929515, + "grad_norm": 1.8341737914542349, + "learning_rate": 8.433353526794484e-06, + "loss": 0.6621315479278564, + "step": 3251 + }, + { + "epoch": 1.1459030837004405, + "grad_norm": 1.850872292820976, + "learning_rate": 8.42759934233936e-06, + "loss": 0.5660392045974731, + "step": 3252 + }, + { + "epoch": 1.1462555066079294, + "grad_norm": 1.695638018183687, + "learning_rate": 8.42184569166532e-06, + "loss": 0.43074172735214233, + "step": 3253 + }, + { + "epoch": 1.1466079295154186, + "grad_norm": 1.6152519611154568, + "learning_rate": 8.416092576725554e-06, + "loss": 0.5863226056098938, + "step": 3254 + }, + { + "epoch": 1.1469603524229075, + "grad_norm": 1.8724827582882198, + "learning_rate": 8.410339999473067e-06, + "loss": 0.6003422737121582, + "step": 3255 + }, + { + "epoch": 1.1473127753303964, + "grad_norm": 1.806876842860533, + "learning_rate": 8.404587961860678e-06, + "loss": 0.6109241247177124, + "step": 3256 + }, + { + "epoch": 1.1476651982378854, + "grad_norm": 1.7768687099142642, + "learning_rate": 8.398836465841021e-06, + "loss": 0.5749140977859497, + "step": 3257 + }, + { + "epoch": 1.1480176211453745, + "grad_norm": 1.762377433704451, + "learning_rate": 8.393085513366557e-06, + "loss": 0.6920739412307739, + "step": 3258 + }, + { + "epoch": 1.1483700440528635, + "grad_norm": 1.903311052790267, + "learning_rate": 8.38733510638955e-06, + "loss": 
0.6632573008537292, + "step": 3259 + }, + { + "epoch": 1.1487224669603524, + "grad_norm": 1.925929272799836, + "learning_rate": 8.381585246862091e-06, + "loss": 0.6396503448486328, + "step": 3260 + }, + { + "epoch": 1.1490748898678413, + "grad_norm": 4.327872701462553, + "learning_rate": 8.375835936736072e-06, + "loss": 0.5975937843322754, + "step": 3261 + }, + { + "epoch": 1.1494273127753303, + "grad_norm": 1.9097739370767552, + "learning_rate": 8.370087177963204e-06, + "loss": 0.6297920346260071, + "step": 3262 + }, + { + "epoch": 1.1497797356828194, + "grad_norm": 1.6773858737351708, + "learning_rate": 8.364338972495016e-06, + "loss": 0.7004375457763672, + "step": 3263 + }, + { + "epoch": 1.1501321585903084, + "grad_norm": 1.9905333664754346, + "learning_rate": 8.358591322282845e-06, + "loss": 0.5850871801376343, + "step": 3264 + }, + { + "epoch": 1.1504845814977973, + "grad_norm": 1.6216139435027066, + "learning_rate": 8.352844229277834e-06, + "loss": 0.493900865316391, + "step": 3265 + }, + { + "epoch": 1.1508370044052865, + "grad_norm": 1.8994324319983171, + "learning_rate": 8.34709769543095e-06, + "loss": 0.573354959487915, + "step": 3266 + }, + { + "epoch": 1.1511894273127754, + "grad_norm": 2.1672972359364175, + "learning_rate": 8.341351722692951e-06, + "loss": 0.7154442667961121, + "step": 3267 + }, + { + "epoch": 1.1515418502202643, + "grad_norm": 1.705511845117997, + "learning_rate": 8.335606313014432e-06, + "loss": 0.5429074764251709, + "step": 3268 + }, + { + "epoch": 1.1518942731277533, + "grad_norm": 1.8606068751906144, + "learning_rate": 8.329861468345768e-06, + "loss": 0.6938891410827637, + "step": 3269 + }, + { + "epoch": 1.1522466960352422, + "grad_norm": 5.765839224937511, + "learning_rate": 8.324117190637157e-06, + "loss": 0.7114205360412598, + "step": 3270 + }, + { + "epoch": 1.1525991189427314, + "grad_norm": 1.761532917196708, + "learning_rate": 8.318373481838605e-06, + "loss": 0.5353071093559265, + "step": 3271 + }, + { + "epoch": 
1.1529515418502203, + "grad_norm": 1.931038515640054, + "learning_rate": 8.312630343899921e-06, + "loss": 0.7838516235351562, + "step": 3272 + }, + { + "epoch": 1.1533039647577092, + "grad_norm": 2.013028743927059, + "learning_rate": 8.306887778770724e-06, + "loss": 0.630479633808136, + "step": 3273 + }, + { + "epoch": 1.1536563876651982, + "grad_norm": 1.908388737326531, + "learning_rate": 8.301145788400438e-06, + "loss": 0.6568116545677185, + "step": 3274 + }, + { + "epoch": 1.1540088105726873, + "grad_norm": 1.4673620532583986, + "learning_rate": 8.295404374738278e-06, + "loss": 0.5410804748535156, + "step": 3275 + }, + { + "epoch": 1.1543612334801763, + "grad_norm": 2.0887831204496017, + "learning_rate": 8.289663539733292e-06, + "loss": 0.6699862480163574, + "step": 3276 + }, + { + "epoch": 1.1547136563876652, + "grad_norm": 2.146352543425904, + "learning_rate": 8.283923285334304e-06, + "loss": 0.6828576326370239, + "step": 3277 + }, + { + "epoch": 1.1550660792951541, + "grad_norm": 1.6441665475307043, + "learning_rate": 8.278183613489951e-06, + "loss": 0.5569214820861816, + "step": 3278 + }, + { + "epoch": 1.155418502202643, + "grad_norm": 1.5736783771881073, + "learning_rate": 8.27244452614868e-06, + "loss": 0.6276477575302124, + "step": 3279 + }, + { + "epoch": 1.1557709251101322, + "grad_norm": 1.639795393267647, + "learning_rate": 8.266706025258727e-06, + "loss": 0.5752792954444885, + "step": 3280 + }, + { + "epoch": 1.1561233480176212, + "grad_norm": 1.8007170708068962, + "learning_rate": 8.260968112768137e-06, + "loss": 0.6149388551712036, + "step": 3281 + }, + { + "epoch": 1.1564757709251101, + "grad_norm": 1.8241425629966381, + "learning_rate": 8.255230790624755e-06, + "loss": 0.6399196982383728, + "step": 3282 + }, + { + "epoch": 1.156828193832599, + "grad_norm": 1.8065599712551461, + "learning_rate": 8.249494060776215e-06, + "loss": 0.6927458047866821, + "step": 3283 + }, + { + "epoch": 1.1571806167400882, + "grad_norm": 1.5535864037785454, + 
"learning_rate": 8.243757925169968e-06, + "loss": 0.5843946933746338, + "step": 3284 + }, + { + "epoch": 1.1575330396475771, + "grad_norm": 1.7771012211418213, + "learning_rate": 8.238022385753248e-06, + "loss": 0.6469332575798035, + "step": 3285 + }, + { + "epoch": 1.157885462555066, + "grad_norm": 1.5500454202505596, + "learning_rate": 8.23228744447309e-06, + "loss": 0.572630763053894, + "step": 3286 + }, + { + "epoch": 1.158237885462555, + "grad_norm": 1.7219264264044976, + "learning_rate": 8.226553103276335e-06, + "loss": 0.6872239112854004, + "step": 3287 + }, + { + "epoch": 1.158590308370044, + "grad_norm": 1.7206454172461807, + "learning_rate": 8.220819364109607e-06, + "loss": 0.5116995573043823, + "step": 3288 + }, + { + "epoch": 1.1589427312775331, + "grad_norm": 1.7411708693012447, + "learning_rate": 8.215086228919336e-06, + "loss": 0.6179347038269043, + "step": 3289 + }, + { + "epoch": 1.159295154185022, + "grad_norm": 1.5098995111565061, + "learning_rate": 8.209353699651745e-06, + "loss": 0.573688805103302, + "step": 3290 + }, + { + "epoch": 1.159647577092511, + "grad_norm": 1.6209084651188936, + "learning_rate": 8.20362177825284e-06, + "loss": 0.6622583866119385, + "step": 3291 + }, + { + "epoch": 1.16, + "grad_norm": 1.9043199482736668, + "learning_rate": 8.197890466668441e-06, + "loss": 0.4945096969604492, + "step": 3292 + }, + { + "epoch": 1.160352422907489, + "grad_norm": 1.9477760218669748, + "learning_rate": 8.19215976684414e-06, + "loss": 0.5657082796096802, + "step": 3293 + }, + { + "epoch": 1.160704845814978, + "grad_norm": 1.7301213281073105, + "learning_rate": 8.186429680725339e-06, + "loss": 0.5684623122215271, + "step": 3294 + }, + { + "epoch": 1.161057268722467, + "grad_norm": 1.7781389678625354, + "learning_rate": 8.180700210257223e-06, + "loss": 0.567638635635376, + "step": 3295 + }, + { + "epoch": 1.1614096916299559, + "grad_norm": 1.8973989975016394, + "learning_rate": 8.174971357384762e-06, + "loss": 0.7182992696762085, + "step": 
3296 + }, + { + "epoch": 1.1617621145374448, + "grad_norm": 1.7202032555937063, + "learning_rate": 8.169243124052731e-06, + "loss": 0.7188737392425537, + "step": 3297 + }, + { + "epoch": 1.162114537444934, + "grad_norm": 1.632750713102644, + "learning_rate": 8.163515512205687e-06, + "loss": 0.5532418489456177, + "step": 3298 + }, + { + "epoch": 1.162466960352423, + "grad_norm": 2.2725291479645136, + "learning_rate": 8.157788523787967e-06, + "loss": 0.7167447209358215, + "step": 3299 + }, + { + "epoch": 1.1628193832599119, + "grad_norm": 1.8053860419209504, + "learning_rate": 8.152062160743716e-06, + "loss": 0.633411169052124, + "step": 3300 + }, + { + "epoch": 1.1631718061674008, + "grad_norm": 1.8006555184567121, + "learning_rate": 8.146336425016849e-06, + "loss": 0.6686321496963501, + "step": 3301 + }, + { + "epoch": 1.16352422907489, + "grad_norm": 1.884331587638867, + "learning_rate": 8.140611318551078e-06, + "loss": 0.608701765537262, + "step": 3302 + }, + { + "epoch": 1.1638766519823789, + "grad_norm": 1.6532674404979102, + "learning_rate": 8.1348868432899e-06, + "loss": 0.5607466101646423, + "step": 3303 + }, + { + "epoch": 1.1642290748898678, + "grad_norm": 1.9224536271892947, + "learning_rate": 8.12916300117659e-06, + "loss": 0.6397457122802734, + "step": 3304 + }, + { + "epoch": 1.1645814977973568, + "grad_norm": 1.9075190910370474, + "learning_rate": 8.123439794154223e-06, + "loss": 0.6681507229804993, + "step": 3305 + }, + { + "epoch": 1.1649339207048457, + "grad_norm": 1.7601065273352539, + "learning_rate": 8.117717224165645e-06, + "loss": 0.5549972057342529, + "step": 3306 + }, + { + "epoch": 1.1652863436123349, + "grad_norm": 1.9981914923817063, + "learning_rate": 8.111995293153486e-06, + "loss": 0.7519058585166931, + "step": 3307 + }, + { + "epoch": 1.1656387665198238, + "grad_norm": 1.8817978978557874, + "learning_rate": 8.106274003060172e-06, + "loss": 0.7100121378898621, + "step": 3308 + }, + { + "epoch": 1.1659911894273127, + "grad_norm": 
2.081586750876693, + "learning_rate": 8.100553355827897e-06, + "loss": 0.6297321319580078, + "step": 3309 + }, + { + "epoch": 1.1663436123348019, + "grad_norm": 2.2854313216105635, + "learning_rate": 8.094833353398645e-06, + "loss": 0.6875895857810974, + "step": 3310 + }, + { + "epoch": 1.1666960352422908, + "grad_norm": 1.7297215389141958, + "learning_rate": 8.08911399771418e-06, + "loss": 0.5369099974632263, + "step": 3311 + }, + { + "epoch": 1.1670484581497798, + "grad_norm": 1.7209622601094259, + "learning_rate": 8.083395290716042e-06, + "loss": 0.5598124265670776, + "step": 3312 + }, + { + "epoch": 1.1674008810572687, + "grad_norm": 1.6153396072397332, + "learning_rate": 8.077677234345557e-06, + "loss": 0.6438342332839966, + "step": 3313 + }, + { + "epoch": 1.1677533039647576, + "grad_norm": 1.649767256033485, + "learning_rate": 8.07195983054383e-06, + "loss": 0.5558618307113647, + "step": 3314 + }, + { + "epoch": 1.1681057268722468, + "grad_norm": 1.744681713922102, + "learning_rate": 8.06624308125173e-06, + "loss": 0.5729602575302124, + "step": 3315 + }, + { + "epoch": 1.1684581497797357, + "grad_norm": 2.294706401477936, + "learning_rate": 8.060526988409929e-06, + "loss": 0.5094903707504272, + "step": 3316 + }, + { + "epoch": 1.1688105726872247, + "grad_norm": 1.6352779890455922, + "learning_rate": 8.054811553958853e-06, + "loss": 0.6605818867683411, + "step": 3317 + }, + { + "epoch": 1.1691629955947136, + "grad_norm": 2.240048633930669, + "learning_rate": 8.04909677983872e-06, + "loss": 0.7929576635360718, + "step": 3318 + }, + { + "epoch": 1.1695154185022028, + "grad_norm": 1.7445241989865017, + "learning_rate": 8.043382667989514e-06, + "loss": 0.5915192365646362, + "step": 3319 + }, + { + "epoch": 1.1698678414096917, + "grad_norm": 1.6537456786938194, + "learning_rate": 8.037669220351e-06, + "loss": 0.5923853516578674, + "step": 3320 + }, + { + "epoch": 1.1702202643171806, + "grad_norm": 1.7692219343864357, + "learning_rate": 8.031956438862718e-06, + 
"loss": 0.7034223079681396, + "step": 3321 + }, + { + "epoch": 1.1705726872246696, + "grad_norm": 1.699093684077835, + "learning_rate": 8.026244325463975e-06, + "loss": 0.6093307733535767, + "step": 3322 + }, + { + "epoch": 1.1709251101321585, + "grad_norm": 1.820021264359909, + "learning_rate": 8.020532882093862e-06, + "loss": 0.5709424614906311, + "step": 3323 + }, + { + "epoch": 1.1712775330396477, + "grad_norm": 1.6327248259933085, + "learning_rate": 8.01482211069123e-06, + "loss": 0.5242069959640503, + "step": 3324 + }, + { + "epoch": 1.1716299559471366, + "grad_norm": 1.8755413800206977, + "learning_rate": 8.009112013194707e-06, + "loss": 0.5869580507278442, + "step": 3325 + }, + { + "epoch": 1.1719823788546255, + "grad_norm": 1.927667149386539, + "learning_rate": 8.0034025915427e-06, + "loss": 0.7281460762023926, + "step": 3326 + }, + { + "epoch": 1.1723348017621145, + "grad_norm": 1.8020991914636244, + "learning_rate": 7.997693847673378e-06, + "loss": 0.6877723336219788, + "step": 3327 + }, + { + "epoch": 1.1726872246696036, + "grad_norm": 1.4739994768631006, + "learning_rate": 7.991985783524676e-06, + "loss": 0.6045002937316895, + "step": 3328 + }, + { + "epoch": 1.1730396475770926, + "grad_norm": 1.7637996531853402, + "learning_rate": 7.986278401034315e-06, + "loss": 0.5698690414428711, + "step": 3329 + }, + { + "epoch": 1.1733920704845815, + "grad_norm": 1.879664532548966, + "learning_rate": 7.980571702139759e-06, + "loss": 0.6802438497543335, + "step": 3330 + }, + { + "epoch": 1.1737444933920704, + "grad_norm": 1.9432824884843154, + "learning_rate": 7.974865688778271e-06, + "loss": 0.5840654373168945, + "step": 3331 + }, + { + "epoch": 1.1740969162995594, + "grad_norm": 1.7557288678447098, + "learning_rate": 7.969160362886855e-06, + "loss": 0.5203073024749756, + "step": 3332 + }, + { + "epoch": 1.1744493392070485, + "grad_norm": 1.5188701776399616, + "learning_rate": 7.963455726402292e-06, + "loss": 0.4558306932449341, + "step": 3333 + }, + { + "epoch": 
1.1748017621145375, + "grad_norm": 1.8464169088081481, + "learning_rate": 7.957751781261132e-06, + "loss": 0.6200483441352844, + "step": 3334 + }, + { + "epoch": 1.1751541850220264, + "grad_norm": 1.4009839443781218, + "learning_rate": 7.952048529399686e-06, + "loss": 0.559386670589447, + "step": 3335 + }, + { + "epoch": 1.1755066079295153, + "grad_norm": 1.5776847118393618, + "learning_rate": 7.946345972754026e-06, + "loss": 0.5521356463432312, + "step": 3336 + }, + { + "epoch": 1.1758590308370045, + "grad_norm": 1.6725655120909741, + "learning_rate": 7.940644113260001e-06, + "loss": 0.6235495805740356, + "step": 3337 + }, + { + "epoch": 1.1762114537444934, + "grad_norm": 1.6364629990686756, + "learning_rate": 7.934942952853203e-06, + "loss": 0.5196648836135864, + "step": 3338 + }, + { + "epoch": 1.1765638766519824, + "grad_norm": 1.658819201732712, + "learning_rate": 7.929242493469013e-06, + "loss": 0.5959422588348389, + "step": 3339 + }, + { + "epoch": 1.1769162995594713, + "grad_norm": 1.8867606277211662, + "learning_rate": 7.923542737042549e-06, + "loss": 0.5400167107582092, + "step": 3340 + }, + { + "epoch": 1.1772687224669602, + "grad_norm": 1.8686352871929341, + "learning_rate": 7.917843685508702e-06, + "loss": 0.688996434211731, + "step": 3341 + }, + { + "epoch": 1.1776211453744494, + "grad_norm": 1.844624213320976, + "learning_rate": 7.912145340802127e-06, + "loss": 0.623216450214386, + "step": 3342 + }, + { + "epoch": 1.1779735682819383, + "grad_norm": 1.7951119497780943, + "learning_rate": 7.906447704857233e-06, + "loss": 0.587382435798645, + "step": 3343 + }, + { + "epoch": 1.1783259911894273, + "grad_norm": 1.4508698182802122, + "learning_rate": 7.900750779608187e-06, + "loss": 0.6033053398132324, + "step": 3344 + }, + { + "epoch": 1.1786784140969162, + "grad_norm": 1.5026274052311877, + "learning_rate": 7.895054566988924e-06, + "loss": 0.557671308517456, + "step": 3345 + }, + { + "epoch": 1.1790308370044054, + "grad_norm": 1.6193785911353318, + 
"learning_rate": 7.889359068933122e-06, + "loss": 0.4550681710243225, + "step": 3346 + }, + { + "epoch": 1.1793832599118943, + "grad_norm": 1.7532225132073032, + "learning_rate": 7.883664287374235e-06, + "loss": 0.6417531967163086, + "step": 3347 + }, + { + "epoch": 1.1797356828193832, + "grad_norm": 2.046641045277204, + "learning_rate": 7.877970224245458e-06, + "loss": 0.703549861907959, + "step": 3348 + }, + { + "epoch": 1.1800881057268722, + "grad_norm": 1.9966595548369739, + "learning_rate": 7.87227688147975e-06, + "loss": 0.7438976764678955, + "step": 3349 + }, + { + "epoch": 1.1804405286343613, + "grad_norm": 1.9757665254478705, + "learning_rate": 7.866584261009823e-06, + "loss": 0.5563932657241821, + "step": 3350 + }, + { + "epoch": 1.1807929515418503, + "grad_norm": 1.9705828017858218, + "learning_rate": 7.860892364768145e-06, + "loss": 0.6332740783691406, + "step": 3351 + }, + { + "epoch": 1.1811453744493392, + "grad_norm": 1.6800252042998722, + "learning_rate": 7.855201194686938e-06, + "loss": 0.5207923650741577, + "step": 3352 + }, + { + "epoch": 1.1814977973568281, + "grad_norm": 1.704285155728578, + "learning_rate": 7.849510752698179e-06, + "loss": 0.5930209755897522, + "step": 3353 + }, + { + "epoch": 1.1818502202643173, + "grad_norm": 1.9626347095192314, + "learning_rate": 7.843821040733588e-06, + "loss": 0.6207472085952759, + "step": 3354 + }, + { + "epoch": 1.1822026431718062, + "grad_norm": 1.631891920380694, + "learning_rate": 7.838132060724657e-06, + "loss": 0.5487867593765259, + "step": 3355 + }, + { + "epoch": 1.1825550660792952, + "grad_norm": 1.719446635213068, + "learning_rate": 7.83244381460261e-06, + "loss": 0.5457941889762878, + "step": 3356 + }, + { + "epoch": 1.182907488986784, + "grad_norm": 4.79087339281713, + "learning_rate": 7.826756304298428e-06, + "loss": 0.5203769207000732, + "step": 3357 + }, + { + "epoch": 1.183259911894273, + "grad_norm": 2.2130523974851006, + "learning_rate": 7.821069531742848e-06, + "loss": 
0.7241770029067993, + "step": 3358 + }, + { + "epoch": 1.1836123348017622, + "grad_norm": 1.872241533824603, + "learning_rate": 7.815383498866351e-06, + "loss": 0.5085904598236084, + "step": 3359 + }, + { + "epoch": 1.1839647577092511, + "grad_norm": 1.7457024495825946, + "learning_rate": 7.80969820759917e-06, + "loss": 0.6219276785850525, + "step": 3360 + }, + { + "epoch": 1.18431718061674, + "grad_norm": 1.657619548935653, + "learning_rate": 7.804013659871286e-06, + "loss": 0.5621576309204102, + "step": 3361 + }, + { + "epoch": 1.184669603524229, + "grad_norm": 2.006942738555184, + "learning_rate": 7.798329857612415e-06, + "loss": 0.6862529516220093, + "step": 3362 + }, + { + "epoch": 1.1850220264317182, + "grad_norm": 1.6254700608957282, + "learning_rate": 7.792646802752045e-06, + "loss": 0.5536706447601318, + "step": 3363 + }, + { + "epoch": 1.185374449339207, + "grad_norm": 1.8365676060407183, + "learning_rate": 7.786964497219389e-06, + "loss": 0.7158493995666504, + "step": 3364 + }, + { + "epoch": 1.185726872246696, + "grad_norm": 1.5882377854785632, + "learning_rate": 7.781282942943411e-06, + "loss": 0.6510338187217712, + "step": 3365 + }, + { + "epoch": 1.186079295154185, + "grad_norm": 1.6887309758558333, + "learning_rate": 7.775602141852827e-06, + "loss": 0.4999651312828064, + "step": 3366 + }, + { + "epoch": 1.186431718061674, + "grad_norm": 1.7482854003458987, + "learning_rate": 7.769922095876088e-06, + "loss": 0.566371738910675, + "step": 3367 + }, + { + "epoch": 1.186784140969163, + "grad_norm": 1.8523910267151578, + "learning_rate": 7.764242806941396e-06, + "loss": 0.6424880623817444, + "step": 3368 + }, + { + "epoch": 1.187136563876652, + "grad_norm": 1.7770666290685069, + "learning_rate": 7.758564276976696e-06, + "loss": 0.6731792688369751, + "step": 3369 + }, + { + "epoch": 1.187488986784141, + "grad_norm": 1.8284341736993877, + "learning_rate": 7.752886507909661e-06, + "loss": 0.7350698113441467, + "step": 3370 + }, + { + "epoch": 
1.1878414096916299, + "grad_norm": 1.6211597569244138, + "learning_rate": 7.747209501667729e-06, + "loss": 0.49212586879730225, + "step": 3371 + }, + { + "epoch": 1.188193832599119, + "grad_norm": 1.8399284999038652, + "learning_rate": 7.741533260178058e-06, + "loss": 0.46775591373443604, + "step": 3372 + }, + { + "epoch": 1.188546255506608, + "grad_norm": 1.9173381710912725, + "learning_rate": 7.73585778536756e-06, + "loss": 0.7006367444992065, + "step": 3373 + }, + { + "epoch": 1.188898678414097, + "grad_norm": 1.9011259462553447, + "learning_rate": 7.730183079162882e-06, + "loss": 0.6403789520263672, + "step": 3374 + }, + { + "epoch": 1.1892511013215858, + "grad_norm": 1.7192698764020407, + "learning_rate": 7.724509143490409e-06, + "loss": 0.5788881778717041, + "step": 3375 + }, + { + "epoch": 1.1896035242290748, + "grad_norm": 1.8160886708158774, + "learning_rate": 7.718835980276265e-06, + "loss": 0.5216118693351746, + "step": 3376 + }, + { + "epoch": 1.189955947136564, + "grad_norm": 1.8022868379388808, + "learning_rate": 7.713163591446318e-06, + "loss": 0.5951248407363892, + "step": 3377 + }, + { + "epoch": 1.1903083700440529, + "grad_norm": 1.7460515067285554, + "learning_rate": 7.707491978926157e-06, + "loss": 0.4975050687789917, + "step": 3378 + }, + { + "epoch": 1.1906607929515418, + "grad_norm": 1.770763460120106, + "learning_rate": 7.701821144641127e-06, + "loss": 0.6019243001937866, + "step": 3379 + }, + { + "epoch": 1.1910132158590307, + "grad_norm": 1.7832166509700509, + "learning_rate": 7.696151090516292e-06, + "loss": 0.6395450830459595, + "step": 3380 + }, + { + "epoch": 1.19136563876652, + "grad_norm": 1.6347986183513594, + "learning_rate": 7.690481818476468e-06, + "loss": 0.579787015914917, + "step": 3381 + }, + { + "epoch": 1.1917180616740088, + "grad_norm": 1.4744637046036069, + "learning_rate": 7.684813330446191e-06, + "loss": 0.5136005878448486, + "step": 3382 + }, + { + "epoch": 1.1920704845814978, + "grad_norm": 1.7266158280823927, + 
"learning_rate": 7.679145628349734e-06, + "loss": 0.6639782190322876, + "step": 3383 + }, + { + "epoch": 1.1924229074889867, + "grad_norm": 1.8900727159770023, + "learning_rate": 7.673478714111111e-06, + "loss": 0.5575984716415405, + "step": 3384 + }, + { + "epoch": 1.1927753303964757, + "grad_norm": 2.0885094289190658, + "learning_rate": 7.667812589654062e-06, + "loss": 0.6456045508384705, + "step": 3385 + }, + { + "epoch": 1.1931277533039648, + "grad_norm": 1.9286041654650978, + "learning_rate": 7.662147256902055e-06, + "loss": 0.6936196088790894, + "step": 3386 + }, + { + "epoch": 1.1934801762114537, + "grad_norm": 1.758654368664718, + "learning_rate": 7.656482717778299e-06, + "loss": 0.5490384697914124, + "step": 3387 + }, + { + "epoch": 1.1938325991189427, + "grad_norm": 1.9621511017976598, + "learning_rate": 7.650818974205727e-06, + "loss": 0.6973621845245361, + "step": 3388 + }, + { + "epoch": 1.1941850220264318, + "grad_norm": 1.835769632858156, + "learning_rate": 7.645156028107005e-06, + "loss": 0.7471047639846802, + "step": 3389 + }, + { + "epoch": 1.1945374449339208, + "grad_norm": 1.7902415027725214, + "learning_rate": 7.639493881404526e-06, + "loss": 0.6205108165740967, + "step": 3390 + }, + { + "epoch": 1.1948898678414097, + "grad_norm": 1.6920866725907067, + "learning_rate": 7.63383253602041e-06, + "loss": 0.747038722038269, + "step": 3391 + }, + { + "epoch": 1.1952422907488987, + "grad_norm": 1.5771320255200836, + "learning_rate": 7.628171993876514e-06, + "loss": 0.5185794830322266, + "step": 3392 + }, + { + "epoch": 1.1955947136563876, + "grad_norm": 1.6878325344643712, + "learning_rate": 7.6225122568944124e-06, + "loss": 0.6059385538101196, + "step": 3393 + }, + { + "epoch": 1.1959471365638767, + "grad_norm": 1.6275144870635614, + "learning_rate": 7.6168533269954045e-06, + "loss": 0.5154507160186768, + "step": 3394 + }, + { + "epoch": 1.1962995594713657, + "grad_norm": 1.8584269669132367, + "learning_rate": 7.611195206100529e-06, + "loss": 
0.684306263923645, + "step": 3395 + }, + { + "epoch": 1.1966519823788546, + "grad_norm": 1.60676147024925, + "learning_rate": 7.605537896130537e-06, + "loss": 0.5637205839157104, + "step": 3396 + }, + { + "epoch": 1.1970044052863436, + "grad_norm": 2.099988274984523, + "learning_rate": 7.599881399005913e-06, + "loss": 0.700809121131897, + "step": 3397 + }, + { + "epoch": 1.1973568281938327, + "grad_norm": 1.8285381374549698, + "learning_rate": 7.594225716646859e-06, + "loss": 0.45139041543006897, + "step": 3398 + }, + { + "epoch": 1.1977092511013216, + "grad_norm": 1.9616153744225684, + "learning_rate": 7.588570850973301e-06, + "loss": 0.6623016595840454, + "step": 3399 + }, + { + "epoch": 1.1980616740088106, + "grad_norm": 1.5510325285611402, + "learning_rate": 7.582916803904899e-06, + "loss": 0.47430598735809326, + "step": 3400 + }, + { + "epoch": 1.1984140969162995, + "grad_norm": 1.7180906175268718, + "learning_rate": 7.57726357736101e-06, + "loss": 0.7190637588500977, + "step": 3401 + }, + { + "epoch": 1.1987665198237885, + "grad_norm": 1.4703339836450204, + "learning_rate": 7.571611173260747e-06, + "loss": 0.552079439163208, + "step": 3402 + }, + { + "epoch": 1.1991189427312776, + "grad_norm": 1.665813020849203, + "learning_rate": 7.565959593522914e-06, + "loss": 0.5499744415283203, + "step": 3403 + }, + { + "epoch": 1.1994713656387666, + "grad_norm": 1.6507149154277247, + "learning_rate": 7.560308840066046e-06, + "loss": 0.6013774871826172, + "step": 3404 + }, + { + "epoch": 1.1998237885462555, + "grad_norm": 1.5847999964914972, + "learning_rate": 7.554658914808404e-06, + "loss": 0.5489538908004761, + "step": 3405 + }, + { + "epoch": 1.2001762114537444, + "grad_norm": 1.72263968265959, + "learning_rate": 7.549009819667956e-06, + "loss": 0.6124382615089417, + "step": 3406 + }, + { + "epoch": 1.2005286343612336, + "grad_norm": 2.1073738195754594, + "learning_rate": 7.543361556562397e-06, + "loss": 0.6895862817764282, + "step": 3407 + }, + { + "epoch": 
1.2008810572687225, + "grad_norm": 2.063900978481081, + "learning_rate": 7.537714127409139e-06, + "loss": 0.6632197499275208, + "step": 3408 + }, + { + "epoch": 1.2012334801762115, + "grad_norm": 1.6352648722318401, + "learning_rate": 7.5320675341253e-06, + "loss": 0.5940145254135132, + "step": 3409 + }, + { + "epoch": 1.2015859030837004, + "grad_norm": 1.884013328310988, + "learning_rate": 7.526421778627735e-06, + "loss": 0.646323561668396, + "step": 3410 + }, + { + "epoch": 1.2019383259911893, + "grad_norm": 1.7070941231545174, + "learning_rate": 7.520776862832993e-06, + "loss": 0.6173659563064575, + "step": 3411 + }, + { + "epoch": 1.2022907488986785, + "grad_norm": 1.8582208465763577, + "learning_rate": 7.515132788657347e-06, + "loss": 0.574191689491272, + "step": 3412 + }, + { + "epoch": 1.2026431718061674, + "grad_norm": 1.9220370982111243, + "learning_rate": 7.50948955801679e-06, + "loss": 0.6243089437484741, + "step": 3413 + }, + { + "epoch": 1.2029955947136564, + "grad_norm": 1.7949632694678572, + "learning_rate": 7.503847172827022e-06, + "loss": 0.692270040512085, + "step": 3414 + }, + { + "epoch": 1.2033480176211453, + "grad_norm": 1.6803082040464332, + "learning_rate": 7.498205635003451e-06, + "loss": 0.5929970145225525, + "step": 3415 + }, + { + "epoch": 1.2037004405286345, + "grad_norm": 1.6077232593078599, + "learning_rate": 7.4925649464612126e-06, + "loss": 0.5479272603988647, + "step": 3416 + }, + { + "epoch": 1.2040528634361234, + "grad_norm": 1.5415384890909907, + "learning_rate": 7.486925109115135e-06, + "loss": 0.5923635363578796, + "step": 3417 + }, + { + "epoch": 1.2044052863436123, + "grad_norm": 1.7506756122488851, + "learning_rate": 7.48128612487978e-06, + "loss": 0.6530192494392395, + "step": 3418 + }, + { + "epoch": 1.2047577092511013, + "grad_norm": 1.533550542452438, + "learning_rate": 7.475647995669397e-06, + "loss": 0.5104716420173645, + "step": 3419 + }, + { + "epoch": 1.2051101321585902, + "grad_norm": 1.8415327152950194, + 
"learning_rate": 7.470010723397958e-06, + "loss": 0.6526790261268616, + "step": 3420 + }, + { + "epoch": 1.2054625550660794, + "grad_norm": 1.746747219195987, + "learning_rate": 7.464374309979143e-06, + "loss": 0.5985254645347595, + "step": 3421 + }, + { + "epoch": 1.2058149779735683, + "grad_norm": 1.9679342498420438, + "learning_rate": 7.458738757326336e-06, + "loss": 0.6575271487236023, + "step": 3422 + }, + { + "epoch": 1.2061674008810572, + "grad_norm": 1.7353179250025277, + "learning_rate": 7.453104067352637e-06, + "loss": 0.5906708836555481, + "step": 3423 + }, + { + "epoch": 1.2065198237885462, + "grad_norm": 1.7518769855954601, + "learning_rate": 7.4474702419708465e-06, + "loss": 0.7992517352104187, + "step": 3424 + }, + { + "epoch": 1.2068722466960353, + "grad_norm": 1.7067520122752557, + "learning_rate": 7.4418372830934645e-06, + "loss": 0.5935543179512024, + "step": 3425 + }, + { + "epoch": 1.2072246696035243, + "grad_norm": 1.877304862966978, + "learning_rate": 7.436205192632719e-06, + "loss": 0.7166613340377808, + "step": 3426 + }, + { + "epoch": 1.2075770925110132, + "grad_norm": 1.7575954983917004, + "learning_rate": 7.430573972500519e-06, + "loss": 0.5254578590393066, + "step": 3427 + }, + { + "epoch": 1.2079295154185021, + "grad_norm": 1.7449214411247376, + "learning_rate": 7.42494362460849e-06, + "loss": 0.6586379408836365, + "step": 3428 + }, + { + "epoch": 1.208281938325991, + "grad_norm": 1.7864206478373184, + "learning_rate": 7.419314150867964e-06, + "loss": 0.6960606575012207, + "step": 3429 + }, + { + "epoch": 1.2086343612334802, + "grad_norm": 1.7557785377406303, + "learning_rate": 7.413685553189969e-06, + "loss": 0.6107728481292725, + "step": 3430 + }, + { + "epoch": 1.2089867841409692, + "grad_norm": 1.624755754090177, + "learning_rate": 7.408057833485241e-06, + "loss": 0.6446499824523926, + "step": 3431 + }, + { + "epoch": 1.209339207048458, + "grad_norm": 1.9153166988080477, + "learning_rate": 7.402430993664216e-06, + "loss": 
0.7070472240447998, + "step": 3432 + }, + { + "epoch": 1.2096916299559473, + "grad_norm": 2.004011228140917, + "learning_rate": 7.396805035637023e-06, + "loss": 0.5919365882873535, + "step": 3433 + }, + { + "epoch": 1.2100440528634362, + "grad_norm": 1.7861550041093852, + "learning_rate": 7.391179961313512e-06, + "loss": 0.5975243449211121, + "step": 3434 + }, + { + "epoch": 1.2103964757709251, + "grad_norm": 1.6863010997131964, + "learning_rate": 7.385555772603212e-06, + "loss": 0.5772840976715088, + "step": 3435 + }, + { + "epoch": 1.210748898678414, + "grad_norm": 1.8451401620227157, + "learning_rate": 7.379932471415362e-06, + "loss": 0.7335072755813599, + "step": 3436 + }, + { + "epoch": 1.211101321585903, + "grad_norm": 2.0255796426124877, + "learning_rate": 7.3743100596589e-06, + "loss": 0.6214553713798523, + "step": 3437 + }, + { + "epoch": 1.2114537444933922, + "grad_norm": 1.8204785128516552, + "learning_rate": 7.368688539242457e-06, + "loss": 0.6515316963195801, + "step": 3438 + }, + { + "epoch": 1.211806167400881, + "grad_norm": 1.778475729690813, + "learning_rate": 7.3630679120743665e-06, + "loss": 0.6479551196098328, + "step": 3439 + }, + { + "epoch": 1.21215859030837, + "grad_norm": 1.8992442060407408, + "learning_rate": 7.357448180062657e-06, + "loss": 0.6195069551467896, + "step": 3440 + }, + { + "epoch": 1.212511013215859, + "grad_norm": 1.8044588174946172, + "learning_rate": 7.351829345115047e-06, + "loss": 0.5939193964004517, + "step": 3441 + }, + { + "epoch": 1.2128634361233481, + "grad_norm": 1.7404213735338998, + "learning_rate": 7.346211409138964e-06, + "loss": 0.6346434354782104, + "step": 3442 + }, + { + "epoch": 1.213215859030837, + "grad_norm": 1.7854241859310716, + "learning_rate": 7.340594374041516e-06, + "loss": 0.5924171209335327, + "step": 3443 + }, + { + "epoch": 1.213568281938326, + "grad_norm": 1.4550427635518266, + "learning_rate": 7.334978241729514e-06, + "loss": 0.48560285568237305, + "step": 3444 + }, + { + "epoch": 
1.213920704845815, + "grad_norm": 2.0456790867838865, + "learning_rate": 7.329363014109463e-06, + "loss": 0.643998384475708, + "step": 3445 + }, + { + "epoch": 1.2142731277533039, + "grad_norm": 1.9340204732587762, + "learning_rate": 7.323748693087551e-06, + "loss": 0.6041159629821777, + "step": 3446 + }, + { + "epoch": 1.214625550660793, + "grad_norm": 1.991943883280592, + "learning_rate": 7.318135280569674e-06, + "loss": 0.7143498659133911, + "step": 3447 + }, + { + "epoch": 1.214977973568282, + "grad_norm": 1.910490525820005, + "learning_rate": 7.312522778461409e-06, + "loss": 0.5821564197540283, + "step": 3448 + }, + { + "epoch": 1.215330396475771, + "grad_norm": 1.9609409525419488, + "learning_rate": 7.3069111886680166e-06, + "loss": 0.5786745548248291, + "step": 3449 + }, + { + "epoch": 1.2156828193832598, + "grad_norm": 1.7004659993753848, + "learning_rate": 7.3013005130944666e-06, + "loss": 0.6740534901618958, + "step": 3450 + }, + { + "epoch": 1.216035242290749, + "grad_norm": 1.9264837774532027, + "learning_rate": 7.2956907536454045e-06, + "loss": 0.6353983879089355, + "step": 3451 + }, + { + "epoch": 1.216387665198238, + "grad_norm": 1.6467978200520468, + "learning_rate": 7.290081912225172e-06, + "loss": 0.6890027523040771, + "step": 3452 + }, + { + "epoch": 1.2167400881057269, + "grad_norm": 2.194089687314607, + "learning_rate": 7.284473990737795e-06, + "loss": 0.6485118269920349, + "step": 3453 + }, + { + "epoch": 1.2170925110132158, + "grad_norm": 1.8020323615419078, + "learning_rate": 7.2788669910869845e-06, + "loss": 0.5364162921905518, + "step": 3454 + }, + { + "epoch": 1.2174449339207047, + "grad_norm": 1.8770204171846867, + "learning_rate": 7.27326091517615e-06, + "loss": 0.6625754833221436, + "step": 3455 + }, + { + "epoch": 1.217797356828194, + "grad_norm": 1.9138778572255513, + "learning_rate": 7.267655764908374e-06, + "loss": 0.7090050578117371, + "step": 3456 + }, + { + "epoch": 1.2181497797356828, + "grad_norm": 1.7151154871040917, + 
"learning_rate": 7.26205154218643e-06, + "loss": 0.6556301116943359, + "step": 3457 + }, + { + "epoch": 1.2185022026431718, + "grad_norm": 2.12213118759585, + "learning_rate": 7.2564482489127815e-06, + "loss": 0.7998625636100769, + "step": 3458 + }, + { + "epoch": 1.2188546255506607, + "grad_norm": 1.8721449700246833, + "learning_rate": 7.250845886989568e-06, + "loss": 0.6336952447891235, + "step": 3459 + }, + { + "epoch": 1.2192070484581499, + "grad_norm": 1.7786932342182031, + "learning_rate": 7.245244458318621e-06, + "loss": 0.5072300434112549, + "step": 3460 + }, + { + "epoch": 1.2195594713656388, + "grad_norm": 1.9350920817100896, + "learning_rate": 7.23964396480145e-06, + "loss": 0.6297830939292908, + "step": 3461 + }, + { + "epoch": 1.2199118942731277, + "grad_norm": 1.7384183002767206, + "learning_rate": 7.234044408339243e-06, + "loss": 0.5560386180877686, + "step": 3462 + }, + { + "epoch": 1.2202643171806167, + "grad_norm": 1.7834281461054429, + "learning_rate": 7.228445790832885e-06, + "loss": 0.5180274844169617, + "step": 3463 + }, + { + "epoch": 1.2206167400881056, + "grad_norm": 1.5903839847735544, + "learning_rate": 7.222848114182926e-06, + "loss": 0.4870688319206238, + "step": 3464 + }, + { + "epoch": 1.2209691629955948, + "grad_norm": 1.5913924611315027, + "learning_rate": 7.217251380289602e-06, + "loss": 0.46914681792259216, + "step": 3465 + }, + { + "epoch": 1.2213215859030837, + "grad_norm": 1.6510218664086935, + "learning_rate": 7.211655591052833e-06, + "loss": 0.5980997085571289, + "step": 3466 + }, + { + "epoch": 1.2216740088105726, + "grad_norm": 2.0761228855668468, + "learning_rate": 7.206060748372212e-06, + "loss": 0.5982732772827148, + "step": 3467 + }, + { + "epoch": 1.2220264317180616, + "grad_norm": 1.5384750193393883, + "learning_rate": 7.200466854147019e-06, + "loss": 0.612629771232605, + "step": 3468 + }, + { + "epoch": 1.2223788546255507, + "grad_norm": 1.6776022561511, + "learning_rate": 7.194873910276205e-06, + "loss": 
0.606558084487915, + "step": 3469 + }, + { + "epoch": 1.2227312775330397, + "grad_norm": 2.093853594654106, + "learning_rate": 7.189281918658396e-06, + "loss": 0.7133803367614746, + "step": 3470 + }, + { + "epoch": 1.2230837004405286, + "grad_norm": 1.737492396211302, + "learning_rate": 7.183690881191908e-06, + "loss": 0.5640908479690552, + "step": 3471 + }, + { + "epoch": 1.2234361233480175, + "grad_norm": 1.9131350962270206, + "learning_rate": 7.178100799774717e-06, + "loss": 0.6376210451126099, + "step": 3472 + }, + { + "epoch": 1.2237885462555067, + "grad_norm": 1.7418892302924867, + "learning_rate": 7.172511676304481e-06, + "loss": 0.6207184791564941, + "step": 3473 + }, + { + "epoch": 1.2241409691629956, + "grad_norm": 2.0136397077316133, + "learning_rate": 7.166923512678538e-06, + "loss": 0.47848421335220337, + "step": 3474 + }, + { + "epoch": 1.2244933920704846, + "grad_norm": 1.89946756738985, + "learning_rate": 7.161336310793894e-06, + "loss": 0.6052829027175903, + "step": 3475 + }, + { + "epoch": 1.2248458149779735, + "grad_norm": 1.968672987503914, + "learning_rate": 7.155750072547229e-06, + "loss": 0.6050940155982971, + "step": 3476 + }, + { + "epoch": 1.2251982378854627, + "grad_norm": 2.566995671782078, + "learning_rate": 7.150164799834902e-06, + "loss": 0.6121659278869629, + "step": 3477 + }, + { + "epoch": 1.2255506607929516, + "grad_norm": 1.9679344001124786, + "learning_rate": 7.144580494552929e-06, + "loss": 0.6886739730834961, + "step": 3478 + }, + { + "epoch": 1.2259030837004405, + "grad_norm": 1.5760234299307694, + "learning_rate": 7.13899715859702e-06, + "loss": 0.5001103281974792, + "step": 3479 + }, + { + "epoch": 1.2262555066079295, + "grad_norm": 2.1260048612910216, + "learning_rate": 7.133414793862532e-06, + "loss": 0.5948734283447266, + "step": 3480 + }, + { + "epoch": 1.2266079295154184, + "grad_norm": 2.593831579740968, + "learning_rate": 7.127833402244515e-06, + "loss": 0.6179298162460327, + "step": 3481 + }, + { + "epoch": 
1.2269603524229076, + "grad_norm": 1.6926296837265904, + "learning_rate": 7.122252985637672e-06, + "loss": 0.5543676614761353, + "step": 3482 + }, + { + "epoch": 1.2273127753303965, + "grad_norm": 1.6008632106545562, + "learning_rate": 7.116673545936379e-06, + "loss": 0.6279658079147339, + "step": 3483 + }, + { + "epoch": 1.2276651982378854, + "grad_norm": 1.5383086530060461, + "learning_rate": 7.111095085034687e-06, + "loss": 0.6692230701446533, + "step": 3484 + }, + { + "epoch": 1.2280176211453744, + "grad_norm": 1.7218507243355061, + "learning_rate": 7.1055176048263085e-06, + "loss": 0.6124502420425415, + "step": 3485 + }, + { + "epoch": 1.2283700440528635, + "grad_norm": 2.0325469007846007, + "learning_rate": 7.09994110720462e-06, + "loss": 0.6241810321807861, + "step": 3486 + }, + { + "epoch": 1.2287224669603525, + "grad_norm": 1.7620353767255947, + "learning_rate": 7.094365594062675e-06, + "loss": 0.6556589603424072, + "step": 3487 + }, + { + "epoch": 1.2290748898678414, + "grad_norm": 1.660185756567605, + "learning_rate": 7.0887910672931815e-06, + "loss": 0.480433851480484, + "step": 3488 + }, + { + "epoch": 1.2294273127753303, + "grad_norm": 1.7666817554476708, + "learning_rate": 7.083217528788524e-06, + "loss": 0.6198803782463074, + "step": 3489 + }, + { + "epoch": 1.2297797356828193, + "grad_norm": 1.7945939958355666, + "learning_rate": 7.077644980440741e-06, + "loss": 0.6368751525878906, + "step": 3490 + }, + { + "epoch": 1.2301321585903084, + "grad_norm": 1.904999974616483, + "learning_rate": 7.072073424141538e-06, + "loss": 0.5992522239685059, + "step": 3491 + }, + { + "epoch": 1.2304845814977974, + "grad_norm": 1.6441410368294835, + "learning_rate": 7.066502861782289e-06, + "loss": 0.5917885303497314, + "step": 3492 + }, + { + "epoch": 1.2308370044052863, + "grad_norm": 1.9090985571817867, + "learning_rate": 7.060933295254027e-06, + "loss": 0.615925669670105, + "step": 3493 + }, + { + "epoch": 1.2311894273127753, + "grad_norm": 1.5510149338562214, + 
"learning_rate": 7.055364726447437e-06, + "loss": 0.4408820867538452, + "step": 3494 + }, + { + "epoch": 1.2315418502202644, + "grad_norm": 1.706805010144051, + "learning_rate": 7.049797157252889e-06, + "loss": 0.4918386936187744, + "step": 3495 + }, + { + "epoch": 1.2318942731277533, + "grad_norm": 2.0047166519470965, + "learning_rate": 7.0442305895603844e-06, + "loss": 0.6964970827102661, + "step": 3496 + }, + { + "epoch": 1.2322466960352423, + "grad_norm": 1.993882373770559, + "learning_rate": 7.038665025259615e-06, + "loss": 0.5269606113433838, + "step": 3497 + }, + { + "epoch": 1.2325991189427312, + "grad_norm": 1.7338430673292662, + "learning_rate": 7.033100466239908e-06, + "loss": 0.6146842241287231, + "step": 3498 + }, + { + "epoch": 1.2329515418502202, + "grad_norm": 1.8958783101408965, + "learning_rate": 7.027536914390257e-06, + "loss": 0.7163739800453186, + "step": 3499 + }, + { + "epoch": 1.2333039647577093, + "grad_norm": 1.5575657818438158, + "learning_rate": 7.021974371599318e-06, + "loss": 0.5851477980613708, + "step": 3500 + }, + { + "epoch": 1.2336563876651983, + "grad_norm": 1.3831914970718109, + "learning_rate": 7.0164128397554e-06, + "loss": 0.585768461227417, + "step": 3501 + }, + { + "epoch": 1.2340088105726872, + "grad_norm": 1.651121323438745, + "learning_rate": 7.0108523207464706e-06, + "loss": 0.5467718839645386, + "step": 3502 + }, + { + "epoch": 1.2343612334801761, + "grad_norm": 1.8179588757324485, + "learning_rate": 7.0052928164601564e-06, + "loss": 0.638299822807312, + "step": 3503 + }, + { + "epoch": 1.2347136563876653, + "grad_norm": 1.8158584952636452, + "learning_rate": 6.9997343287837275e-06, + "loss": 0.6737650036811829, + "step": 3504 + }, + { + "epoch": 1.2350660792951542, + "grad_norm": 1.7619528960945736, + "learning_rate": 6.9941768596041224e-06, + "loss": 0.6659837961196899, + "step": 3505 + }, + { + "epoch": 1.2354185022026432, + "grad_norm": 1.9059656133131788, + "learning_rate": 6.988620410807932e-06, + "loss": 
0.6731020212173462, + "step": 3506 + }, + { + "epoch": 1.235770925110132, + "grad_norm": 1.8111638058637756, + "learning_rate": 6.983064984281389e-06, + "loss": 0.6236598491668701, + "step": 3507 + }, + { + "epoch": 1.236123348017621, + "grad_norm": 1.8485171900570894, + "learning_rate": 6.9775105819103985e-06, + "loss": 0.6233193874359131, + "step": 3508 + }, + { + "epoch": 1.2364757709251102, + "grad_norm": 1.7456936175280036, + "learning_rate": 6.971957205580497e-06, + "loss": 0.5914918184280396, + "step": 3509 + }, + { + "epoch": 1.2368281938325991, + "grad_norm": 2.069060854376664, + "learning_rate": 6.966404857176893e-06, + "loss": 0.6576484441757202, + "step": 3510 + }, + { + "epoch": 1.237180616740088, + "grad_norm": 1.6371442891988068, + "learning_rate": 6.960853538584431e-06, + "loss": 0.5609208941459656, + "step": 3511 + }, + { + "epoch": 1.2375330396475772, + "grad_norm": 1.8336206343046235, + "learning_rate": 6.955303251687609e-06, + "loss": 0.6405455470085144, + "step": 3512 + }, + { + "epoch": 1.2378854625550662, + "grad_norm": 1.6981959386126726, + "learning_rate": 6.949753998370579e-06, + "loss": 0.5621844530105591, + "step": 3513 + }, + { + "epoch": 1.238237885462555, + "grad_norm": 1.6040361718583698, + "learning_rate": 6.944205780517138e-06, + "loss": 0.5674207210540771, + "step": 3514 + }, + { + "epoch": 1.238590308370044, + "grad_norm": 1.8089615708578142, + "learning_rate": 6.938658600010734e-06, + "loss": 0.6744752526283264, + "step": 3515 + }, + { + "epoch": 1.238942731277533, + "grad_norm": 1.851260674535246, + "learning_rate": 6.9331124587344655e-06, + "loss": 0.537495493888855, + "step": 3516 + }, + { + "epoch": 1.2392951541850221, + "grad_norm": 1.7599394880527937, + "learning_rate": 6.92756735857107e-06, + "loss": 0.8405104875564575, + "step": 3517 + }, + { + "epoch": 1.239647577092511, + "grad_norm": 1.7838209985249966, + "learning_rate": 6.92202330140294e-06, + "loss": 0.6751723885536194, + "step": 3518 + }, + { + "epoch": 1.24, + 
"grad_norm": 1.8012761946666955, + "learning_rate": 6.9164802891121105e-06, + "loss": 0.5763178467750549, + "step": 3519 + }, + { + "epoch": 1.240352422907489, + "grad_norm": 1.7859481797599979, + "learning_rate": 6.910938323580256e-06, + "loss": 0.7713793516159058, + "step": 3520 + }, + { + "epoch": 1.240704845814978, + "grad_norm": 2.0598557028652356, + "learning_rate": 6.90539740668871e-06, + "loss": 0.6354435682296753, + "step": 3521 + }, + { + "epoch": 1.241057268722467, + "grad_norm": 1.6780280463346202, + "learning_rate": 6.899857540318434e-06, + "loss": 0.5121721625328064, + "step": 3522 + }, + { + "epoch": 1.241409691629956, + "grad_norm": 1.8470903920827393, + "learning_rate": 6.894318726350042e-06, + "loss": 0.586428165435791, + "step": 3523 + }, + { + "epoch": 1.241762114537445, + "grad_norm": 1.690234288859414, + "learning_rate": 6.888780966663792e-06, + "loss": 0.4868311285972595, + "step": 3524 + }, + { + "epoch": 1.2421145374449338, + "grad_norm": 1.7688170320163026, + "learning_rate": 6.883244263139578e-06, + "loss": 0.7057775259017944, + "step": 3525 + }, + { + "epoch": 1.242466960352423, + "grad_norm": 1.630207980484645, + "learning_rate": 6.877708617656942e-06, + "loss": 0.4993360638618469, + "step": 3526 + }, + { + "epoch": 1.242819383259912, + "grad_norm": 1.7093781024880734, + "learning_rate": 6.872174032095061e-06, + "loss": 0.6096793413162231, + "step": 3527 + }, + { + "epoch": 1.2431718061674009, + "grad_norm": 1.7005141830755592, + "learning_rate": 6.866640508332751e-06, + "loss": 0.584385871887207, + "step": 3528 + }, + { + "epoch": 1.2435242290748898, + "grad_norm": 1.6033098221924098, + "learning_rate": 6.861108048248477e-06, + "loss": 0.5857449173927307, + "step": 3529 + }, + { + "epoch": 1.243876651982379, + "grad_norm": 1.6447411339873705, + "learning_rate": 6.855576653720333e-06, + "loss": 0.4337875247001648, + "step": 3530 + }, + { + "epoch": 1.244229074889868, + "grad_norm": 1.924557656954366, + "learning_rate": 
6.850046326626058e-06, + "loss": 0.6949163675308228, + "step": 3531 + }, + { + "epoch": 1.2445814977973568, + "grad_norm": 2.029468434582643, + "learning_rate": 6.844517068843025e-06, + "loss": 0.5876098871231079, + "step": 3532 + }, + { + "epoch": 1.2449339207048458, + "grad_norm": 2.0143379278356153, + "learning_rate": 6.838988882248243e-06, + "loss": 0.5460488796234131, + "step": 3533 + }, + { + "epoch": 1.2452863436123347, + "grad_norm": 2.284896657447092, + "learning_rate": 6.833461768718365e-06, + "loss": 0.6500875949859619, + "step": 3534 + }, + { + "epoch": 1.2456387665198239, + "grad_norm": 1.9702281980181484, + "learning_rate": 6.82793573012967e-06, + "loss": 0.6504626274108887, + "step": 3535 + }, + { + "epoch": 1.2459911894273128, + "grad_norm": 1.8635901517060365, + "learning_rate": 6.822410768358072e-06, + "loss": 0.6881722211837769, + "step": 3536 + }, + { + "epoch": 1.2463436123348017, + "grad_norm": 1.7111090644899583, + "learning_rate": 6.816886885279132e-06, + "loss": 0.6747599840164185, + "step": 3537 + }, + { + "epoch": 1.2466960352422907, + "grad_norm": 2.61809094535544, + "learning_rate": 6.811364082768028e-06, + "loss": 0.5987570285797119, + "step": 3538 + }, + { + "epoch": 1.2470484581497798, + "grad_norm": 1.8641726073707956, + "learning_rate": 6.8058423626995885e-06, + "loss": 0.6614603996276855, + "step": 3539 + }, + { + "epoch": 1.2474008810572688, + "grad_norm": 1.5529990518062367, + "learning_rate": 6.80032172694826e-06, + "loss": 0.542367696762085, + "step": 3540 + }, + { + "epoch": 1.2477533039647577, + "grad_norm": 1.7771584963866378, + "learning_rate": 6.7948021773881235e-06, + "loss": 0.6200593709945679, + "step": 3541 + }, + { + "epoch": 1.2481057268722466, + "grad_norm": 1.896811225090905, + "learning_rate": 6.789283715892905e-06, + "loss": 0.6425306797027588, + "step": 3542 + }, + { + "epoch": 1.2484581497797356, + "grad_norm": 1.4798584901842344, + "learning_rate": 6.78376634433594e-06, + "loss": 0.5277592539787292, + "step": 
3543 + }, + { + "epoch": 1.2488105726872247, + "grad_norm": 1.8357663435279958, + "learning_rate": 6.778250064590206e-06, + "loss": 0.6120523810386658, + "step": 3544 + }, + { + "epoch": 1.2491629955947137, + "grad_norm": 2.0042129559914654, + "learning_rate": 6.772734878528313e-06, + "loss": 0.538428544998169, + "step": 3545 + }, + { + "epoch": 1.2495154185022026, + "grad_norm": 1.7456851140249008, + "learning_rate": 6.76722078802249e-06, + "loss": 0.6439732909202576, + "step": 3546 + }, + { + "epoch": 1.2498678414096915, + "grad_norm": 1.5580174742798336, + "learning_rate": 6.761707794944605e-06, + "loss": 0.5951697826385498, + "step": 3547 + }, + { + "epoch": 1.2502202643171807, + "grad_norm": 1.5461650468928614, + "learning_rate": 6.7561959011661456e-06, + "loss": 0.5548606514930725, + "step": 3548 + }, + { + "epoch": 1.2505726872246696, + "grad_norm": 1.936721806656616, + "learning_rate": 6.750685108558221e-06, + "loss": 0.4768974781036377, + "step": 3549 + }, + { + "epoch": 1.2509251101321586, + "grad_norm": 1.6130866640641843, + "learning_rate": 6.745175418991585e-06, + "loss": 0.6629552245140076, + "step": 3550 + }, + { + "epoch": 1.2512775330396475, + "grad_norm": 1.8826604922139925, + "learning_rate": 6.739666834336599e-06, + "loss": 0.6550329923629761, + "step": 3551 + }, + { + "epoch": 1.2516299559471364, + "grad_norm": 1.7091222991512534, + "learning_rate": 6.734159356463254e-06, + "loss": 0.37340015172958374, + "step": 3552 + }, + { + "epoch": 1.2519823788546256, + "grad_norm": 2.0454082069330424, + "learning_rate": 6.728652987241175e-06, + "loss": 0.6343201398849487, + "step": 3553 + }, + { + "epoch": 1.2523348017621145, + "grad_norm": 1.8938201811077042, + "learning_rate": 6.723147728539596e-06, + "loss": 0.7555221319198608, + "step": 3554 + }, + { + "epoch": 1.2526872246696035, + "grad_norm": 1.7356069524639768, + "learning_rate": 6.717643582227384e-06, + "loss": 0.5944523215293884, + "step": 3555 + }, + { + "epoch": 1.2530396475770926, + 
"grad_norm": 1.627279375354834, + "learning_rate": 6.71214055017303e-06, + "loss": 0.5686212778091431, + "step": 3556 + }, + { + "epoch": 1.2533920704845816, + "grad_norm": 1.697482530075543, + "learning_rate": 6.706638634244629e-06, + "loss": 0.6401857137680054, + "step": 3557 + }, + { + "epoch": 1.2537444933920705, + "grad_norm": 1.5933991655989903, + "learning_rate": 6.701137836309926e-06, + "loss": 0.4571516513824463, + "step": 3558 + }, + { + "epoch": 1.2540969162995594, + "grad_norm": 1.7606001647916119, + "learning_rate": 6.695638158236255e-06, + "loss": 0.5857570171356201, + "step": 3559 + }, + { + "epoch": 1.2544493392070484, + "grad_norm": 1.7187772621235449, + "learning_rate": 6.690139601890601e-06, + "loss": 0.6981472969055176, + "step": 3560 + }, + { + "epoch": 1.2548017621145373, + "grad_norm": 1.685629147285753, + "learning_rate": 6.684642169139544e-06, + "loss": 0.5120254755020142, + "step": 3561 + }, + { + "epoch": 1.2551541850220265, + "grad_norm": 2.043587366608814, + "learning_rate": 6.67914586184929e-06, + "loss": 0.6975923776626587, + "step": 3562 + }, + { + "epoch": 1.2555066079295154, + "grad_norm": 2.1694224742588233, + "learning_rate": 6.673650681885668e-06, + "loss": 0.5825072526931763, + "step": 3563 + }, + { + "epoch": 1.2558590308370043, + "grad_norm": 1.9388578444875513, + "learning_rate": 6.668156631114124e-06, + "loss": 0.5701749324798584, + "step": 3564 + }, + { + "epoch": 1.2562114537444935, + "grad_norm": 1.6715281124187895, + "learning_rate": 6.662663711399705e-06, + "loss": 0.5230482220649719, + "step": 3565 + }, + { + "epoch": 1.2565638766519824, + "grad_norm": 1.7540798103539514, + "learning_rate": 6.657171924607102e-06, + "loss": 0.6680361032485962, + "step": 3566 + }, + { + "epoch": 1.2569162995594714, + "grad_norm": 1.7792330481880054, + "learning_rate": 6.651681272600592e-06, + "loss": 0.6745159029960632, + "step": 3567 + }, + { + "epoch": 1.2572687224669603, + "grad_norm": 1.5777367956881352, + "learning_rate": 
6.646191757244089e-06, + "loss": 0.587162971496582, + "step": 3568 + }, + { + "epoch": 1.2576211453744492, + "grad_norm": 2.0091715660610183, + "learning_rate": 6.640703380401111e-06, + "loss": 0.6170785427093506, + "step": 3569 + }, + { + "epoch": 1.2579735682819384, + "grad_norm": 1.8496931248102404, + "learning_rate": 6.6352161439347875e-06, + "loss": 0.4955494999885559, + "step": 3570 + }, + { + "epoch": 1.2583259911894273, + "grad_norm": 1.8039519732213443, + "learning_rate": 6.62973004970787e-06, + "loss": 0.7183424234390259, + "step": 3571 + }, + { + "epoch": 1.2586784140969163, + "grad_norm": 1.6920151696252388, + "learning_rate": 6.624245099582713e-06, + "loss": 0.6266030669212341, + "step": 3572 + }, + { + "epoch": 1.2590308370044052, + "grad_norm": 1.8260182971737482, + "learning_rate": 6.6187612954212845e-06, + "loss": 0.5234469175338745, + "step": 3573 + }, + { + "epoch": 1.2593832599118944, + "grad_norm": 2.0762206956902234, + "learning_rate": 6.6132786390851725e-06, + "loss": 0.7066231966018677, + "step": 3574 + }, + { + "epoch": 1.2597356828193833, + "grad_norm": 1.8486791061565373, + "learning_rate": 6.60779713243556e-06, + "loss": 0.622086226940155, + "step": 3575 + }, + { + "epoch": 1.2600881057268722, + "grad_norm": 2.003110770323092, + "learning_rate": 6.6023167773332554e-06, + "loss": 0.6607370376586914, + "step": 3576 + }, + { + "epoch": 1.2604405286343612, + "grad_norm": 1.9512971078148649, + "learning_rate": 6.596837575638663e-06, + "loss": 0.6846165657043457, + "step": 3577 + }, + { + "epoch": 1.2607929515418501, + "grad_norm": 2.1137757907106574, + "learning_rate": 6.5913595292118024e-06, + "loss": 0.6329103708267212, + "step": 3578 + }, + { + "epoch": 1.2611453744493393, + "grad_norm": 1.7067433363159659, + "learning_rate": 6.585882639912302e-06, + "loss": 0.7942261695861816, + "step": 3579 + }, + { + "epoch": 1.2614977973568282, + "grad_norm": 1.923592126322299, + "learning_rate": 6.580406909599393e-06, + "loss": 0.5446548461914062, + 
"step": 3580 + }, + { + "epoch": 1.2618502202643171, + "grad_norm": 2.584270827853736, + "learning_rate": 6.574932340131917e-06, + "loss": 0.581193208694458, + "step": 3581 + }, + { + "epoch": 1.2622026431718063, + "grad_norm": 1.789761494779322, + "learning_rate": 6.569458933368323e-06, + "loss": 0.6099729537963867, + "step": 3582 + }, + { + "epoch": 1.2625550660792952, + "grad_norm": 1.7689292642576144, + "learning_rate": 6.563986691166655e-06, + "loss": 0.45215970277786255, + "step": 3583 + }, + { + "epoch": 1.2629074889867842, + "grad_norm": 1.9037008934232844, + "learning_rate": 6.558515615384573e-06, + "loss": 0.6674731969833374, + "step": 3584 + }, + { + "epoch": 1.2632599118942731, + "grad_norm": 1.4782940862298068, + "learning_rate": 6.553045707879338e-06, + "loss": 0.4951098561286926, + "step": 3585 + }, + { + "epoch": 1.263612334801762, + "grad_norm": 1.7852149202748289, + "learning_rate": 6.54757697050781e-06, + "loss": 0.5853816270828247, + "step": 3586 + }, + { + "epoch": 1.263964757709251, + "grad_norm": 1.5907197274079232, + "learning_rate": 6.5421094051264575e-06, + "loss": 0.5236951112747192, + "step": 3587 + }, + { + "epoch": 1.2643171806167401, + "grad_norm": 1.733068587169355, + "learning_rate": 6.536643013591347e-06, + "loss": 0.5717612504959106, + "step": 3588 + }, + { + "epoch": 1.264669603524229, + "grad_norm": 2.033496211612474, + "learning_rate": 6.531177797758155e-06, + "loss": 0.6144098043441772, + "step": 3589 + }, + { + "epoch": 1.265022026431718, + "grad_norm": 1.6355266077439052, + "learning_rate": 6.525713759482144e-06, + "loss": 0.5634705424308777, + "step": 3590 + }, + { + "epoch": 1.2653744493392072, + "grad_norm": 1.7147225194337798, + "learning_rate": 6.520250900618186e-06, + "loss": 0.582956075668335, + "step": 3591 + }, + { + "epoch": 1.265726872246696, + "grad_norm": 1.843768096592032, + "learning_rate": 6.514789223020754e-06, + "loss": 0.7649297714233398, + "step": 3592 + }, + { + "epoch": 1.266079295154185, + "grad_norm": 
1.6261733555902604, + "learning_rate": 6.509328728543918e-06, + "loss": 0.6035098433494568, + "step": 3593 + }, + { + "epoch": 1.266431718061674, + "grad_norm": 1.8493319579504743, + "learning_rate": 6.503869419041344e-06, + "loss": 0.6405705809593201, + "step": 3594 + }, + { + "epoch": 1.266784140969163, + "grad_norm": 2.26304309310324, + "learning_rate": 6.498411296366299e-06, + "loss": 0.674353301525116, + "step": 3595 + }, + { + "epoch": 1.2671365638766519, + "grad_norm": 1.7621656180677492, + "learning_rate": 6.492954362371644e-06, + "loss": 0.6018465757369995, + "step": 3596 + }, + { + "epoch": 1.267488986784141, + "grad_norm": 2.127137234030612, + "learning_rate": 6.487498618909845e-06, + "loss": 0.6491270065307617, + "step": 3597 + }, + { + "epoch": 1.26784140969163, + "grad_norm": 1.6636292273445474, + "learning_rate": 6.4820440678329474e-06, + "loss": 0.5126988887786865, + "step": 3598 + }, + { + "epoch": 1.2681938325991189, + "grad_norm": 1.7884980833676332, + "learning_rate": 6.476590710992605e-06, + "loss": 0.5931694507598877, + "step": 3599 + }, + { + "epoch": 1.268546255506608, + "grad_norm": 1.9386898901162777, + "learning_rate": 6.471138550240066e-06, + "loss": 0.5455423593521118, + "step": 3600 + }, + { + "epoch": 1.268898678414097, + "grad_norm": 1.6361281925349132, + "learning_rate": 6.465687587426166e-06, + "loss": 0.4870053231716156, + "step": 3601 + }, + { + "epoch": 1.269251101321586, + "grad_norm": 1.9069149245463006, + "learning_rate": 6.460237824401337e-06, + "loss": 0.6434903144836426, + "step": 3602 + }, + { + "epoch": 1.2696035242290749, + "grad_norm": 1.676899060774639, + "learning_rate": 6.454789263015609e-06, + "loss": 0.6256476640701294, + "step": 3603 + }, + { + "epoch": 1.2699559471365638, + "grad_norm": 1.8004511475353204, + "learning_rate": 6.449341905118589e-06, + "loss": 0.6304135322570801, + "step": 3604 + }, + { + "epoch": 1.2703083700440527, + "grad_norm": 1.9009929525157667, + "learning_rate": 6.443895752559498e-06, + 
"loss": 0.5315194725990295, + "step": 3605 + }, + { + "epoch": 1.2706607929515419, + "grad_norm": 1.4321615697348329, + "learning_rate": 6.438450807187127e-06, + "loss": 0.5232852697372437, + "step": 3606 + }, + { + "epoch": 1.2710132158590308, + "grad_norm": 1.6584356511216338, + "learning_rate": 6.433007070849863e-06, + "loss": 0.4462543725967407, + "step": 3607 + }, + { + "epoch": 1.2713656387665198, + "grad_norm": 1.6730765460300174, + "learning_rate": 6.4275645453956945e-06, + "loss": 0.6347709894180298, + "step": 3608 + }, + { + "epoch": 1.271718061674009, + "grad_norm": 1.625329738549371, + "learning_rate": 6.422123232672182e-06, + "loss": 0.5277259349822998, + "step": 3609 + }, + { + "epoch": 1.2720704845814979, + "grad_norm": 1.7954090025098361, + "learning_rate": 6.416683134526486e-06, + "loss": 0.6297650933265686, + "step": 3610 + }, + { + "epoch": 1.2724229074889868, + "grad_norm": 1.7743916636003476, + "learning_rate": 6.411244252805351e-06, + "loss": 0.503609836101532, + "step": 3611 + }, + { + "epoch": 1.2727753303964757, + "grad_norm": 1.7300375262211753, + "learning_rate": 6.405806589355099e-06, + "loss": 0.6026735305786133, + "step": 3612 + }, + { + "epoch": 1.2731277533039647, + "grad_norm": 1.543883502597784, + "learning_rate": 6.400370146021662e-06, + "loss": 0.4918368458747864, + "step": 3613 + }, + { + "epoch": 1.2734801762114538, + "grad_norm": 2.125830682883153, + "learning_rate": 6.394934924650532e-06, + "loss": 0.6215550899505615, + "step": 3614 + }, + { + "epoch": 1.2738325991189428, + "grad_norm": 2.1843858701221563, + "learning_rate": 6.389500927086801e-06, + "loss": 0.6979820728302002, + "step": 3615 + }, + { + "epoch": 1.2741850220264317, + "grad_norm": 1.9168565956279218, + "learning_rate": 6.384068155175143e-06, + "loss": 0.5661836266517639, + "step": 3616 + }, + { + "epoch": 1.2745374449339206, + "grad_norm": 2.2497484972303896, + "learning_rate": 6.378636610759812e-06, + "loss": 0.699792742729187, + "step": 3617 + }, + { + 
"epoch": 1.2748898678414098, + "grad_norm": 2.1298001613626765, + "learning_rate": 6.373206295684653e-06, + "loss": 0.6418631076812744, + "step": 3618 + }, + { + "epoch": 1.2752422907488987, + "grad_norm": 1.639324838954067, + "learning_rate": 6.3677772117930895e-06, + "loss": 0.4975489675998688, + "step": 3619 + }, + { + "epoch": 1.2755947136563877, + "grad_norm": 1.6787243090627195, + "learning_rate": 6.362349360928117e-06, + "loss": 0.5621567964553833, + "step": 3620 + }, + { + "epoch": 1.2759471365638766, + "grad_norm": 1.9441609125211634, + "learning_rate": 6.356922744932335e-06, + "loss": 0.538573682308197, + "step": 3621 + }, + { + "epoch": 1.2762995594713655, + "grad_norm": 1.8099521315485383, + "learning_rate": 6.351497365647903e-06, + "loss": 0.5726763010025024, + "step": 3622 + }, + { + "epoch": 1.2766519823788547, + "grad_norm": 1.509968688666824, + "learning_rate": 6.346073224916565e-06, + "loss": 0.5911343097686768, + "step": 3623 + }, + { + "epoch": 1.2770044052863436, + "grad_norm": 1.8960352229890238, + "learning_rate": 6.340650324579658e-06, + "loss": 0.6181383728981018, + "step": 3624 + }, + { + "epoch": 1.2773568281938326, + "grad_norm": 1.8065087463718459, + "learning_rate": 6.3352286664780785e-06, + "loss": 0.5941140651702881, + "step": 3625 + }, + { + "epoch": 1.2777092511013217, + "grad_norm": 1.980034412220703, + "learning_rate": 6.329808252452316e-06, + "loss": 0.7604472637176514, + "step": 3626 + }, + { + "epoch": 1.2780616740088107, + "grad_norm": 1.7265138262893938, + "learning_rate": 6.324389084342435e-06, + "loss": 0.6063867211341858, + "step": 3627 + }, + { + "epoch": 1.2784140969162996, + "grad_norm": 1.8844241099487, + "learning_rate": 6.3189711639880644e-06, + "loss": 0.7202302813529968, + "step": 3628 + }, + { + "epoch": 1.2787665198237885, + "grad_norm": 1.7295127580755116, + "learning_rate": 6.313554493228431e-06, + "loss": 0.5934856534004211, + "step": 3629 + }, + { + "epoch": 1.2791189427312775, + "grad_norm": 
1.7905829637835577, + "learning_rate": 6.3081390739023175e-06, + "loss": 0.6403088569641113, + "step": 3630 + }, + { + "epoch": 1.2794713656387664, + "grad_norm": 1.9400757232043577, + "learning_rate": 6.302724907848096e-06, + "loss": 0.6679831743240356, + "step": 3631 + }, + { + "epoch": 1.2798237885462556, + "grad_norm": 1.9107919043768602, + "learning_rate": 6.297311996903703e-06, + "loss": 0.6914902329444885, + "step": 3632 + }, + { + "epoch": 1.2801762114537445, + "grad_norm": 1.4865016000129294, + "learning_rate": 6.2919003429066535e-06, + "loss": 0.5391600131988525, + "step": 3633 + }, + { + "epoch": 1.2805286343612334, + "grad_norm": 1.7774288854868727, + "learning_rate": 6.286489947694041e-06, + "loss": 0.5740962028503418, + "step": 3634 + }, + { + "epoch": 1.2808810572687226, + "grad_norm": 1.9144175178404335, + "learning_rate": 6.281080813102523e-06, + "loss": 0.6497045159339905, + "step": 3635 + }, + { + "epoch": 1.2812334801762115, + "grad_norm": 1.6649274023798961, + "learning_rate": 6.275672940968326e-06, + "loss": 0.5481048226356506, + "step": 3636 + }, + { + "epoch": 1.2815859030837005, + "grad_norm": 1.6547388155087517, + "learning_rate": 6.270266333127266e-06, + "loss": 0.5412508249282837, + "step": 3637 + }, + { + "epoch": 1.2819383259911894, + "grad_norm": 1.8289845737684471, + "learning_rate": 6.264860991414709e-06, + "loss": 0.5055446624755859, + "step": 3638 + }, + { + "epoch": 1.2822907488986783, + "grad_norm": 1.9772143213144648, + "learning_rate": 6.259456917665605e-06, + "loss": 0.6073929071426392, + "step": 3639 + }, + { + "epoch": 1.2826431718061673, + "grad_norm": 1.6297327309789957, + "learning_rate": 6.254054113714467e-06, + "loss": 0.5277928113937378, + "step": 3640 + }, + { + "epoch": 1.2829955947136564, + "grad_norm": 1.7440990717646376, + "learning_rate": 6.248652581395378e-06, + "loss": 0.5106299519538879, + "step": 3641 + }, + { + "epoch": 1.2833480176211454, + "grad_norm": 1.612143250274434, + "learning_rate": 
6.243252322541993e-06, + "loss": 0.485049843788147, + "step": 3642 + }, + { + "epoch": 1.2837004405286343, + "grad_norm": 2.0115453178937894, + "learning_rate": 6.237853338987532e-06, + "loss": 0.5899066925048828, + "step": 3643 + }, + { + "epoch": 1.2840528634361235, + "grad_norm": 1.6956228425038977, + "learning_rate": 6.2324556325647745e-06, + "loss": 0.5761981010437012, + "step": 3644 + }, + { + "epoch": 1.2844052863436124, + "grad_norm": 1.732932337254408, + "learning_rate": 6.227059205106085e-06, + "loss": 0.6288208961486816, + "step": 3645 + }, + { + "epoch": 1.2847577092511013, + "grad_norm": 1.7671756166643349, + "learning_rate": 6.2216640584433726e-06, + "loss": 0.6122645139694214, + "step": 3646 + }, + { + "epoch": 1.2851101321585903, + "grad_norm": 1.8312838317562172, + "learning_rate": 6.2162701944081295e-06, + "loss": 0.5838489532470703, + "step": 3647 + }, + { + "epoch": 1.2854625550660792, + "grad_norm": 1.5533740438356287, + "learning_rate": 6.2108776148314005e-06, + "loss": 0.6020689606666565, + "step": 3648 + }, + { + "epoch": 1.2858149779735684, + "grad_norm": 1.9453055966993607, + "learning_rate": 6.205486321543798e-06, + "loss": 0.5852698683738708, + "step": 3649 + }, + { + "epoch": 1.2861674008810573, + "grad_norm": 1.7649785944212673, + "learning_rate": 6.2000963163755015e-06, + "loss": 0.560903012752533, + "step": 3650 + }, + { + "epoch": 1.2865198237885462, + "grad_norm": 2.053972717306982, + "learning_rate": 6.194707601156249e-06, + "loss": 0.7750356197357178, + "step": 3651 + }, + { + "epoch": 1.2868722466960352, + "grad_norm": 1.7842589241914402, + "learning_rate": 6.189320177715338e-06, + "loss": 0.5503605604171753, + "step": 3652 + }, + { + "epoch": 1.2872246696035243, + "grad_norm": 1.8162609150425584, + "learning_rate": 6.183934047881636e-06, + "loss": 0.6910672187805176, + "step": 3653 + }, + { + "epoch": 1.2875770925110133, + "grad_norm": 1.6952370527492193, + "learning_rate": 6.1785492134835626e-06, + "loss": 0.7773069739341736, 
+ "step": 3654 + }, + { + "epoch": 1.2879295154185022, + "grad_norm": 1.7765631560225321, + "learning_rate": 6.173165676349103e-06, + "loss": 0.6777454018592834, + "step": 3655 + }, + { + "epoch": 1.2882819383259911, + "grad_norm": 1.6097825614884171, + "learning_rate": 6.167783438305803e-06, + "loss": 0.6103118658065796, + "step": 3656 + }, + { + "epoch": 1.28863436123348, + "grad_norm": 2.4016366240266454, + "learning_rate": 6.1624025011807595e-06, + "loss": 0.593717634677887, + "step": 3657 + }, + { + "epoch": 1.2889867841409692, + "grad_norm": 1.700445284940488, + "learning_rate": 6.1570228668006395e-06, + "loss": 0.5822824835777283, + "step": 3658 + }, + { + "epoch": 1.2893392070484582, + "grad_norm": 1.7095957018221146, + "learning_rate": 6.151644536991656e-06, + "loss": 0.5180603861808777, + "step": 3659 + }, + { + "epoch": 1.289691629955947, + "grad_norm": 1.799926440179644, + "learning_rate": 6.14626751357959e-06, + "loss": 0.6283069849014282, + "step": 3660 + }, + { + "epoch": 1.290044052863436, + "grad_norm": 2.2706339647511613, + "learning_rate": 6.14089179838977e-06, + "loss": 0.7590633630752563, + "step": 3661 + }, + { + "epoch": 1.2903964757709252, + "grad_norm": 1.4238309589699358, + "learning_rate": 6.135517393247081e-06, + "loss": 0.6044079661369324, + "step": 3662 + }, + { + "epoch": 1.2907488986784141, + "grad_norm": 2.078820338247561, + "learning_rate": 6.130144299975973e-06, + "loss": 0.603421688079834, + "step": 3663 + }, + { + "epoch": 1.291101321585903, + "grad_norm": 1.9398452395479244, + "learning_rate": 6.1247725204004395e-06, + "loss": 0.577094554901123, + "step": 3664 + }, + { + "epoch": 1.291453744493392, + "grad_norm": 1.7780187513951604, + "learning_rate": 6.119402056344033e-06, + "loss": 0.5752004981040955, + "step": 3665 + }, + { + "epoch": 1.291806167400881, + "grad_norm": 1.6979532493457608, + "learning_rate": 6.114032909629863e-06, + "loss": 0.730962872505188, + "step": 3666 + }, + { + "epoch": 1.29215859030837, + "grad_norm": 
2.0386068832784465, + "learning_rate": 6.108665082080578e-06, + "loss": 0.5361749529838562, + "step": 3667 + }, + { + "epoch": 1.292511013215859, + "grad_norm": 1.470729033877409, + "learning_rate": 6.103298575518401e-06, + "loss": 0.4841603636741638, + "step": 3668 + }, + { + "epoch": 1.292863436123348, + "grad_norm": 1.706501413292354, + "learning_rate": 6.097933391765087e-06, + "loss": 0.6614999771118164, + "step": 3669 + }, + { + "epoch": 1.2932158590308371, + "grad_norm": 1.6930402108862321, + "learning_rate": 6.092569532641947e-06, + "loss": 0.6088405847549438, + "step": 3670 + }, + { + "epoch": 1.293568281938326, + "grad_norm": 1.9173247230823398, + "learning_rate": 6.087206999969848e-06, + "loss": 0.601859986782074, + "step": 3671 + }, + { + "epoch": 1.293920704845815, + "grad_norm": 1.8019332247534052, + "learning_rate": 6.081845795569204e-06, + "loss": 0.5724194049835205, + "step": 3672 + }, + { + "epoch": 1.294273127753304, + "grad_norm": 1.7101141845528827, + "learning_rate": 6.07648592125997e-06, + "loss": 0.7899144887924194, + "step": 3673 + }, + { + "epoch": 1.2946255506607929, + "grad_norm": 1.8438581079047975, + "learning_rate": 6.071127378861667e-06, + "loss": 0.5778594017028809, + "step": 3674 + }, + { + "epoch": 1.2949779735682818, + "grad_norm": 1.6768623613769682, + "learning_rate": 6.065770170193342e-06, + "loss": 0.6357566118240356, + "step": 3675 + }, + { + "epoch": 1.295330396475771, + "grad_norm": 1.5951400768860937, + "learning_rate": 6.0604142970736115e-06, + "loss": 0.511436939239502, + "step": 3676 + }, + { + "epoch": 1.29568281938326, + "grad_norm": 1.883542435313207, + "learning_rate": 6.0550597613206205e-06, + "loss": 0.6469998955726624, + "step": 3677 + }, + { + "epoch": 1.2960352422907488, + "grad_norm": 1.5730405198836903, + "learning_rate": 6.049706564752069e-06, + "loss": 0.5724819898605347, + "step": 3678 + }, + { + "epoch": 1.296387665198238, + "grad_norm": 1.5360587172523898, + "learning_rate": 6.044354709185203e-06, + 
"loss": 0.6567148566246033, + "step": 3679 + }, + { + "epoch": 1.296740088105727, + "grad_norm": 1.8931575903206552, + "learning_rate": 6.039004196436807e-06, + "loss": 0.6694033145904541, + "step": 3680 + }, + { + "epoch": 1.2970925110132159, + "grad_norm": 1.8190573258877898, + "learning_rate": 6.033655028323215e-06, + "loss": 0.5147275924682617, + "step": 3681 + }, + { + "epoch": 1.2974449339207048, + "grad_norm": 2.0405860057138256, + "learning_rate": 6.0283072066603075e-06, + "loss": 0.5881609320640564, + "step": 3682 + }, + { + "epoch": 1.2977973568281937, + "grad_norm": 1.7248898652229567, + "learning_rate": 6.022960733263493e-06, + "loss": 0.625927209854126, + "step": 3683 + }, + { + "epoch": 1.2981497797356827, + "grad_norm": 1.8738096752650604, + "learning_rate": 6.017615609947747e-06, + "loss": 0.693459153175354, + "step": 3684 + }, + { + "epoch": 1.2985022026431718, + "grad_norm": 1.6745028766810846, + "learning_rate": 6.0122718385275615e-06, + "loss": 0.5185744762420654, + "step": 3685 + }, + { + "epoch": 1.2988546255506608, + "grad_norm": 1.7625922291600025, + "learning_rate": 6.006929420816982e-06, + "loss": 0.5153995752334595, + "step": 3686 + }, + { + "epoch": 1.2992070484581497, + "grad_norm": 1.9617946738772851, + "learning_rate": 6.001588358629598e-06, + "loss": 0.5844067931175232, + "step": 3687 + }, + { + "epoch": 1.2995594713656389, + "grad_norm": 1.7999387557140187, + "learning_rate": 5.996248653778529e-06, + "loss": 0.6021767854690552, + "step": 3688 + }, + { + "epoch": 1.2999118942731278, + "grad_norm": 1.650868828635221, + "learning_rate": 5.990910308076443e-06, + "loss": 0.573150098323822, + "step": 3689 + }, + { + "epoch": 1.3002643171806167, + "grad_norm": 1.8809065032795727, + "learning_rate": 5.985573323335541e-06, + "loss": 0.5125507116317749, + "step": 3690 + }, + { + "epoch": 1.3006167400881057, + "grad_norm": 1.5884199689542184, + "learning_rate": 5.980237701367556e-06, + "loss": 0.541732668876648, + "step": 3691 + }, + { + 
"epoch": 1.3009691629955946, + "grad_norm": 2.0151748973563577, + "learning_rate": 5.974903443983778e-06, + "loss": 0.66359543800354, + "step": 3692 + }, + { + "epoch": 1.3013215859030838, + "grad_norm": 1.8831727632454829, + "learning_rate": 5.969570552995014e-06, + "loss": 0.6986300349235535, + "step": 3693 + }, + { + "epoch": 1.3016740088105727, + "grad_norm": 2.0800644206104195, + "learning_rate": 5.9642390302116125e-06, + "loss": 0.6829022169113159, + "step": 3694 + }, + { + "epoch": 1.3020264317180616, + "grad_norm": 1.9073088749861613, + "learning_rate": 5.9589088774434655e-06, + "loss": 0.5710464715957642, + "step": 3695 + }, + { + "epoch": 1.3023788546255506, + "grad_norm": 1.8154393300824316, + "learning_rate": 5.953580096499989e-06, + "loss": 0.5604938268661499, + "step": 3696 + }, + { + "epoch": 1.3027312775330397, + "grad_norm": 1.755426899711885, + "learning_rate": 5.948252689190141e-06, + "loss": 0.678723931312561, + "step": 3697 + }, + { + "epoch": 1.3030837004405287, + "grad_norm": 1.8845664461665383, + "learning_rate": 5.9429266573224145e-06, + "loss": 0.6652591228485107, + "step": 3698 + }, + { + "epoch": 1.3034361233480176, + "grad_norm": 1.8800654237619134, + "learning_rate": 5.937602002704819e-06, + "loss": 0.6141147017478943, + "step": 3699 + }, + { + "epoch": 1.3037885462555066, + "grad_norm": 1.937561336880738, + "learning_rate": 5.932278727144924e-06, + "loss": 0.5260860919952393, + "step": 3700 + }, + { + "epoch": 1.3041409691629955, + "grad_norm": 1.6945627397292862, + "learning_rate": 5.926956832449806e-06, + "loss": 0.464357852935791, + "step": 3701 + }, + { + "epoch": 1.3044933920704846, + "grad_norm": 1.8301641414278105, + "learning_rate": 5.921636320426085e-06, + "loss": 0.6513686180114746, + "step": 3702 + }, + { + "epoch": 1.3048458149779736, + "grad_norm": 1.7297134138158161, + "learning_rate": 5.91631719287991e-06, + "loss": 0.44547855854034424, + "step": 3703 + }, + { + "epoch": 1.3051982378854625, + "grad_norm": 
1.8572950621020996, + "learning_rate": 5.910999451616959e-06, + "loss": 0.714026153087616, + "step": 3704 + }, + { + "epoch": 1.3055506607929517, + "grad_norm": 1.5164059156260825, + "learning_rate": 5.90568309844244e-06, + "loss": 0.48294252157211304, + "step": 3705 + }, + { + "epoch": 1.3059030837004406, + "grad_norm": 2.0148835282111275, + "learning_rate": 5.900368135161093e-06, + "loss": 0.587759256362915, + "step": 3706 + }, + { + "epoch": 1.3062555066079296, + "grad_norm": 1.7833437474608147, + "learning_rate": 5.895054563577172e-06, + "loss": 0.6251810789108276, + "step": 3707 + }, + { + "epoch": 1.3066079295154185, + "grad_norm": 1.98023378159902, + "learning_rate": 5.889742385494481e-06, + "loss": 0.6488438844680786, + "step": 3708 + }, + { + "epoch": 1.3069603524229074, + "grad_norm": 2.3062951128393325, + "learning_rate": 5.8844316027163315e-06, + "loss": 0.6682882308959961, + "step": 3709 + }, + { + "epoch": 1.3073127753303964, + "grad_norm": 1.9459894886811675, + "learning_rate": 5.879122217045573e-06, + "loss": 0.6537875533103943, + "step": 3710 + }, + { + "epoch": 1.3076651982378855, + "grad_norm": 1.994395753049965, + "learning_rate": 5.873814230284576e-06, + "loss": 0.6813541650772095, + "step": 3711 + }, + { + "epoch": 1.3080176211453745, + "grad_norm": 2.002875607232805, + "learning_rate": 5.868507644235233e-06, + "loss": 0.6962395906448364, + "step": 3712 + }, + { + "epoch": 1.3083700440528634, + "grad_norm": 1.8811127927416966, + "learning_rate": 5.863202460698972e-06, + "loss": 0.6872841119766235, + "step": 3713 + }, + { + "epoch": 1.3087224669603525, + "grad_norm": 2.007681646131619, + "learning_rate": 5.857898681476732e-06, + "loss": 0.7200508117675781, + "step": 3714 + }, + { + "epoch": 1.3090748898678415, + "grad_norm": 1.7850989505478374, + "learning_rate": 5.852596308368982e-06, + "loss": 0.6100003719329834, + "step": 3715 + }, + { + "epoch": 1.3094273127753304, + "grad_norm": 1.962305695853223, + "learning_rate": 5.847295343175714e-06, 
+ "loss": 0.7347345352172852, + "step": 3716 + }, + { + "epoch": 1.3097797356828194, + "grad_norm": 1.8094012131106647, + "learning_rate": 5.841995787696438e-06, + "loss": 0.6955733895301819, + "step": 3717 + }, + { + "epoch": 1.3101321585903083, + "grad_norm": 1.6497459626323396, + "learning_rate": 5.836697643730193e-06, + "loss": 0.5266987085342407, + "step": 3718 + }, + { + "epoch": 1.3104845814977972, + "grad_norm": 1.7072540878561502, + "learning_rate": 5.83140091307553e-06, + "loss": 0.5978814363479614, + "step": 3719 + }, + { + "epoch": 1.3108370044052864, + "grad_norm": 1.9008641546548906, + "learning_rate": 5.826105597530526e-06, + "loss": 0.608231782913208, + "step": 3720 + }, + { + "epoch": 1.3111894273127753, + "grad_norm": 1.660571967924875, + "learning_rate": 5.820811698892775e-06, + "loss": 0.5834963321685791, + "step": 3721 + }, + { + "epoch": 1.3115418502202643, + "grad_norm": 1.7715871926900555, + "learning_rate": 5.8155192189593915e-06, + "loss": 0.6675208806991577, + "step": 3722 + }, + { + "epoch": 1.3118942731277534, + "grad_norm": 2.0125396897962156, + "learning_rate": 5.810228159527003e-06, + "loss": 0.655093789100647, + "step": 3723 + }, + { + "epoch": 1.3122466960352424, + "grad_norm": 1.832975656309839, + "learning_rate": 5.804938522391768e-06, + "loss": 0.5658842921257019, + "step": 3724 + }, + { + "epoch": 1.3125991189427313, + "grad_norm": 1.7484570770381627, + "learning_rate": 5.799650309349348e-06, + "loss": 0.4502618610858917, + "step": 3725 + }, + { + "epoch": 1.3129515418502202, + "grad_norm": 1.6150871905896036, + "learning_rate": 5.79436352219493e-06, + "loss": 0.6165845394134521, + "step": 3726 + }, + { + "epoch": 1.3133039647577092, + "grad_norm": 1.6734001609648903, + "learning_rate": 5.7890781627232115e-06, + "loss": 0.6315968036651611, + "step": 3727 + }, + { + "epoch": 1.313656387665198, + "grad_norm": 1.5048326218576167, + "learning_rate": 5.783794232728408e-06, + "loss": 0.58831787109375, + "step": 3728 + }, + { + 
"epoch": 1.3140088105726873, + "grad_norm": 1.7597864288310854, + "learning_rate": 5.778511734004248e-06, + "loss": 0.5056396722793579, + "step": 3729 + }, + { + "epoch": 1.3143612334801762, + "grad_norm": 2.3417954571274753, + "learning_rate": 5.773230668343978e-06, + "loss": 0.5469251871109009, + "step": 3730 + }, + { + "epoch": 1.3147136563876651, + "grad_norm": 1.768855633328091, + "learning_rate": 5.76795103754035e-06, + "loss": 0.7011934518814087, + "step": 3731 + }, + { + "epoch": 1.3150660792951543, + "grad_norm": 1.574817644372446, + "learning_rate": 5.762672843385643e-06, + "loss": 0.7080543041229248, + "step": 3732 + }, + { + "epoch": 1.3154185022026432, + "grad_norm": 1.7812689751161113, + "learning_rate": 5.757396087671634e-06, + "loss": 0.5180330276489258, + "step": 3733 + }, + { + "epoch": 1.3157709251101322, + "grad_norm": 1.6465709022018649, + "learning_rate": 5.75212077218962e-06, + "loss": 0.5282220840454102, + "step": 3734 + }, + { + "epoch": 1.316123348017621, + "grad_norm": 1.9100789844293367, + "learning_rate": 5.746846898730403e-06, + "loss": 0.7174440026283264, + "step": 3735 + }, + { + "epoch": 1.31647577092511, + "grad_norm": 1.7156784573652895, + "learning_rate": 5.7415744690843025e-06, + "loss": 0.537194013595581, + "step": 3736 + }, + { + "epoch": 1.3168281938325992, + "grad_norm": 1.714186482517803, + "learning_rate": 5.7363034850411415e-06, + "loss": 0.7514588832855225, + "step": 3737 + }, + { + "epoch": 1.3171806167400881, + "grad_norm": 1.6138774970176952, + "learning_rate": 5.731033948390252e-06, + "loss": 0.601151704788208, + "step": 3738 + }, + { + "epoch": 1.317533039647577, + "grad_norm": 1.9652638368208295, + "learning_rate": 5.7257658609204865e-06, + "loss": 0.6046192646026611, + "step": 3739 + }, + { + "epoch": 1.317885462555066, + "grad_norm": 1.9909773544544114, + "learning_rate": 5.720499224420196e-06, + "loss": 0.5003835558891296, + "step": 3740 + }, + { + "epoch": 1.3182378854625552, + "grad_norm": 2.7143275056165237, 
+ "learning_rate": 5.715234040677229e-06, + "loss": 0.6251966953277588, + "step": 3741 + }, + { + "epoch": 1.318590308370044, + "grad_norm": 1.9483642954012013, + "learning_rate": 5.709970311478961e-06, + "loss": 0.6681240797042847, + "step": 3742 + }, + { + "epoch": 1.318942731277533, + "grad_norm": 1.6278748497204938, + "learning_rate": 5.704708038612261e-06, + "loss": 0.582561194896698, + "step": 3743 + }, + { + "epoch": 1.319295154185022, + "grad_norm": 1.8550137845260724, + "learning_rate": 5.699447223863508e-06, + "loss": 0.5616302490234375, + "step": 3744 + }, + { + "epoch": 1.319647577092511, + "grad_norm": 1.7452561285826282, + "learning_rate": 5.6941878690185835e-06, + "loss": 0.6131408214569092, + "step": 3745 + }, + { + "epoch": 1.32, + "grad_norm": 1.8334584062109562, + "learning_rate": 5.688929975862873e-06, + "loss": 0.5772547721862793, + "step": 3746 + }, + { + "epoch": 1.320352422907489, + "grad_norm": 1.7519534139582256, + "learning_rate": 5.683673546181274e-06, + "loss": 0.5927203893661499, + "step": 3747 + }, + { + "epoch": 1.320704845814978, + "grad_norm": 1.9849489030223588, + "learning_rate": 5.67841858175818e-06, + "loss": 0.6001334190368652, + "step": 3748 + }, + { + "epoch": 1.321057268722467, + "grad_norm": 1.584893703676267, + "learning_rate": 5.673165084377479e-06, + "loss": 0.4598100781440735, + "step": 3749 + }, + { + "epoch": 1.321409691629956, + "grad_norm": 1.9316178856088813, + "learning_rate": 5.667913055822578e-06, + "loss": 0.6455222368240356, + "step": 3750 + }, + { + "epoch": 1.321762114537445, + "grad_norm": 1.9234057001448424, + "learning_rate": 5.662662497876375e-06, + "loss": 0.6327164173126221, + "step": 3751 + }, + { + "epoch": 1.322114537444934, + "grad_norm": 1.7096288638222439, + "learning_rate": 5.657413412321271e-06, + "loss": 0.6699539422988892, + "step": 3752 + }, + { + "epoch": 1.3224669603524228, + "grad_norm": 2.0694083676949107, + "learning_rate": 5.6521658009391676e-06, + "loss": 0.7507830858230591, + 
"step": 3753 + }, + { + "epoch": 1.3228193832599118, + "grad_norm": 1.7615687866950613, + "learning_rate": 5.646919665511461e-06, + "loss": 0.5164662003517151, + "step": 3754 + }, + { + "epoch": 1.323171806167401, + "grad_norm": 2.267697288539615, + "learning_rate": 5.641675007819058e-06, + "loss": 0.7059702277183533, + "step": 3755 + }, + { + "epoch": 1.3235242290748899, + "grad_norm": 2.1165471311290243, + "learning_rate": 5.636431829642359e-06, + "loss": 0.6535515189170837, + "step": 3756 + }, + { + "epoch": 1.3238766519823788, + "grad_norm": 1.782117402624855, + "learning_rate": 5.631190132761247e-06, + "loss": 0.5912176370620728, + "step": 3757 + }, + { + "epoch": 1.324229074889868, + "grad_norm": 1.6111457739999588, + "learning_rate": 5.625949918955126e-06, + "loss": 0.6527940034866333, + "step": 3758 + }, + { + "epoch": 1.324581497797357, + "grad_norm": 1.9751426120017839, + "learning_rate": 5.620711190002879e-06, + "loss": 0.7236875295639038, + "step": 3759 + }, + { + "epoch": 1.3249339207048458, + "grad_norm": 2.042390900324052, + "learning_rate": 5.6154739476829e-06, + "loss": 0.6823146343231201, + "step": 3760 + }, + { + "epoch": 1.3252863436123348, + "grad_norm": 2.058457581887865, + "learning_rate": 5.610238193773061e-06, + "loss": 0.5795537233352661, + "step": 3761 + }, + { + "epoch": 1.3256387665198237, + "grad_norm": 1.90461931046175, + "learning_rate": 5.605003930050738e-06, + "loss": 0.5530939102172852, + "step": 3762 + }, + { + "epoch": 1.3259911894273126, + "grad_norm": 1.6978922894801083, + "learning_rate": 5.599771158292806e-06, + "loss": 0.5362278819084167, + "step": 3763 + }, + { + "epoch": 1.3263436123348018, + "grad_norm": 1.9521190182519916, + "learning_rate": 5.5945398802756315e-06, + "loss": 0.6136768460273743, + "step": 3764 + }, + { + "epoch": 1.3266960352422907, + "grad_norm": 1.7782753118174626, + "learning_rate": 5.589310097775055e-06, + "loss": 0.5979033708572388, + "step": 3765 + }, + { + "epoch": 1.3270484581497797, + 
"grad_norm": 1.810593191069574, + "learning_rate": 5.584081812566439e-06, + "loss": 0.6750006675720215, + "step": 3766 + }, + { + "epoch": 1.3274008810572688, + "grad_norm": 1.6815578779160076, + "learning_rate": 5.578855026424619e-06, + "loss": 0.6004951000213623, + "step": 3767 + }, + { + "epoch": 1.3277533039647578, + "grad_norm": 1.522422246822047, + "learning_rate": 5.573629741123926e-06, + "loss": 0.570702075958252, + "step": 3768 + }, + { + "epoch": 1.3281057268722467, + "grad_norm": 1.5435622334320813, + "learning_rate": 5.5684059584381826e-06, + "loss": 0.506945788860321, + "step": 3769 + }, + { + "epoch": 1.3284581497797356, + "grad_norm": 1.647967795112189, + "learning_rate": 5.563183680140696e-06, + "loss": 0.5935436487197876, + "step": 3770 + }, + { + "epoch": 1.3288105726872246, + "grad_norm": 2.7715355389110043, + "learning_rate": 5.5579629080042755e-06, + "loss": 0.641446590423584, + "step": 3771 + }, + { + "epoch": 1.3291629955947137, + "grad_norm": 1.7489195207611605, + "learning_rate": 5.552743643801209e-06, + "loss": 0.5816437005996704, + "step": 3772 + }, + { + "epoch": 1.3295154185022027, + "grad_norm": 1.7699530777692443, + "learning_rate": 5.547525889303265e-06, + "loss": 0.666487991809845, + "step": 3773 + }, + { + "epoch": 1.3298678414096916, + "grad_norm": 2.100750588167558, + "learning_rate": 5.542309646281718e-06, + "loss": 0.7961397767066956, + "step": 3774 + }, + { + "epoch": 1.3302202643171805, + "grad_norm": 1.5292695888779975, + "learning_rate": 5.53709491650732e-06, + "loss": 0.4736033082008362, + "step": 3775 + }, + { + "epoch": 1.3305726872246697, + "grad_norm": 1.8004482810288622, + "learning_rate": 5.531881701750304e-06, + "loss": 0.542208194732666, + "step": 3776 + }, + { + "epoch": 1.3309251101321586, + "grad_norm": 1.8151751535940353, + "learning_rate": 5.526670003780399e-06, + "loss": 0.6306429505348206, + "step": 3777 + }, + { + "epoch": 1.3312775330396476, + "grad_norm": 1.7520809852323194, + "learning_rate": 
5.521459824366808e-06, + "loss": 0.531991720199585, + "step": 3778 + }, + { + "epoch": 1.3316299559471365, + "grad_norm": 1.9852873895231067, + "learning_rate": 5.516251165278235e-06, + "loss": 0.688262939453125, + "step": 3779 + }, + { + "epoch": 1.3319823788546254, + "grad_norm": 2.0026356133489416, + "learning_rate": 5.511044028282853e-06, + "loss": 0.7555293440818787, + "step": 3780 + }, + { + "epoch": 1.3323348017621146, + "grad_norm": 1.9387490035628434, + "learning_rate": 5.505838415148317e-06, + "loss": 0.7518796324729919, + "step": 3781 + }, + { + "epoch": 1.3326872246696035, + "grad_norm": 1.859399241253671, + "learning_rate": 5.500634327641777e-06, + "loss": 0.5161253809928894, + "step": 3782 + }, + { + "epoch": 1.3330396475770925, + "grad_norm": 1.5897606830745852, + "learning_rate": 5.4954317675298586e-06, + "loss": 0.5617681741714478, + "step": 3783 + }, + { + "epoch": 1.3333920704845814, + "grad_norm": 1.6894758792140483, + "learning_rate": 5.4902307365786676e-06, + "loss": 0.5707885026931763, + "step": 3784 + }, + { + "epoch": 1.3337444933920706, + "grad_norm": 1.9016603426520955, + "learning_rate": 5.485031236553792e-06, + "loss": 0.5842025876045227, + "step": 3785 + }, + { + "epoch": 1.3340969162995595, + "grad_norm": 2.278549510271659, + "learning_rate": 5.479833269220296e-06, + "loss": 0.7103949785232544, + "step": 3786 + }, + { + "epoch": 1.3344493392070484, + "grad_norm": 1.8432428404869632, + "learning_rate": 5.474636836342737e-06, + "loss": 0.7704740762710571, + "step": 3787 + }, + { + "epoch": 1.3348017621145374, + "grad_norm": 1.808727631247744, + "learning_rate": 5.469441939685137e-06, + "loss": 0.6402652263641357, + "step": 3788 + }, + { + "epoch": 1.3351541850220263, + "grad_norm": 1.892219877227891, + "learning_rate": 5.464248581011002e-06, + "loss": 0.8214348554611206, + "step": 3789 + }, + { + "epoch": 1.3355066079295155, + "grad_norm": 1.9758909531924576, + "learning_rate": 5.459056762083318e-06, + "loss": 0.6372429132461548, + 
"step": 3790 + }, + { + "epoch": 1.3358590308370044, + "grad_norm": 1.849044346394621, + "learning_rate": 5.453866484664543e-06, + "loss": 0.5418422222137451, + "step": 3791 + }, + { + "epoch": 1.3362114537444933, + "grad_norm": 1.7395663492002502, + "learning_rate": 5.448677750516613e-06, + "loss": 0.6574567556381226, + "step": 3792 + }, + { + "epoch": 1.3365638766519825, + "grad_norm": 1.9976311809706857, + "learning_rate": 5.443490561400948e-06, + "loss": 0.5174030661582947, + "step": 3793 + }, + { + "epoch": 1.3369162995594714, + "grad_norm": 1.5627335899600845, + "learning_rate": 5.4383049190784275e-06, + "loss": 0.595477819442749, + "step": 3794 + }, + { + "epoch": 1.3372687224669604, + "grad_norm": 1.845680624563864, + "learning_rate": 5.4331208253094255e-06, + "loss": 0.6177364587783813, + "step": 3795 + }, + { + "epoch": 1.3376211453744493, + "grad_norm": 1.6348460055259042, + "learning_rate": 5.4279382818537774e-06, + "loss": 0.6106897592544556, + "step": 3796 + }, + { + "epoch": 1.3379735682819383, + "grad_norm": 1.8500671496295353, + "learning_rate": 5.422757290470795e-06, + "loss": 0.46700483560562134, + "step": 3797 + }, + { + "epoch": 1.3383259911894272, + "grad_norm": 1.952200717602712, + "learning_rate": 5.417577852919262e-06, + "loss": 0.5408231019973755, + "step": 3798 + }, + { + "epoch": 1.3386784140969163, + "grad_norm": 1.8733329229880296, + "learning_rate": 5.412399970957439e-06, + "loss": 0.6430809497833252, + "step": 3799 + }, + { + "epoch": 1.3390308370044053, + "grad_norm": 1.9515663922431925, + "learning_rate": 5.4072236463430535e-06, + "loss": 0.6817858219146729, + "step": 3800 + }, + { + "epoch": 1.3393832599118942, + "grad_norm": 1.7386331074635664, + "learning_rate": 5.402048880833308e-06, + "loss": 0.5492604970932007, + "step": 3801 + }, + { + "epoch": 1.3397356828193834, + "grad_norm": 1.9883458715986422, + "learning_rate": 5.39687567618487e-06, + "loss": 0.6148543357849121, + "step": 3802 + }, + { + "epoch": 1.3400881057268723, + 
"grad_norm": 1.7245960691315507, + "learning_rate": 5.391704034153894e-06, + "loss": 0.5921820402145386, + "step": 3803 + }, + { + "epoch": 1.3404405286343613, + "grad_norm": 1.8759210914719033, + "learning_rate": 5.386533956495974e-06, + "loss": 0.49728113412857056, + "step": 3804 + }, + { + "epoch": 1.3407929515418502, + "grad_norm": 1.7899218455267007, + "learning_rate": 5.381365444966205e-06, + "loss": 0.5944808125495911, + "step": 3805 + }, + { + "epoch": 1.3411453744493391, + "grad_norm": 1.6022996204023598, + "learning_rate": 5.376198501319128e-06, + "loss": 0.5197580456733704, + "step": 3806 + }, + { + "epoch": 1.341497797356828, + "grad_norm": 1.5953524266203611, + "learning_rate": 5.3710331273087625e-06, + "loss": 0.6229256391525269, + "step": 3807 + }, + { + "epoch": 1.3418502202643172, + "grad_norm": 2.0736813734241073, + "learning_rate": 5.365869324688591e-06, + "loss": 0.5305753946304321, + "step": 3808 + }, + { + "epoch": 1.3422026431718062, + "grad_norm": 1.4520191291543518, + "learning_rate": 5.360707095211566e-06, + "loss": 0.4002259373664856, + "step": 3809 + }, + { + "epoch": 1.342555066079295, + "grad_norm": 1.8821320745162777, + "learning_rate": 5.3555464406300965e-06, + "loss": 0.5211426615715027, + "step": 3810 + }, + { + "epoch": 1.3429074889867842, + "grad_norm": 1.7112007743194535, + "learning_rate": 5.350387362696077e-06, + "loss": 0.5998013019561768, + "step": 3811 + }, + { + "epoch": 1.3432599118942732, + "grad_norm": 1.6128635046491597, + "learning_rate": 5.345229863160839e-06, + "loss": 0.5330953598022461, + "step": 3812 + }, + { + "epoch": 1.3436123348017621, + "grad_norm": 1.6570398271033384, + "learning_rate": 5.340073943775206e-06, + "loss": 0.6999118328094482, + "step": 3813 + }, + { + "epoch": 1.343964757709251, + "grad_norm": 2.060346240780723, + "learning_rate": 5.334919606289446e-06, + "loss": 0.6286367177963257, + "step": 3814 + }, + { + "epoch": 1.34431718061674, + "grad_norm": 1.4130805934733843, + "learning_rate": 
5.329766852453296e-06, + "loss": 0.5793008804321289, + "step": 3815 + }, + { + "epoch": 1.3446696035242292, + "grad_norm": 1.7815340287164039, + "learning_rate": 5.324615684015957e-06, + "loss": 0.5811383128166199, + "step": 3816 + }, + { + "epoch": 1.345022026431718, + "grad_norm": 1.8888368809882845, + "learning_rate": 5.319466102726087e-06, + "loss": 0.7389675378799438, + "step": 3817 + }, + { + "epoch": 1.345374449339207, + "grad_norm": 1.9482215135863048, + "learning_rate": 5.314318110331815e-06, + "loss": 0.6105868220329285, + "step": 3818 + }, + { + "epoch": 1.345726872246696, + "grad_norm": 1.648111237588601, + "learning_rate": 5.3091717085807235e-06, + "loss": 0.5979465842247009, + "step": 3819 + }, + { + "epoch": 1.3460792951541851, + "grad_norm": 2.100772248921902, + "learning_rate": 5.304026899219846e-06, + "loss": 0.6722681522369385, + "step": 3820 + }, + { + "epoch": 1.346431718061674, + "grad_norm": 1.5469717835195365, + "learning_rate": 5.298883683995697e-06, + "loss": 0.4687497913837433, + "step": 3821 + }, + { + "epoch": 1.346784140969163, + "grad_norm": 1.6982574361909266, + "learning_rate": 5.29374206465423e-06, + "loss": 0.563692569732666, + "step": 3822 + }, + { + "epoch": 1.347136563876652, + "grad_norm": 1.7298606992172854, + "learning_rate": 5.2886020429408716e-06, + "loss": 0.604897141456604, + "step": 3823 + }, + { + "epoch": 1.3474889867841409, + "grad_norm": 2.111770720101543, + "learning_rate": 5.283463620600493e-06, + "loss": 0.6270164251327515, + "step": 3824 + }, + { + "epoch": 1.34784140969163, + "grad_norm": 2.1238324371472954, + "learning_rate": 5.278326799377428e-06, + "loss": 0.6487830877304077, + "step": 3825 + }, + { + "epoch": 1.348193832599119, + "grad_norm": 1.58718768900561, + "learning_rate": 5.273191581015474e-06, + "loss": 0.5816935896873474, + "step": 3826 + }, + { + "epoch": 1.348546255506608, + "grad_norm": 1.72099904065486, + "learning_rate": 5.26805796725788e-06, + "loss": 0.6281115412712097, + "step": 3827 + }, + 
{ + "epoch": 1.348898678414097, + "grad_norm": 2.0975447662151288, + "learning_rate": 5.2629259598473335e-06, + "loss": 0.5031973123550415, + "step": 3828 + }, + { + "epoch": 1.349251101321586, + "grad_norm": 1.6391975654545219, + "learning_rate": 5.257795560526005e-06, + "loss": 0.6220165491104126, + "step": 3829 + }, + { + "epoch": 1.349603524229075, + "grad_norm": 1.8177506583957952, + "learning_rate": 5.2526667710354995e-06, + "loss": 0.6451058387756348, + "step": 3830 + }, + { + "epoch": 1.3499559471365639, + "grad_norm": 2.000132155225934, + "learning_rate": 5.247539593116884e-06, + "loss": 0.7524863481521606, + "step": 3831 + }, + { + "epoch": 1.3503083700440528, + "grad_norm": 1.7855711080776688, + "learning_rate": 5.242414028510674e-06, + "loss": 0.6270921230316162, + "step": 3832 + }, + { + "epoch": 1.3506607929515417, + "grad_norm": 1.8779302666662292, + "learning_rate": 5.237290078956836e-06, + "loss": 0.6196550130844116, + "step": 3833 + }, + { + "epoch": 1.351013215859031, + "grad_norm": 1.932517845360487, + "learning_rate": 5.232167746194798e-06, + "loss": 0.8512230515480042, + "step": 3834 + }, + { + "epoch": 1.3513656387665198, + "grad_norm": 1.672868645098828, + "learning_rate": 5.227047031963435e-06, + "loss": 0.5196807980537415, + "step": 3835 + }, + { + "epoch": 1.3517180616740088, + "grad_norm": 1.890472281368116, + "learning_rate": 5.2219279380010565e-06, + "loss": 0.6713111400604248, + "step": 3836 + }, + { + "epoch": 1.352070484581498, + "grad_norm": 1.8891048300322977, + "learning_rate": 5.216810466045448e-06, + "loss": 0.7150874137878418, + "step": 3837 + }, + { + "epoch": 1.3524229074889869, + "grad_norm": 1.9379344809365882, + "learning_rate": 5.211694617833827e-06, + "loss": 0.5812375545501709, + "step": 3838 + }, + { + "epoch": 1.3527753303964758, + "grad_norm": 1.6232111313971074, + "learning_rate": 5.2065803951028675e-06, + "loss": 0.5842182040214539, + "step": 3839 + }, + { + "epoch": 1.3531277533039647, + "grad_norm": 
1.7655789614212678, + "learning_rate": 5.201467799588685e-06, + "loss": 0.5432665348052979, + "step": 3840 + }, + { + "epoch": 1.3534801762114537, + "grad_norm": 1.610757257105171, + "learning_rate": 5.196356833026845e-06, + "loss": 0.551771879196167, + "step": 3841 + }, + { + "epoch": 1.3538325991189426, + "grad_norm": 2.0105503681662076, + "learning_rate": 5.19124749715237e-06, + "loss": 0.6961710453033447, + "step": 3842 + }, + { + "epoch": 1.3541850220264318, + "grad_norm": 1.9510922019810755, + "learning_rate": 5.18613979369972e-06, + "loss": 0.7105714678764343, + "step": 3843 + }, + { + "epoch": 1.3545374449339207, + "grad_norm": 1.9369232024679732, + "learning_rate": 5.181033724402789e-06, + "loss": 0.7100229263305664, + "step": 3844 + }, + { + "epoch": 1.3548898678414096, + "grad_norm": 1.6852711649451124, + "learning_rate": 5.175929290994941e-06, + "loss": 0.651812732219696, + "step": 3845 + }, + { + "epoch": 1.3552422907488988, + "grad_norm": 2.308449923325572, + "learning_rate": 5.170826495208967e-06, + "loss": 0.5194147825241089, + "step": 3846 + }, + { + "epoch": 1.3555947136563877, + "grad_norm": 1.6095794520986102, + "learning_rate": 5.16572533877711e-06, + "loss": 0.5939956307411194, + "step": 3847 + }, + { + "epoch": 1.3559471365638767, + "grad_norm": 1.7731843322868706, + "learning_rate": 5.160625823431051e-06, + "loss": 0.6434104442596436, + "step": 3848 + }, + { + "epoch": 1.3562995594713656, + "grad_norm": 1.9584483919337772, + "learning_rate": 5.155527950901914e-06, + "loss": 0.5256108045578003, + "step": 3849 + }, + { + "epoch": 1.3566519823788545, + "grad_norm": 1.5746637659323357, + "learning_rate": 5.150431722920277e-06, + "loss": 0.5632717609405518, + "step": 3850 + }, + { + "epoch": 1.3570044052863435, + "grad_norm": 1.8450205582439452, + "learning_rate": 5.145337141216149e-06, + "loss": 0.5964382886886597, + "step": 3851 + }, + { + "epoch": 1.3573568281938326, + "grad_norm": 1.9383063853676261, + "learning_rate": 5.140244207518971e-06, 
+ "loss": 0.7268366813659668, + "step": 3852 + }, + { + "epoch": 1.3577092511013216, + "grad_norm": 2.357958765027834, + "learning_rate": 5.135152923557647e-06, + "loss": 0.7376477122306824, + "step": 3853 + }, + { + "epoch": 1.3580616740088105, + "grad_norm": 1.9573550951394243, + "learning_rate": 5.130063291060505e-06, + "loss": 0.50569748878479, + "step": 3854 + }, + { + "epoch": 1.3584140969162997, + "grad_norm": 1.684535591269265, + "learning_rate": 5.12497531175532e-06, + "loss": 0.5639374256134033, + "step": 3855 + }, + { + "epoch": 1.3587665198237886, + "grad_norm": 2.0009335012534146, + "learning_rate": 5.1198889873692994e-06, + "loss": 0.5051915645599365, + "step": 3856 + }, + { + "epoch": 1.3591189427312775, + "grad_norm": 1.979939818228197, + "learning_rate": 5.114804319629088e-06, + "loss": 0.4718795120716095, + "step": 3857 + }, + { + "epoch": 1.3594713656387665, + "grad_norm": 1.7040447839749338, + "learning_rate": 5.109721310260781e-06, + "loss": 0.5684067606925964, + "step": 3858 + }, + { + "epoch": 1.3598237885462554, + "grad_norm": 1.687205926430453, + "learning_rate": 5.104639960989903e-06, + "loss": 0.5757609605789185, + "step": 3859 + }, + { + "epoch": 1.3601762114537446, + "grad_norm": 1.637859976815221, + "learning_rate": 5.099560273541401e-06, + "loss": 0.5971167087554932, + "step": 3860 + }, + { + "epoch": 1.3605286343612335, + "grad_norm": 1.9766573766085018, + "learning_rate": 5.094482249639683e-06, + "loss": 0.6959896683692932, + "step": 3861 + }, + { + "epoch": 1.3608810572687224, + "grad_norm": 1.8397057454745067, + "learning_rate": 5.089405891008574e-06, + "loss": 0.6954548358917236, + "step": 3862 + }, + { + "epoch": 1.3612334801762114, + "grad_norm": 1.5747472561310782, + "learning_rate": 5.084331199371343e-06, + "loss": 0.5659986138343811, + "step": 3863 + }, + { + "epoch": 1.3615859030837005, + "grad_norm": 1.9340659365358734, + "learning_rate": 5.079258176450687e-06, + "loss": 0.5582559108734131, + "step": 3864 + }, + { + 
"epoch": 1.3619383259911895, + "grad_norm": 1.5684621947501252, + "learning_rate": 5.0741868239687395e-06, + "loss": 0.5337075591087341, + "step": 3865 + }, + { + "epoch": 1.3622907488986784, + "grad_norm": 1.8617666338346237, + "learning_rate": 5.069117143647075e-06, + "loss": 0.621441125869751, + "step": 3866 + }, + { + "epoch": 1.3626431718061673, + "grad_norm": 1.7285404952370873, + "learning_rate": 5.064049137206677e-06, + "loss": 0.5476670861244202, + "step": 3867 + }, + { + "epoch": 1.3629955947136563, + "grad_norm": 1.9444577342582248, + "learning_rate": 5.058982806367989e-06, + "loss": 0.5357356071472168, + "step": 3868 + }, + { + "epoch": 1.3633480176211454, + "grad_norm": 2.032867685216442, + "learning_rate": 5.053918152850868e-06, + "loss": 0.5722761750221252, + "step": 3869 + }, + { + "epoch": 1.3637004405286344, + "grad_norm": 1.8019521015311857, + "learning_rate": 5.048855178374606e-06, + "loss": 0.7271207571029663, + "step": 3870 + }, + { + "epoch": 1.3640528634361233, + "grad_norm": 2.149716528128109, + "learning_rate": 5.043793884657926e-06, + "loss": 0.6213557720184326, + "step": 3871 + }, + { + "epoch": 1.3644052863436125, + "grad_norm": 1.9750542918701046, + "learning_rate": 5.03873427341898e-06, + "loss": 0.6509476900100708, + "step": 3872 + }, + { + "epoch": 1.3647577092511014, + "grad_norm": 1.8266690493980986, + "learning_rate": 5.0336763463753425e-06, + "loss": 0.5321642756462097, + "step": 3873 + }, + { + "epoch": 1.3651101321585903, + "grad_norm": 1.8114804761469812, + "learning_rate": 5.028620105244035e-06, + "loss": 0.7237476110458374, + "step": 3874 + }, + { + "epoch": 1.3654625550660793, + "grad_norm": 2.014453779183698, + "learning_rate": 5.0235655517414805e-06, + "loss": 0.6653447151184082, + "step": 3875 + }, + { + "epoch": 1.3658149779735682, + "grad_norm": 1.843622237552059, + "learning_rate": 5.018512687583552e-06, + "loss": 0.6188938617706299, + "step": 3876 + }, + { + "epoch": 1.3661674008810571, + "grad_norm": 
1.8211870806299153, + "learning_rate": 5.013461514485536e-06, + "loss": 0.6341606378555298, + "step": 3877 + }, + { + "epoch": 1.3665198237885463, + "grad_norm": 1.6224290182707664, + "learning_rate": 5.00841203416215e-06, + "loss": 0.6148994565010071, + "step": 3878 + }, + { + "epoch": 1.3668722466960352, + "grad_norm": 1.8692541577175399, + "learning_rate": 5.003364248327533e-06, + "loss": 0.6292222142219543, + "step": 3879 + }, + { + "epoch": 1.3672246696035242, + "grad_norm": 1.618170468267519, + "learning_rate": 4.998318158695255e-06, + "loss": 0.6648836135864258, + "step": 3880 + }, + { + "epoch": 1.3675770925110133, + "grad_norm": 6.866040476375875, + "learning_rate": 4.993273766978297e-06, + "loss": 0.5175273418426514, + "step": 3881 + }, + { + "epoch": 1.3679295154185023, + "grad_norm": 1.5661461645683938, + "learning_rate": 4.98823107488909e-06, + "loss": 0.5686253309249878, + "step": 3882 + }, + { + "epoch": 1.3682819383259912, + "grad_norm": 1.9697672783538545, + "learning_rate": 4.983190084139452e-06, + "loss": 0.6128156185150146, + "step": 3883 + }, + { + "epoch": 1.3686343612334801, + "grad_norm": 1.9331016188284555, + "learning_rate": 4.978150796440656e-06, + "loss": 0.6849625110626221, + "step": 3884 + }, + { + "epoch": 1.368986784140969, + "grad_norm": 1.5986771035358114, + "learning_rate": 4.973113213503379e-06, + "loss": 0.5735955238342285, + "step": 3885 + }, + { + "epoch": 1.369339207048458, + "grad_norm": 1.6049593584012303, + "learning_rate": 4.968077337037724e-06, + "loss": 0.4584425091743469, + "step": 3886 + }, + { + "epoch": 1.3696916299559472, + "grad_norm": 1.9525312670752564, + "learning_rate": 4.963043168753212e-06, + "loss": 0.547109067440033, + "step": 3887 + }, + { + "epoch": 1.3700440528634361, + "grad_norm": 2.113357180829694, + "learning_rate": 4.9580107103587895e-06, + "loss": 0.6966128349304199, + "step": 3888 + }, + { + "epoch": 1.370396475770925, + "grad_norm": 1.7817002019358994, + "learning_rate": 4.952979963562814e-06, + 
"loss": 0.6275819540023804, + "step": 3889 + }, + { + "epoch": 1.3707488986784142, + "grad_norm": 1.6096829752005641, + "learning_rate": 4.94795093007308e-06, + "loss": 0.5678467750549316, + "step": 3890 + }, + { + "epoch": 1.3711013215859031, + "grad_norm": 1.8874234747665013, + "learning_rate": 4.942923611596772e-06, + "loss": 0.6516115665435791, + "step": 3891 + }, + { + "epoch": 1.371453744493392, + "grad_norm": 1.8638529672264463, + "learning_rate": 4.937898009840518e-06, + "loss": 0.6279621124267578, + "step": 3892 + }, + { + "epoch": 1.371806167400881, + "grad_norm": 1.6187117518672614, + "learning_rate": 4.932874126510353e-06, + "loss": 0.6123322248458862, + "step": 3893 + }, + { + "epoch": 1.37215859030837, + "grad_norm": 1.6259761787603553, + "learning_rate": 4.927851963311726e-06, + "loss": 0.43412432074546814, + "step": 3894 + }, + { + "epoch": 1.372511013215859, + "grad_norm": 1.859998329311036, + "learning_rate": 4.922831521949507e-06, + "loss": 0.6582022905349731, + "step": 3895 + }, + { + "epoch": 1.372863436123348, + "grad_norm": 1.8966645456702385, + "learning_rate": 4.917812804127976e-06, + "loss": 0.6219466328620911, + "step": 3896 + }, + { + "epoch": 1.373215859030837, + "grad_norm": 2.056798959647299, + "learning_rate": 4.9127958115508365e-06, + "loss": 0.5352981090545654, + "step": 3897 + }, + { + "epoch": 1.373568281938326, + "grad_norm": 1.5240218181276974, + "learning_rate": 4.907780545921205e-06, + "loss": 0.47646182775497437, + "step": 3898 + }, + { + "epoch": 1.373920704845815, + "grad_norm": 1.6949945802187276, + "learning_rate": 4.902767008941594e-06, + "loss": 0.5335453748703003, + "step": 3899 + }, + { + "epoch": 1.374273127753304, + "grad_norm": 1.7931951401372748, + "learning_rate": 4.897755202313954e-06, + "loss": 0.576435923576355, + "step": 3900 + }, + { + "epoch": 1.374625550660793, + "grad_norm": 1.6675338707159029, + "learning_rate": 4.8927451277396365e-06, + "loss": 0.533431887626648, + "step": 3901 + }, + { + "epoch": 
1.3749779735682819, + "grad_norm": 1.7439550653197133, + "learning_rate": 4.8877367869194035e-06, + "loss": 0.6892110109329224, + "step": 3902 + }, + { + "epoch": 1.3753303964757708, + "grad_norm": 1.9209875137364842, + "learning_rate": 4.8827301815534335e-06, + "loss": 0.7028052806854248, + "step": 3903 + }, + { + "epoch": 1.37568281938326, + "grad_norm": 1.8413166797931897, + "learning_rate": 4.877725313341306e-06, + "loss": 0.6883414387702942, + "step": 3904 + }, + { + "epoch": 1.376035242290749, + "grad_norm": 2.145518516472349, + "learning_rate": 4.8727221839820285e-06, + "loss": 0.6712944507598877, + "step": 3905 + }, + { + "epoch": 1.3763876651982379, + "grad_norm": 1.6297297090329885, + "learning_rate": 4.867720795174006e-06, + "loss": 0.6139085292816162, + "step": 3906 + }, + { + "epoch": 1.3767400881057268, + "grad_norm": 1.8425831405666082, + "learning_rate": 4.862721148615043e-06, + "loss": 0.6463953256607056, + "step": 3907 + }, + { + "epoch": 1.377092511013216, + "grad_norm": 1.768461759599311, + "learning_rate": 4.857723246002376e-06, + "loss": 0.6790587306022644, + "step": 3908 + }, + { + "epoch": 1.3774449339207049, + "grad_norm": 1.7177146369820009, + "learning_rate": 4.852727089032634e-06, + "loss": 0.4996854066848755, + "step": 3909 + }, + { + "epoch": 1.3777973568281938, + "grad_norm": 1.8098347886488457, + "learning_rate": 4.847732679401855e-06, + "loss": 0.5826590061187744, + "step": 3910 + }, + { + "epoch": 1.3781497797356828, + "grad_norm": 1.8997892974208295, + "learning_rate": 4.842740018805489e-06, + "loss": 0.5044558048248291, + "step": 3911 + }, + { + "epoch": 1.3785022026431717, + "grad_norm": 1.873679943847948, + "learning_rate": 4.837749108938381e-06, + "loss": 0.49022918939590454, + "step": 3912 + }, + { + "epoch": 1.3788546255506609, + "grad_norm": 1.9497488299017371, + "learning_rate": 4.832759951494798e-06, + "loss": 0.7034850120544434, + "step": 3913 + }, + { + "epoch": 1.3792070484581498, + "grad_norm": 1.8582811393472771, + 
"learning_rate": 4.827772548168408e-06, + "loss": 0.5835636854171753, + "step": 3914 + }, + { + "epoch": 1.3795594713656387, + "grad_norm": 1.8615896532434415, + "learning_rate": 4.822786900652262e-06, + "loss": 0.6000608205795288, + "step": 3915 + }, + { + "epoch": 1.3799118942731279, + "grad_norm": 2.003742345218382, + "learning_rate": 4.817803010638847e-06, + "loss": 0.6121091842651367, + "step": 3916 + }, + { + "epoch": 1.3802643171806168, + "grad_norm": 1.80308866184307, + "learning_rate": 4.812820879820034e-06, + "loss": 0.457197904586792, + "step": 3917 + }, + { + "epoch": 1.3806167400881058, + "grad_norm": 1.8962611537179284, + "learning_rate": 4.807840509887102e-06, + "loss": 0.6495843529701233, + "step": 3918 + }, + { + "epoch": 1.3809691629955947, + "grad_norm": 1.9212587769996015, + "learning_rate": 4.80286190253073e-06, + "loss": 0.6245059967041016, + "step": 3919 + }, + { + "epoch": 1.3813215859030836, + "grad_norm": 2.020688644956673, + "learning_rate": 4.797885059440998e-06, + "loss": 0.5648606419563293, + "step": 3920 + }, + { + "epoch": 1.3816740088105726, + "grad_norm": 1.93208096226899, + "learning_rate": 4.7929099823073945e-06, + "loss": 0.6593670845031738, + "step": 3921 + }, + { + "epoch": 1.3820264317180617, + "grad_norm": 1.8973564890389945, + "learning_rate": 4.787936672818807e-06, + "loss": 0.6400346159934998, + "step": 3922 + }, + { + "epoch": 1.3823788546255507, + "grad_norm": 1.8684904083901948, + "learning_rate": 4.782965132663505e-06, + "loss": 0.6042170524597168, + "step": 3923 + }, + { + "epoch": 1.3827312775330396, + "grad_norm": 1.8230700495851246, + "learning_rate": 4.777995363529184e-06, + "loss": 0.6224586963653564, + "step": 3924 + }, + { + "epoch": 1.3830837004405288, + "grad_norm": 2.09797321253942, + "learning_rate": 4.7730273671029235e-06, + "loss": 0.6944444179534912, + "step": 3925 + }, + { + "epoch": 1.3834361233480177, + "grad_norm": 1.976613089140818, + "learning_rate": 4.768061145071201e-06, + "loss": 
0.5871950387954712, + "step": 3926 + }, + { + "epoch": 1.3837885462555066, + "grad_norm": 1.7713632438369786, + "learning_rate": 4.763096699119897e-06, + "loss": 0.6438909769058228, + "step": 3927 + }, + { + "epoch": 1.3841409691629956, + "grad_norm": 1.6141008005869943, + "learning_rate": 4.75813403093428e-06, + "loss": 0.6338443756103516, + "step": 3928 + }, + { + "epoch": 1.3844933920704845, + "grad_norm": 2.2680544531424753, + "learning_rate": 4.753173142199036e-06, + "loss": 0.6343874931335449, + "step": 3929 + }, + { + "epoch": 1.3848458149779734, + "grad_norm": 1.7233771229601555, + "learning_rate": 4.7482140345982174e-06, + "loss": 0.5383629202842712, + "step": 3930 + }, + { + "epoch": 1.3851982378854626, + "grad_norm": 1.8699549247596075, + "learning_rate": 4.743256709815289e-06, + "loss": 0.5365063548088074, + "step": 3931 + }, + { + "epoch": 1.3855506607929515, + "grad_norm": 2.2583515376147694, + "learning_rate": 4.738301169533116e-06, + "loss": 0.6310757398605347, + "step": 3932 + }, + { + "epoch": 1.3859030837004405, + "grad_norm": 2.1022070754037476, + "learning_rate": 4.733347415433946e-06, + "loss": 0.7609038949012756, + "step": 3933 + }, + { + "epoch": 1.3862555066079296, + "grad_norm": 2.174490642392946, + "learning_rate": 4.728395449199423e-06, + "loss": 0.5837516784667969, + "step": 3934 + }, + { + "epoch": 1.3866079295154186, + "grad_norm": 1.719340289699717, + "learning_rate": 4.7234452725105875e-06, + "loss": 0.6075407862663269, + "step": 3935 + }, + { + "epoch": 1.3869603524229075, + "grad_norm": 1.7651152509667416, + "learning_rate": 4.718496887047864e-06, + "loss": 0.5246843099594116, + "step": 3936 + }, + { + "epoch": 1.3873127753303964, + "grad_norm": 1.6874306455639787, + "learning_rate": 4.713550294491091e-06, + "loss": 0.6256884336471558, + "step": 3937 + }, + { + "epoch": 1.3876651982378854, + "grad_norm": 1.632156841956259, + "learning_rate": 4.708605496519467e-06, + "loss": 0.5039727687835693, + "step": 3938 + }, + { + "epoch": 
1.3880176211453745, + "grad_norm": 2.0143508196146196, + "learning_rate": 4.703662494811599e-06, + "loss": 0.5302769541740417, + "step": 3939 + }, + { + "epoch": 1.3883700440528635, + "grad_norm": 1.6358403288542849, + "learning_rate": 4.698721291045491e-06, + "loss": 0.654889702796936, + "step": 3940 + }, + { + "epoch": 1.3887224669603524, + "grad_norm": 1.8724260838054423, + "learning_rate": 4.693781886898521e-06, + "loss": 0.5571156740188599, + "step": 3941 + }, + { + "epoch": 1.3890748898678413, + "grad_norm": 1.8352093678478665, + "learning_rate": 4.688844284047466e-06, + "loss": 0.489155113697052, + "step": 3942 + }, + { + "epoch": 1.3894273127753305, + "grad_norm": 2.3056906716340793, + "learning_rate": 4.683908484168487e-06, + "loss": 0.6422649621963501, + "step": 3943 + }, + { + "epoch": 1.3897797356828194, + "grad_norm": 2.1056674936107345, + "learning_rate": 4.67897448893713e-06, + "loss": 0.6800041794776917, + "step": 3944 + }, + { + "epoch": 1.3901321585903084, + "grad_norm": 1.9512416893069657, + "learning_rate": 4.674042300028345e-06, + "loss": 0.6091655492782593, + "step": 3945 + }, + { + "epoch": 1.3904845814977973, + "grad_norm": 1.5832960247380383, + "learning_rate": 4.669111919116442e-06, + "loss": 0.6217864751815796, + "step": 3946 + }, + { + "epoch": 1.3908370044052862, + "grad_norm": 1.9328669999328483, + "learning_rate": 4.664183347875144e-06, + "loss": 0.6140862703323364, + "step": 3947 + }, + { + "epoch": 1.3911894273127754, + "grad_norm": 1.5467868836495022, + "learning_rate": 4.659256587977542e-06, + "loss": 0.5485835075378418, + "step": 3948 + }, + { + "epoch": 1.3915418502202643, + "grad_norm": 1.9704789330010746, + "learning_rate": 4.654331641096118e-06, + "loss": 0.642849862575531, + "step": 3949 + }, + { + "epoch": 1.3918942731277533, + "grad_norm": 3.421035640959237, + "learning_rate": 4.649408508902739e-06, + "loss": 0.7084407806396484, + "step": 3950 + }, + { + "epoch": 1.3922466960352424, + "grad_norm": 1.780782004302536, + 
"learning_rate": 4.644487193068653e-06, + "loss": 0.4798510670661926, + "step": 3951 + }, + { + "epoch": 1.3925991189427314, + "grad_norm": 2.0571809281532056, + "learning_rate": 4.639567695264493e-06, + "loss": 0.6350974440574646, + "step": 3952 + }, + { + "epoch": 1.3929515418502203, + "grad_norm": 1.6636780012798107, + "learning_rate": 4.634650017160285e-06, + "loss": 0.6046940684318542, + "step": 3953 + }, + { + "epoch": 1.3933039647577092, + "grad_norm": 1.8656342511774384, + "learning_rate": 4.629734160425412e-06, + "loss": 0.5262438058853149, + "step": 3954 + }, + { + "epoch": 1.3936563876651982, + "grad_norm": 1.6602375526420536, + "learning_rate": 4.6248201267286666e-06, + "loss": 0.4836997985839844, + "step": 3955 + }, + { + "epoch": 1.394008810572687, + "grad_norm": 1.8387545975251456, + "learning_rate": 4.619907917738206e-06, + "loss": 0.5491573810577393, + "step": 3956 + }, + { + "epoch": 1.3943612334801763, + "grad_norm": 1.7103638500009937, + "learning_rate": 4.614997535121574e-06, + "loss": 0.5778772830963135, + "step": 3957 + }, + { + "epoch": 1.3947136563876652, + "grad_norm": 1.886204345973439, + "learning_rate": 4.61008898054569e-06, + "loss": 0.6235651969909668, + "step": 3958 + }, + { + "epoch": 1.3950660792951541, + "grad_norm": 1.533461324415723, + "learning_rate": 4.605182255676857e-06, + "loss": 0.5192956924438477, + "step": 3959 + }, + { + "epoch": 1.3954185022026433, + "grad_norm": 1.6490801359766816, + "learning_rate": 4.600277362180753e-06, + "loss": 0.5652563571929932, + "step": 3960 + }, + { + "epoch": 1.3957709251101322, + "grad_norm": 2.0491508628562594, + "learning_rate": 4.595374301722445e-06, + "loss": 0.6451884508132935, + "step": 3961 + }, + { + "epoch": 1.3961233480176212, + "grad_norm": 1.6267669051180629, + "learning_rate": 4.5904730759663555e-06, + "loss": 0.6358006000518799, + "step": 3962 + }, + { + "epoch": 1.39647577092511, + "grad_norm": 1.9868299068304147, + "learning_rate": 4.5855736865763104e-06, + "loss": 
0.6122751832008362, + "step": 3963 + }, + { + "epoch": 1.396828193832599, + "grad_norm": 1.6563994945684704, + "learning_rate": 4.580676135215495e-06, + "loss": 0.5563797354698181, + "step": 3964 + }, + { + "epoch": 1.397180616740088, + "grad_norm": 1.7043306637307543, + "learning_rate": 4.575780423546476e-06, + "loss": 0.5915960669517517, + "step": 3965 + }, + { + "epoch": 1.3975330396475771, + "grad_norm": 2.2793683384994363, + "learning_rate": 4.570886553231196e-06, + "loss": 0.5755159854888916, + "step": 3966 + }, + { + "epoch": 1.397885462555066, + "grad_norm": 1.713166792254198, + "learning_rate": 4.565994525930967e-06, + "loss": 0.7017625570297241, + "step": 3967 + }, + { + "epoch": 1.398237885462555, + "grad_norm": 1.901331269180062, + "learning_rate": 4.5611043433064875e-06, + "loss": 0.7623441815376282, + "step": 3968 + }, + { + "epoch": 1.3985903083700442, + "grad_norm": 1.772343766995311, + "learning_rate": 4.556216007017822e-06, + "loss": 0.5561864376068115, + "step": 3969 + }, + { + "epoch": 1.398942731277533, + "grad_norm": 1.7107369517825557, + "learning_rate": 4.5513295187243975e-06, + "loss": 0.516582727432251, + "step": 3970 + }, + { + "epoch": 1.399295154185022, + "grad_norm": 1.6087287767761917, + "learning_rate": 4.5464448800850366e-06, + "loss": 0.6324976682662964, + "step": 3971 + }, + { + "epoch": 1.399647577092511, + "grad_norm": 1.660721417089598, + "learning_rate": 4.541562092757918e-06, + "loss": 0.5926251411437988, + "step": 3972 + }, + { + "epoch": 1.4, + "grad_norm": 1.7443423550845751, + "learning_rate": 4.536681158400598e-06, + "loss": 0.5677082538604736, + "step": 3973 + }, + { + "epoch": 1.400352422907489, + "grad_norm": 1.791823926745788, + "learning_rate": 4.531802078669997e-06, + "loss": 0.5267887115478516, + "step": 3974 + }, + { + "epoch": 1.400704845814978, + "grad_norm": 2.3840846637544617, + "learning_rate": 4.526924855222411e-06, + "loss": 0.6361796855926514, + "step": 3975 + }, + { + "epoch": 1.401057268722467, + 
"grad_norm": 1.9992656380929168, + "learning_rate": 4.522049489713513e-06, + "loss": 0.5906916856765747, + "step": 3976 + }, + { + "epoch": 1.4014096916299559, + "grad_norm": 1.932616358578933, + "learning_rate": 4.517175983798334e-06, + "loss": 0.647320568561554, + "step": 3977 + }, + { + "epoch": 1.401762114537445, + "grad_norm": 1.7297380971513312, + "learning_rate": 4.512304339131271e-06, + "loss": 0.6129240989685059, + "step": 3978 + }, + { + "epoch": 1.402114537444934, + "grad_norm": 1.8820056515419912, + "learning_rate": 4.507434557366106e-06, + "loss": 0.5550417900085449, + "step": 3979 + }, + { + "epoch": 1.402466960352423, + "grad_norm": 3.410101687197828, + "learning_rate": 4.502566640155972e-06, + "loss": 0.5677829384803772, + "step": 3980 + }, + { + "epoch": 1.4028193832599118, + "grad_norm": 2.037826582552855, + "learning_rate": 4.497700589153379e-06, + "loss": 0.6627114415168762, + "step": 3981 + }, + { + "epoch": 1.4031718061674008, + "grad_norm": 2.0278559165710197, + "learning_rate": 4.492836406010197e-06, + "loss": 0.7225712537765503, + "step": 3982 + }, + { + "epoch": 1.40352422907489, + "grad_norm": 1.6877243893704514, + "learning_rate": 4.487974092377661e-06, + "loss": 0.5259708762168884, + "step": 3983 + }, + { + "epoch": 1.4038766519823789, + "grad_norm": 1.930838228409862, + "learning_rate": 4.4831136499063856e-06, + "loss": 0.5509500503540039, + "step": 3984 + }, + { + "epoch": 1.4042290748898678, + "grad_norm": 1.862328702111506, + "learning_rate": 4.478255080246338e-06, + "loss": 0.5436242818832397, + "step": 3985 + }, + { + "epoch": 1.4045814977973567, + "grad_norm": 1.9252586062101578, + "learning_rate": 4.473398385046839e-06, + "loss": 0.591008186340332, + "step": 3986 + }, + { + "epoch": 1.404933920704846, + "grad_norm": 1.8551590253300663, + "learning_rate": 4.4685435659565975e-06, + "loss": 0.7463438510894775, + "step": 3987 + }, + { + "epoch": 1.4052863436123348, + "grad_norm": 2.7212267236094445, + "learning_rate": 
4.46369062462367e-06, + "loss": 0.5672414898872375, + "step": 3988 + }, + { + "epoch": 1.4056387665198238, + "grad_norm": 1.9023461618951703, + "learning_rate": 4.458839562695481e-06, + "loss": 0.6022762060165405, + "step": 3989 + }, + { + "epoch": 1.4059911894273127, + "grad_norm": 2.975414442801074, + "learning_rate": 4.453990381818811e-06, + "loss": 0.8312792181968689, + "step": 3990 + }, + { + "epoch": 1.4063436123348017, + "grad_norm": 1.5291152049255947, + "learning_rate": 4.4491430836398055e-06, + "loss": 0.475655198097229, + "step": 3991 + }, + { + "epoch": 1.4066960352422908, + "grad_norm": 2.205738960261052, + "learning_rate": 4.444297669803981e-06, + "loss": 0.6317172050476074, + "step": 3992 + }, + { + "epoch": 1.4070484581497797, + "grad_norm": 1.7590033801874774, + "learning_rate": 4.439454141956194e-06, + "loss": 0.5412036180496216, + "step": 3993 + }, + { + "epoch": 1.4074008810572687, + "grad_norm": 1.8534848369039538, + "learning_rate": 4.434612501740671e-06, + "loss": 0.6401170492172241, + "step": 3994 + }, + { + "epoch": 1.4077533039647578, + "grad_norm": 1.6819739888663638, + "learning_rate": 4.429772750801007e-06, + "loss": 0.6175628900527954, + "step": 3995 + }, + { + "epoch": 1.4081057268722468, + "grad_norm": 1.9863542351176011, + "learning_rate": 4.424934890780142e-06, + "loss": 0.6875946521759033, + "step": 3996 + }, + { + "epoch": 1.4084581497797357, + "grad_norm": 1.6357928529424866, + "learning_rate": 4.420098923320378e-06, + "loss": 0.6404017210006714, + "step": 3997 + }, + { + "epoch": 1.4088105726872246, + "grad_norm": 2.096371594852834, + "learning_rate": 4.415264850063378e-06, + "loss": 0.7569783329963684, + "step": 3998 + }, + { + "epoch": 1.4091629955947136, + "grad_norm": 1.9373448832520324, + "learning_rate": 4.410432672650154e-06, + "loss": 0.6125228404998779, + "step": 3999 + }, + { + "epoch": 1.4095154185022025, + "grad_norm": 1.8206271046178746, + "learning_rate": 4.405602392721091e-06, + "loss": 0.6187582015991211, + 
"step": 4000 + }, + { + "epoch": 1.4098678414096917, + "grad_norm": 1.6622405329305723, + "learning_rate": 4.400774011915907e-06, + "loss": 0.6086148023605347, + "step": 4001 + }, + { + "epoch": 1.4102202643171806, + "grad_norm": 1.4174012456939833, + "learning_rate": 4.3959475318736885e-06, + "loss": 0.4140232801437378, + "step": 4002 + }, + { + "epoch": 1.4105726872246696, + "grad_norm": 1.836512159334361, + "learning_rate": 4.391122954232883e-06, + "loss": 0.5065237879753113, + "step": 4003 + }, + { + "epoch": 1.4109251101321587, + "grad_norm": 1.458932644295331, + "learning_rate": 4.386300280631279e-06, + "loss": 0.4817734658718109, + "step": 4004 + }, + { + "epoch": 1.4112775330396476, + "grad_norm": 1.6662288245729417, + "learning_rate": 4.381479512706025e-06, + "loss": 0.6339706778526306, + "step": 4005 + }, + { + "epoch": 1.4116299559471366, + "grad_norm": 2.1459595089971653, + "learning_rate": 4.376660652093621e-06, + "loss": 0.6581720113754272, + "step": 4006 + }, + { + "epoch": 1.4119823788546255, + "grad_norm": 2.1052256395432503, + "learning_rate": 4.3718437004299174e-06, + "loss": 0.722156286239624, + "step": 4007 + }, + { + "epoch": 1.4123348017621145, + "grad_norm": 2.007137048045836, + "learning_rate": 4.36702865935013e-06, + "loss": 0.5262913703918457, + "step": 4008 + }, + { + "epoch": 1.4126872246696034, + "grad_norm": 1.6239575731802327, + "learning_rate": 4.362215530488805e-06, + "loss": 0.6242132186889648, + "step": 4009 + }, + { + "epoch": 1.4130396475770926, + "grad_norm": 1.6412038783326008, + "learning_rate": 4.35740431547985e-06, + "loss": 0.48776593804359436, + "step": 4010 + }, + { + "epoch": 1.4133920704845815, + "grad_norm": 1.4539922592281447, + "learning_rate": 4.352595015956528e-06, + "loss": 0.5528746843338013, + "step": 4011 + }, + { + "epoch": 1.4137444933920704, + "grad_norm": 1.881555645901769, + "learning_rate": 4.347787633551444e-06, + "loss": 0.6282942295074463, + "step": 4012 + }, + { + "epoch": 1.4140969162995596, + 
"grad_norm": 1.997464157113011, + "learning_rate": 4.342982169896555e-06, + "loss": 0.6113284826278687, + "step": 4013 + }, + { + "epoch": 1.4144493392070485, + "grad_norm": 1.696170493669202, + "learning_rate": 4.3381786266231685e-06, + "loss": 0.5756875872612, + "step": 4014 + }, + { + "epoch": 1.4148017621145375, + "grad_norm": 1.8012350757266906, + "learning_rate": 4.333377005361931e-06, + "loss": 0.6180154085159302, + "step": 4015 + }, + { + "epoch": 1.4151541850220264, + "grad_norm": 2.2454634074572146, + "learning_rate": 4.328577307742855e-06, + "loss": 0.5728827118873596, + "step": 4016 + }, + { + "epoch": 1.4155066079295153, + "grad_norm": 1.7928891595746113, + "learning_rate": 4.323779535395278e-06, + "loss": 0.5248062014579773, + "step": 4017 + }, + { + "epoch": 1.4158590308370045, + "grad_norm": 1.7454680737255013, + "learning_rate": 4.318983689947895e-06, + "loss": 0.5938228368759155, + "step": 4018 + }, + { + "epoch": 1.4162114537444934, + "grad_norm": 1.8931460456480809, + "learning_rate": 4.3141897730287544e-06, + "loss": 0.7085045576095581, + "step": 4019 + }, + { + "epoch": 1.4165638766519824, + "grad_norm": 2.566425134177144, + "learning_rate": 4.309397786265235e-06, + "loss": 0.599969744682312, + "step": 4020 + }, + { + "epoch": 1.4169162995594713, + "grad_norm": 2.186511304730039, + "learning_rate": 4.30460773128407e-06, + "loss": 0.5784738063812256, + "step": 4021 + }, + { + "epoch": 1.4172687224669605, + "grad_norm": 1.8802349185240168, + "learning_rate": 4.299819609711332e-06, + "loss": 0.6492793560028076, + "step": 4022 + }, + { + "epoch": 1.4176211453744494, + "grad_norm": 1.6886854891683005, + "learning_rate": 4.2950334231724375e-06, + "loss": 0.6690749526023865, + "step": 4023 + }, + { + "epoch": 1.4179735682819383, + "grad_norm": 1.8482135160791267, + "learning_rate": 4.290249173292158e-06, + "loss": 0.5919139981269836, + "step": 4024 + }, + { + "epoch": 1.4183259911894273, + "grad_norm": 1.6202611135629348, + "learning_rate": 
4.285466861694583e-06, + "loss": 0.5661630630493164, + "step": 4025 + }, + { + "epoch": 1.4186784140969162, + "grad_norm": 1.7328062744712673, + "learning_rate": 4.280686490003169e-06, + "loss": 0.547730565071106, + "step": 4026 + }, + { + "epoch": 1.4190308370044054, + "grad_norm": 1.7270546788274348, + "learning_rate": 4.2759080598406985e-06, + "loss": 0.6150445938110352, + "step": 4027 + }, + { + "epoch": 1.4193832599118943, + "grad_norm": 2.048539568947664, + "learning_rate": 4.271131572829303e-06, + "loss": 0.6522917747497559, + "step": 4028 + }, + { + "epoch": 1.4197356828193832, + "grad_norm": 1.952118534937186, + "learning_rate": 4.266357030590449e-06, + "loss": 0.8456230163574219, + "step": 4029 + }, + { + "epoch": 1.4200881057268724, + "grad_norm": 1.810792149813479, + "learning_rate": 4.261584434744945e-06, + "loss": 0.6059526801109314, + "step": 4030 + }, + { + "epoch": 1.4204405286343613, + "grad_norm": 1.8213808222910857, + "learning_rate": 4.256813786912937e-06, + "loss": 0.6289907693862915, + "step": 4031 + }, + { + "epoch": 1.4207929515418503, + "grad_norm": 1.5510911353998291, + "learning_rate": 4.252045088713919e-06, + "loss": 0.48954638838768005, + "step": 4032 + }, + { + "epoch": 1.4211453744493392, + "grad_norm": 2.020061779490103, + "learning_rate": 4.2472783417667055e-06, + "loss": 0.6999461054801941, + "step": 4033 + }, + { + "epoch": 1.4214977973568281, + "grad_norm": 1.9629261898681878, + "learning_rate": 4.242513547689466e-06, + "loss": 0.5610899925231934, + "step": 4034 + }, + { + "epoch": 1.421850220264317, + "grad_norm": 1.8415242379631616, + "learning_rate": 4.237750708099697e-06, + "loss": 0.6240172386169434, + "step": 4035 + }, + { + "epoch": 1.4222026431718062, + "grad_norm": 1.887889822972652, + "learning_rate": 4.2329898246142356e-06, + "loss": 0.6368240118026733, + "step": 4036 + }, + { + "epoch": 1.4225550660792952, + "grad_norm": 2.0839652521207483, + "learning_rate": 4.228230898849253e-06, + "loss": 0.6242600679397583, + 
"step": 4037 + }, + { + "epoch": 1.422907488986784, + "grad_norm": 1.7622749957844728, + "learning_rate": 4.223473932420255e-06, + "loss": 0.6446138620376587, + "step": 4038 + }, + { + "epoch": 1.4232599118942733, + "grad_norm": 1.8800444061446486, + "learning_rate": 4.218718926942081e-06, + "loss": 0.7108229398727417, + "step": 4039 + }, + { + "epoch": 1.4236123348017622, + "grad_norm": 1.7917659431298882, + "learning_rate": 4.213965884028919e-06, + "loss": 0.5279660820960999, + "step": 4040 + }, + { + "epoch": 1.4239647577092511, + "grad_norm": 1.7747691200912903, + "learning_rate": 4.209214805294264e-06, + "loss": 0.6422853469848633, + "step": 4041 + }, + { + "epoch": 1.42431718061674, + "grad_norm": 1.848339978168105, + "learning_rate": 4.2044656923509704e-06, + "loss": 0.6355602741241455, + "step": 4042 + }, + { + "epoch": 1.424669603524229, + "grad_norm": 1.7787421175687093, + "learning_rate": 4.19971854681121e-06, + "loss": 0.5351370573043823, + "step": 4043 + }, + { + "epoch": 1.425022026431718, + "grad_norm": 2.0300248809256987, + "learning_rate": 4.194973370286492e-06, + "loss": 0.5573978424072266, + "step": 4044 + }, + { + "epoch": 1.425374449339207, + "grad_norm": 1.9433750628346875, + "learning_rate": 4.1902301643876555e-06, + "loss": 0.5865412950515747, + "step": 4045 + }, + { + "epoch": 1.425726872246696, + "grad_norm": 2.102324249123369, + "learning_rate": 4.185488930724868e-06, + "loss": 0.6231919527053833, + "step": 4046 + }, + { + "epoch": 1.426079295154185, + "grad_norm": 1.7135783989067233, + "learning_rate": 4.180749670907638e-06, + "loss": 0.48964112997055054, + "step": 4047 + }, + { + "epoch": 1.4264317180616741, + "grad_norm": 2.0973459527664686, + "learning_rate": 4.176012386544796e-06, + "loss": 0.6299121975898743, + "step": 4048 + }, + { + "epoch": 1.426784140969163, + "grad_norm": 1.7239115182277114, + "learning_rate": 4.171277079244492e-06, + "loss": 0.5612789392471313, + "step": 4049 + }, + { + "epoch": 1.427136563876652, + 
"grad_norm": 1.7396324571675468, + "learning_rate": 4.166543750614227e-06, + "loss": 0.4315321147441864, + "step": 4050 + }, + { + "epoch": 1.427488986784141, + "grad_norm": 2.0031203112343094, + "learning_rate": 4.1618124022608146e-06, + "loss": 0.6300851702690125, + "step": 4051 + }, + { + "epoch": 1.4278414096916299, + "grad_norm": 1.7808675185736187, + "learning_rate": 4.1570830357904e-06, + "loss": 0.6258795261383057, + "step": 4052 + }, + { + "epoch": 1.4281938325991188, + "grad_norm": 1.9069027085637078, + "learning_rate": 4.152355652808457e-06, + "loss": 0.7364479303359985, + "step": 4053 + }, + { + "epoch": 1.428546255506608, + "grad_norm": 1.8474323145969993, + "learning_rate": 4.147630254919781e-06, + "loss": 0.44845038652420044, + "step": 4054 + }, + { + "epoch": 1.428898678414097, + "grad_norm": 1.6823890398766386, + "learning_rate": 4.142906843728504e-06, + "loss": 0.516815185546875, + "step": 4055 + }, + { + "epoch": 1.4292511013215858, + "grad_norm": 1.6276914964492604, + "learning_rate": 4.138185420838079e-06, + "loss": 0.6296960711479187, + "step": 4056 + }, + { + "epoch": 1.429603524229075, + "grad_norm": 1.728227730408027, + "learning_rate": 4.133465987851269e-06, + "loss": 0.5709103345870972, + "step": 4057 + }, + { + "epoch": 1.429955947136564, + "grad_norm": 1.7709951139170081, + "learning_rate": 4.128748546370186e-06, + "loss": 0.5672547817230225, + "step": 4058 + }, + { + "epoch": 1.4303083700440529, + "grad_norm": 1.8161591736426752, + "learning_rate": 4.124033097996252e-06, + "loss": 0.5927014350891113, + "step": 4059 + }, + { + "epoch": 1.4306607929515418, + "grad_norm": 1.75056683772296, + "learning_rate": 4.119319644330214e-06, + "loss": 0.7021238803863525, + "step": 4060 + }, + { + "epoch": 1.4310132158590307, + "grad_norm": 1.7949933259038664, + "learning_rate": 4.114608186972143e-06, + "loss": 0.5940784215927124, + "step": 4061 + }, + { + "epoch": 1.43136563876652, + "grad_norm": 1.7958424742287702, + "learning_rate": 
4.109898727521429e-06, + "loss": 0.46511122584342957, + "step": 4062 + }, + { + "epoch": 1.4317180616740088, + "grad_norm": 1.7489789285307085, + "learning_rate": 4.105191267576797e-06, + "loss": 0.4710976481437683, + "step": 4063 + }, + { + "epoch": 1.4320704845814978, + "grad_norm": 1.650142742870973, + "learning_rate": 4.100485808736273e-06, + "loss": 0.5947977900505066, + "step": 4064 + }, + { + "epoch": 1.4324229074889867, + "grad_norm": 1.7620222249444284, + "learning_rate": 4.095782352597214e-06, + "loss": 0.6312115788459778, + "step": 4065 + }, + { + "epoch": 1.4327753303964759, + "grad_norm": 1.7689711305484843, + "learning_rate": 4.091080900756303e-06, + "loss": 0.5709977149963379, + "step": 4066 + }, + { + "epoch": 1.4331277533039648, + "grad_norm": 1.8903042666510779, + "learning_rate": 4.086381454809535e-06, + "loss": 0.6183716058731079, + "step": 4067 + }, + { + "epoch": 1.4334801762114537, + "grad_norm": 1.8677159370638265, + "learning_rate": 4.081684016352223e-06, + "loss": 0.5139745473861694, + "step": 4068 + }, + { + "epoch": 1.4338325991189427, + "grad_norm": 1.9141879794373917, + "learning_rate": 4.076988586979004e-06, + "loss": 0.6611173152923584, + "step": 4069 + }, + { + "epoch": 1.4341850220264316, + "grad_norm": 1.7276457807578136, + "learning_rate": 4.072295168283824e-06, + "loss": 0.616943359375, + "step": 4070 + }, + { + "epoch": 1.4345374449339208, + "grad_norm": 2.331740237042665, + "learning_rate": 4.067603761859965e-06, + "loss": 0.5388625264167786, + "step": 4071 + }, + { + "epoch": 1.4348898678414097, + "grad_norm": 1.9571975377572324, + "learning_rate": 4.062914369300002e-06, + "loss": 0.5523884892463684, + "step": 4072 + }, + { + "epoch": 1.4352422907488986, + "grad_norm": 1.8860165198416616, + "learning_rate": 4.058226992195838e-06, + "loss": 0.5610285997390747, + "step": 4073 + }, + { + "epoch": 1.4355947136563878, + "grad_norm": 1.8522832262316333, + "learning_rate": 4.0535416321387e-06, + "loss": 0.583917498588562, + "step": 
4074 + }, + { + "epoch": 1.4359471365638767, + "grad_norm": 1.677482186323321, + "learning_rate": 4.048858290719115e-06, + "loss": 0.6025276184082031, + "step": 4075 + }, + { + "epoch": 1.4362995594713657, + "grad_norm": 1.8037188167117204, + "learning_rate": 4.044176969526936e-06, + "loss": 0.5643888711929321, + "step": 4076 + }, + { + "epoch": 1.4366519823788546, + "grad_norm": 1.709713655992042, + "learning_rate": 4.0394976701513235e-06, + "loss": 0.550167977809906, + "step": 4077 + }, + { + "epoch": 1.4370044052863435, + "grad_norm": 2.1319034629476747, + "learning_rate": 4.034820394180749e-06, + "loss": 0.6182876825332642, + "step": 4078 + }, + { + "epoch": 1.4373568281938325, + "grad_norm": 2.018408244379198, + "learning_rate": 4.030145143203016e-06, + "loss": 0.5197434425354004, + "step": 4079 + }, + { + "epoch": 1.4377092511013216, + "grad_norm": 2.037308833831004, + "learning_rate": 4.025471918805214e-06, + "loss": 0.5351034998893738, + "step": 4080 + }, + { + "epoch": 1.4380616740088106, + "grad_norm": 1.988655670021041, + "learning_rate": 4.020800722573758e-06, + "loss": 0.5576729774475098, + "step": 4081 + }, + { + "epoch": 1.4384140969162995, + "grad_norm": 2.03830396836609, + "learning_rate": 4.016131556094381e-06, + "loss": 0.5557611584663391, + "step": 4082 + }, + { + "epoch": 1.4387665198237887, + "grad_norm": 1.6841558782049018, + "learning_rate": 4.011464420952115e-06, + "loss": 0.5300010442733765, + "step": 4083 + }, + { + "epoch": 1.4391189427312776, + "grad_norm": 2.5196291624747387, + "learning_rate": 4.0067993187313085e-06, + "loss": 0.5254991054534912, + "step": 4084 + }, + { + "epoch": 1.4394713656387665, + "grad_norm": 1.9569129587138865, + "learning_rate": 4.002136251015617e-06, + "loss": 0.5044848322868347, + "step": 4085 + }, + { + "epoch": 1.4398237885462555, + "grad_norm": 1.7587820286029368, + "learning_rate": 3.997475219388005e-06, + "loss": 0.6422302722930908, + "step": 4086 + }, + { + "epoch": 1.4401762114537444, + "grad_norm": 
1.7785161433093049, + "learning_rate": 3.992816225430758e-06, + "loss": 0.5502497553825378, + "step": 4087 + }, + { + "epoch": 1.4405286343612334, + "grad_norm": 1.9272648866171629, + "learning_rate": 3.988159270725448e-06, + "loss": 0.7479537129402161, + "step": 4088 + }, + { + "epoch": 1.4408810572687225, + "grad_norm": 1.8882665464741835, + "learning_rate": 3.983504356852967e-06, + "loss": 0.5418091416358948, + "step": 4089 + }, + { + "epoch": 1.4412334801762114, + "grad_norm": 2.1909054908738805, + "learning_rate": 3.978851485393519e-06, + "loss": 0.5262568593025208, + "step": 4090 + }, + { + "epoch": 1.4415859030837004, + "grad_norm": 1.7855475608149034, + "learning_rate": 3.974200657926607e-06, + "loss": 0.5419692397117615, + "step": 4091 + }, + { + "epoch": 1.4419383259911895, + "grad_norm": 1.84908442821801, + "learning_rate": 3.9695518760310425e-06, + "loss": 0.5202164649963379, + "step": 4092 + }, + { + "epoch": 1.4422907488986785, + "grad_norm": 1.6256093479781946, + "learning_rate": 3.96490514128494e-06, + "loss": 0.7232608795166016, + "step": 4093 + }, + { + "epoch": 1.4426431718061674, + "grad_norm": 3.2107784732452473, + "learning_rate": 3.960260455265721e-06, + "loss": 0.5899156332015991, + "step": 4094 + }, + { + "epoch": 1.4429955947136563, + "grad_norm": 1.9995831956411032, + "learning_rate": 3.95561781955012e-06, + "loss": 0.629068911075592, + "step": 4095 + }, + { + "epoch": 1.4433480176211453, + "grad_norm": 1.9520751138167456, + "learning_rate": 3.950977235714154e-06, + "loss": 0.5584920644760132, + "step": 4096 + }, + { + "epoch": 1.4437004405286344, + "grad_norm": 1.7280125643736322, + "learning_rate": 3.9463387053331685e-06, + "loss": 0.713936984539032, + "step": 4097 + }, + { + "epoch": 1.4440528634361234, + "grad_norm": 2.7226452019662357, + "learning_rate": 3.9417022299817944e-06, + "loss": 0.7157076001167297, + "step": 4098 + }, + { + "epoch": 1.4444052863436123, + "grad_norm": 1.940369638230087, + "learning_rate": 
3.937067811233972e-06, + "loss": 0.6540844440460205, + "step": 4099 + }, + { + "epoch": 1.4447577092511013, + "grad_norm": 1.6342043838390767, + "learning_rate": 3.9324354506629425e-06, + "loss": 0.5350022315979004, + "step": 4100 + }, + { + "epoch": 1.4451101321585904, + "grad_norm": 1.9186113150470587, + "learning_rate": 3.9278051498412475e-06, + "loss": 0.6852695941925049, + "step": 4101 + }, + { + "epoch": 1.4454625550660793, + "grad_norm": 1.8060312138879744, + "learning_rate": 3.923176910340728e-06, + "loss": 0.6059536337852478, + "step": 4102 + }, + { + "epoch": 1.4458149779735683, + "grad_norm": 1.6721278909458728, + "learning_rate": 3.918550733732536e-06, + "loss": 0.5787979364395142, + "step": 4103 + }, + { + "epoch": 1.4461674008810572, + "grad_norm": 1.8059605647431092, + "learning_rate": 3.9139266215871e-06, + "loss": 0.6068835258483887, + "step": 4104 + }, + { + "epoch": 1.4465198237885462, + "grad_norm": 1.7804694224195132, + "learning_rate": 3.909304575474175e-06, + "loss": 0.5123663544654846, + "step": 4105 + }, + { + "epoch": 1.4468722466960353, + "grad_norm": 1.832785857954117, + "learning_rate": 3.9046845969627975e-06, + "loss": 0.6285420656204224, + "step": 4106 + }, + { + "epoch": 1.4472246696035242, + "grad_norm": 1.8029701329975896, + "learning_rate": 3.9000666876213056e-06, + "loss": 0.6186035871505737, + "step": 4107 + }, + { + "epoch": 1.4475770925110132, + "grad_norm": 2.8121411727628174, + "learning_rate": 3.895450849017336e-06, + "loss": 0.6222661733627319, + "step": 4108 + }, + { + "epoch": 1.4479295154185021, + "grad_norm": 1.7965214044078308, + "learning_rate": 3.890837082717822e-06, + "loss": 0.5889515280723572, + "step": 4109 + }, + { + "epoch": 1.4482819383259913, + "grad_norm": 1.8839124618745182, + "learning_rate": 3.8862253902889925e-06, + "loss": 0.6160309314727783, + "step": 4110 + }, + { + "epoch": 1.4486343612334802, + "grad_norm": 1.7651875927016676, + "learning_rate": 3.881615773296381e-06, + "loss": 0.48093074560165405, 
+ "step": 4111 + }, + { + "epoch": 1.4489867841409692, + "grad_norm": 1.8283039880345147, + "learning_rate": 3.877008233304796e-06, + "loss": 0.5851131677627563, + "step": 4112 + }, + { + "epoch": 1.449339207048458, + "grad_norm": 1.7366010221761805, + "learning_rate": 3.872402771878365e-06, + "loss": 0.5322093963623047, + "step": 4113 + }, + { + "epoch": 1.449691629955947, + "grad_norm": 1.7342840660368584, + "learning_rate": 3.8677993905804956e-06, + "loss": 0.652804970741272, + "step": 4114 + }, + { + "epoch": 1.4500440528634362, + "grad_norm": 1.9583669696557284, + "learning_rate": 3.863198090973891e-06, + "loss": 0.5494996309280396, + "step": 4115 + }, + { + "epoch": 1.4503964757709251, + "grad_norm": 1.910811405312081, + "learning_rate": 3.8585988746205505e-06, + "loss": 0.5641331672668457, + "step": 4116 + }, + { + "epoch": 1.450748898678414, + "grad_norm": 1.7616537450177998, + "learning_rate": 3.854001743081764e-06, + "loss": 0.5415998697280884, + "step": 4117 + }, + { + "epoch": 1.4511013215859032, + "grad_norm": 1.599490372210091, + "learning_rate": 3.849406697918113e-06, + "loss": 0.4399813711643219, + "step": 4118 + }, + { + "epoch": 1.4514537444933922, + "grad_norm": 2.0642862733318115, + "learning_rate": 3.84481374068948e-06, + "loss": 0.6228655576705933, + "step": 4119 + }, + { + "epoch": 1.451806167400881, + "grad_norm": 1.650547077673145, + "learning_rate": 3.8402228729550195e-06, + "loss": 0.5575108528137207, + "step": 4120 + }, + { + "epoch": 1.45215859030837, + "grad_norm": 2.4780057667058704, + "learning_rate": 3.835634096273197e-06, + "loss": 0.5705434679985046, + "step": 4121 + }, + { + "epoch": 1.452511013215859, + "grad_norm": 2.1620556917486184, + "learning_rate": 3.831047412201758e-06, + "loss": 0.5649456977844238, + "step": 4122 + }, + { + "epoch": 1.452863436123348, + "grad_norm": 1.9734169166383557, + "learning_rate": 3.826462822297736e-06, + "loss": 0.5656554698944092, + "step": 4123 + }, + { + "epoch": 1.453215859030837, + 
"grad_norm": 1.8883507101257415, + "learning_rate": 3.82188032811746e-06, + "loss": 0.6565591096878052, + "step": 4124 + }, + { + "epoch": 1.453568281938326, + "grad_norm": 1.9823635297408013, + "learning_rate": 3.817299931216537e-06, + "loss": 0.6553423404693604, + "step": 4125 + }, + { + "epoch": 1.453920704845815, + "grad_norm": 1.8362785094722764, + "learning_rate": 3.812721633149883e-06, + "loss": 0.5401671528816223, + "step": 4126 + }, + { + "epoch": 1.454273127753304, + "grad_norm": 2.008049720412482, + "learning_rate": 3.808145435471674e-06, + "loss": 0.7275381088256836, + "step": 4127 + }, + { + "epoch": 1.454625550660793, + "grad_norm": 1.827455905179675, + "learning_rate": 3.80357133973539e-06, + "loss": 0.6384310722351074, + "step": 4128 + }, + { + "epoch": 1.454977973568282, + "grad_norm": 1.986935058055083, + "learning_rate": 3.7989993474937993e-06, + "loss": 0.7783250212669373, + "step": 4129 + }, + { + "epoch": 1.455330396475771, + "grad_norm": 2.1923612655628624, + "learning_rate": 3.7944294602989473e-06, + "loss": 0.752954363822937, + "step": 4130 + }, + { + "epoch": 1.4556828193832598, + "grad_norm": 1.801491937261316, + "learning_rate": 3.789861679702169e-06, + "loss": 0.6099411249160767, + "step": 4131 + }, + { + "epoch": 1.4560352422907488, + "grad_norm": 2.12230143233965, + "learning_rate": 3.7852960072540845e-06, + "loss": 0.6608012914657593, + "step": 4132 + }, + { + "epoch": 1.456387665198238, + "grad_norm": 1.836228759881875, + "learning_rate": 3.7807324445045924e-06, + "loss": 0.5119853615760803, + "step": 4133 + }, + { + "epoch": 1.4567400881057269, + "grad_norm": 2.036719543857632, + "learning_rate": 3.7761709930028923e-06, + "loss": 0.6353520750999451, + "step": 4134 + }, + { + "epoch": 1.4570925110132158, + "grad_norm": 1.9234147822597618, + "learning_rate": 3.7716116542974434e-06, + "loss": 0.6427614688873291, + "step": 4135 + }, + { + "epoch": 1.457444933920705, + "grad_norm": 2.34139645382815, + "learning_rate": 
3.7670544299360003e-06, + "loss": 0.6205203533172607, + "step": 4136 + }, + { + "epoch": 1.457797356828194, + "grad_norm": 1.940401751978381, + "learning_rate": 3.7624993214656046e-06, + "loss": 0.5957762002944946, + "step": 4137 + }, + { + "epoch": 1.4581497797356828, + "grad_norm": 1.8842452122457418, + "learning_rate": 3.7579463304325714e-06, + "loss": 0.6698043346405029, + "step": 4138 + }, + { + "epoch": 1.4585022026431718, + "grad_norm": 1.825534553754035, + "learning_rate": 3.7533954583824982e-06, + "loss": 0.5947796106338501, + "step": 4139 + }, + { + "epoch": 1.4588546255506607, + "grad_norm": 1.7817965501913557, + "learning_rate": 3.7488467068602664e-06, + "loss": 0.5905463695526123, + "step": 4140 + }, + { + "epoch": 1.4592070484581499, + "grad_norm": 1.8530726474927524, + "learning_rate": 3.74430007741003e-06, + "loss": 0.6218722462654114, + "step": 4141 + }, + { + "epoch": 1.4595594713656388, + "grad_norm": 1.9872212615104103, + "learning_rate": 3.739755571575241e-06, + "loss": 0.6124013066291809, + "step": 4142 + }, + { + "epoch": 1.4599118942731277, + "grad_norm": 1.8897226451904012, + "learning_rate": 3.7352131908986046e-06, + "loss": 0.5816842317581177, + "step": 4143 + }, + { + "epoch": 1.4602643171806167, + "grad_norm": 1.780742815029414, + "learning_rate": 3.7306729369221197e-06, + "loss": 0.5225531458854675, + "step": 4144 + }, + { + "epoch": 1.4606167400881058, + "grad_norm": 1.5899946748394236, + "learning_rate": 3.7261348111870663e-06, + "loss": 0.4536696672439575, + "step": 4145 + }, + { + "epoch": 1.4609691629955948, + "grad_norm": 1.6530094281559282, + "learning_rate": 3.7215988152339968e-06, + "loss": 0.5777832269668579, + "step": 4146 + }, + { + "epoch": 1.4613215859030837, + "grad_norm": 2.0042576738233993, + "learning_rate": 3.717064950602737e-06, + "loss": 0.5964622497558594, + "step": 4147 + }, + { + "epoch": 1.4616740088105726, + "grad_norm": 1.634683701176406, + "learning_rate": 3.7125332188323937e-06, + "loss": 
0.47224390506744385, + "step": 4148 + }, + { + "epoch": 1.4620264317180616, + "grad_norm": 2.0219703130043474, + "learning_rate": 3.708003621461347e-06, + "loss": 0.5989271402359009, + "step": 4149 + }, + { + "epoch": 1.4623788546255507, + "grad_norm": 1.7865027495889427, + "learning_rate": 3.7034761600272627e-06, + "loss": 0.6171919107437134, + "step": 4150 + }, + { + "epoch": 1.4627312775330397, + "grad_norm": 1.742831115289917, + "learning_rate": 3.6989508360670647e-06, + "loss": 0.7064418792724609, + "step": 4151 + }, + { + "epoch": 1.4630837004405286, + "grad_norm": 2.236539087690149, + "learning_rate": 3.6944276511169577e-06, + "loss": 0.6055941581726074, + "step": 4152 + }, + { + "epoch": 1.4634361233480178, + "grad_norm": 1.7433765587507288, + "learning_rate": 3.689906606712429e-06, + "loss": 0.4550645351409912, + "step": 4153 + }, + { + "epoch": 1.4637885462555067, + "grad_norm": 1.929810725161399, + "learning_rate": 3.68538770438823e-06, + "loss": 0.5958502292633057, + "step": 4154 + }, + { + "epoch": 1.4641409691629956, + "grad_norm": 2.057185852502653, + "learning_rate": 3.680870945678389e-06, + "loss": 0.5574297904968262, + "step": 4155 + }, + { + "epoch": 1.4644933920704846, + "grad_norm": 1.443537567568116, + "learning_rate": 3.676356332116202e-06, + "loss": 0.46494683623313904, + "step": 4156 + }, + { + "epoch": 1.4648458149779735, + "grad_norm": 1.7341220293452018, + "learning_rate": 3.671843865234238e-06, + "loss": 0.5549977421760559, + "step": 4157 + }, + { + "epoch": 1.4651982378854624, + "grad_norm": 1.7585158502615206, + "learning_rate": 3.6673335465643488e-06, + "loss": 0.5620779395103455, + "step": 4158 + }, + { + "epoch": 1.4655506607929516, + "grad_norm": 1.845448976603358, + "learning_rate": 3.662825377637638e-06, + "loss": 0.5945389270782471, + "step": 4159 + }, + { + "epoch": 1.4659030837004405, + "grad_norm": 1.9218401758762256, + "learning_rate": 3.6583193599844867e-06, + "loss": 0.6923668384552002, + "step": 4160 + }, + { + "epoch": 
1.4662555066079295, + "grad_norm": 2.16834694145402, + "learning_rate": 3.653815495134557e-06, + "loss": 0.6848515868186951, + "step": 4161 + }, + { + "epoch": 1.4666079295154186, + "grad_norm": 1.922504159473904, + "learning_rate": 3.649313784616765e-06, + "loss": 0.640125036239624, + "step": 4162 + }, + { + "epoch": 1.4669603524229076, + "grad_norm": 1.816415927402479, + "learning_rate": 3.6448142299593026e-06, + "loss": 0.6879653930664062, + "step": 4163 + }, + { + "epoch": 1.4673127753303965, + "grad_norm": 1.9534619637738762, + "learning_rate": 3.6403168326896286e-06, + "loss": 0.6757794618606567, + "step": 4164 + }, + { + "epoch": 1.4676651982378854, + "grad_norm": 1.7476054801499117, + "learning_rate": 3.6358215943344664e-06, + "loss": 0.6405826807022095, + "step": 4165 + }, + { + "epoch": 1.4680176211453744, + "grad_norm": 2.1448885390527064, + "learning_rate": 3.6313285164198187e-06, + "loss": 0.692136287689209, + "step": 4166 + }, + { + "epoch": 1.4683700440528633, + "grad_norm": 1.8449983835752888, + "learning_rate": 3.626837600470935e-06, + "loss": 0.6305568218231201, + "step": 4167 + }, + { + "epoch": 1.4687224669603525, + "grad_norm": 2.1026866185280264, + "learning_rate": 3.6223488480123427e-06, + "loss": 0.7040449380874634, + "step": 4168 + }, + { + "epoch": 1.4690748898678414, + "grad_norm": 1.5463095765444386, + "learning_rate": 3.6178622605678403e-06, + "loss": 0.5064427852630615, + "step": 4169 + }, + { + "epoch": 1.4694273127753303, + "grad_norm": 1.6549157120829303, + "learning_rate": 3.6133778396604813e-06, + "loss": 0.46597155928611755, + "step": 4170 + }, + { + "epoch": 1.4697797356828195, + "grad_norm": 1.9774014610728103, + "learning_rate": 3.6088955868125864e-06, + "loss": 0.5764908790588379, + "step": 4171 + }, + { + "epoch": 1.4701321585903084, + "grad_norm": 1.9347279402338318, + "learning_rate": 3.6044155035457405e-06, + "loss": 0.5808656215667725, + "step": 4172 + }, + { + "epoch": 1.4704845814977974, + "grad_norm": 
2.0116811142135202, + "learning_rate": 3.599937591380791e-06, + "loss": 0.5439287424087524, + "step": 4173 + }, + { + "epoch": 1.4708370044052863, + "grad_norm": 1.5674669602592264, + "learning_rate": 3.595461851837857e-06, + "loss": 0.5943965911865234, + "step": 4174 + }, + { + "epoch": 1.4711894273127752, + "grad_norm": 1.8847509954427386, + "learning_rate": 3.590988286436302e-06, + "loss": 0.631833016872406, + "step": 4175 + }, + { + "epoch": 1.4715418502202642, + "grad_norm": 1.9232774716266652, + "learning_rate": 3.5865168966947718e-06, + "loss": 0.514176070690155, + "step": 4176 + }, + { + "epoch": 1.4718942731277533, + "grad_norm": 1.7211351925277203, + "learning_rate": 3.582047684131159e-06, + "loss": 0.584772527217865, + "step": 4177 + }, + { + "epoch": 1.4722466960352423, + "grad_norm": 1.7726013207799318, + "learning_rate": 3.5775806502626244e-06, + "loss": 0.5085974931716919, + "step": 4178 + }, + { + "epoch": 1.4725991189427312, + "grad_norm": 2.1422494719737464, + "learning_rate": 3.573115796605584e-06, + "loss": 0.62562495470047, + "step": 4179 + }, + { + "epoch": 1.4729515418502204, + "grad_norm": 1.9507950967896, + "learning_rate": 3.5686531246757206e-06, + "loss": 0.5815824270248413, + "step": 4180 + }, + { + "epoch": 1.4733039647577093, + "grad_norm": 1.8811159721586839, + "learning_rate": 3.5641926359879663e-06, + "loss": 0.6639705300331116, + "step": 4181 + }, + { + "epoch": 1.4736563876651982, + "grad_norm": 1.8978737039698366, + "learning_rate": 3.5597343320565293e-06, + "loss": 0.6265684962272644, + "step": 4182 + }, + { + "epoch": 1.4740088105726872, + "grad_norm": 1.877895350809495, + "learning_rate": 3.5552782143948504e-06, + "loss": 0.6113626956939697, + "step": 4183 + }, + { + "epoch": 1.4743612334801761, + "grad_norm": 1.8492974346484832, + "learning_rate": 3.550824284515655e-06, + "loss": 0.5247244834899902, + "step": 4184 + }, + { + "epoch": 1.4747136563876653, + "grad_norm": 1.871370335191458, + "learning_rate": 
3.5463725439309083e-06, + "loss": 0.5524521470069885, + "step": 4185 + }, + { + "epoch": 1.4750660792951542, + "grad_norm": 1.9955136913094453, + "learning_rate": 3.5419229941518384e-06, + "loss": 0.5462251305580139, + "step": 4186 + }, + { + "epoch": 1.4754185022026431, + "grad_norm": 1.6609337480864497, + "learning_rate": 3.5374756366889297e-06, + "loss": 0.6500638723373413, + "step": 4187 + }, + { + "epoch": 1.475770925110132, + "grad_norm": 2.0744035602538586, + "learning_rate": 3.5330304730519216e-06, + "loss": 0.4445904791355133, + "step": 4188 + }, + { + "epoch": 1.4761233480176212, + "grad_norm": 1.7788816335434026, + "learning_rate": 3.5285875047498075e-06, + "loss": 0.6068017482757568, + "step": 4189 + }, + { + "epoch": 1.4764757709251102, + "grad_norm": 1.683605461123042, + "learning_rate": 3.5241467332908384e-06, + "loss": 0.5577334761619568, + "step": 4190 + }, + { + "epoch": 1.4768281938325991, + "grad_norm": 1.9605228698426533, + "learning_rate": 3.5197081601825135e-06, + "loss": 0.6596503257751465, + "step": 4191 + }, + { + "epoch": 1.477180616740088, + "grad_norm": 1.9912955738456768, + "learning_rate": 3.5152717869315965e-06, + "loss": 0.6260303258895874, + "step": 4192 + }, + { + "epoch": 1.477533039647577, + "grad_norm": 2.010278739994815, + "learning_rate": 3.510837615044097e-06, + "loss": 0.5467355251312256, + "step": 4193 + }, + { + "epoch": 1.4778854625550661, + "grad_norm": 2.516516212561754, + "learning_rate": 3.506405646025276e-06, + "loss": 0.5306693911552429, + "step": 4194 + }, + { + "epoch": 1.478237885462555, + "grad_norm": 1.7497505820795882, + "learning_rate": 3.5019758813796513e-06, + "loss": 0.6130725741386414, + "step": 4195 + }, + { + "epoch": 1.478590308370044, + "grad_norm": 2.2199157894914143, + "learning_rate": 3.4975483226109874e-06, + "loss": 0.6656272411346436, + "step": 4196 + }, + { + "epoch": 1.4789427312775332, + "grad_norm": 1.8654097488268417, + "learning_rate": 3.4931229712223047e-06, + "loss": 0.6018439531326294, 
+ "step": 4197 + }, + { + "epoch": 1.479295154185022, + "grad_norm": 2.0982915779378137, + "learning_rate": 3.488699828715871e-06, + "loss": 0.6635257005691528, + "step": 4198 + }, + { + "epoch": 1.479647577092511, + "grad_norm": 1.8412514150393455, + "learning_rate": 3.4842788965932038e-06, + "loss": 0.5760075449943542, + "step": 4199 + }, + { + "epoch": 1.48, + "grad_norm": 1.7009881043074442, + "learning_rate": 3.4798601763550778e-06, + "loss": 0.6951982975006104, + "step": 4200 + }, + { + "epoch": 1.480352422907489, + "grad_norm": 1.880170776358824, + "learning_rate": 3.475443669501508e-06, + "loss": 0.6574405431747437, + "step": 4201 + }, + { + "epoch": 1.4807048458149779, + "grad_norm": 1.8075997179509888, + "learning_rate": 3.4710293775317593e-06, + "loss": 0.5912263989448547, + "step": 4202 + }, + { + "epoch": 1.481057268722467, + "grad_norm": 1.7703606198961421, + "learning_rate": 3.4666173019443485e-06, + "loss": 0.5169661045074463, + "step": 4203 + }, + { + "epoch": 1.481409691629956, + "grad_norm": 1.6923587460137135, + "learning_rate": 3.4622074442370345e-06, + "loss": 0.5707660913467407, + "step": 4204 + }, + { + "epoch": 1.4817621145374449, + "grad_norm": 1.7929036165873167, + "learning_rate": 3.4577998059068354e-06, + "loss": 0.5856584310531616, + "step": 4205 + }, + { + "epoch": 1.482114537444934, + "grad_norm": 2.0144464412272636, + "learning_rate": 3.4533943884499975e-06, + "loss": 0.6306010484695435, + "step": 4206 + }, + { + "epoch": 1.482466960352423, + "grad_norm": 1.9708292107625427, + "learning_rate": 3.4489911933620245e-06, + "loss": 0.6177140474319458, + "step": 4207 + }, + { + "epoch": 1.482819383259912, + "grad_norm": 1.6187910458828605, + "learning_rate": 3.4445902221376694e-06, + "loss": 0.5527759790420532, + "step": 4208 + }, + { + "epoch": 1.4831718061674009, + "grad_norm": 1.792049785406371, + "learning_rate": 3.440191476270922e-06, + "loss": 0.6838431358337402, + "step": 4209 + }, + { + "epoch": 1.4835242290748898, + "grad_norm": 
2.0640892173970933, + "learning_rate": 3.4357949572550196e-06, + "loss": 0.4876987636089325, + "step": 4210 + }, + { + "epoch": 1.4838766519823787, + "grad_norm": 1.66358265635652, + "learning_rate": 3.4314006665824427e-06, + "loss": 0.5639296770095825, + "step": 4211 + }, + { + "epoch": 1.4842290748898679, + "grad_norm": 1.9264745517709694, + "learning_rate": 3.427008605744915e-06, + "loss": 0.4189109802246094, + "step": 4212 + }, + { + "epoch": 1.4845814977973568, + "grad_norm": 1.7041726412059042, + "learning_rate": 3.422618776233413e-06, + "loss": 0.6602882146835327, + "step": 4213 + }, + { + "epoch": 1.4849339207048458, + "grad_norm": 2.105857994769297, + "learning_rate": 3.4182311795381373e-06, + "loss": 0.7642478942871094, + "step": 4214 + }, + { + "epoch": 1.485286343612335, + "grad_norm": 1.703090383184888, + "learning_rate": 3.41384581714854e-06, + "loss": 0.5550031065940857, + "step": 4215 + }, + { + "epoch": 1.4856387665198239, + "grad_norm": 1.956165427853548, + "learning_rate": 3.4094626905533223e-06, + "loss": 0.7036092281341553, + "step": 4216 + }, + { + "epoch": 1.4859911894273128, + "grad_norm": 1.9055824872661757, + "learning_rate": 3.4050818012404165e-06, + "loss": 0.693780779838562, + "step": 4217 + }, + { + "epoch": 1.4863436123348017, + "grad_norm": 1.750544621689218, + "learning_rate": 3.4007031506969977e-06, + "loss": 0.6315299868583679, + "step": 4218 + }, + { + "epoch": 1.4866960352422907, + "grad_norm": 2.0036955114247355, + "learning_rate": 3.396326740409481e-06, + "loss": 0.47849225997924805, + "step": 4219 + }, + { + "epoch": 1.4870484581497798, + "grad_norm": 1.9433930854735686, + "learning_rate": 3.3919525718635195e-06, + "loss": 0.6200336217880249, + "step": 4220 + }, + { + "epoch": 1.4874008810572688, + "grad_norm": 1.7540152253976415, + "learning_rate": 3.3875806465440152e-06, + "loss": 0.7594903707504272, + "step": 4221 + }, + { + "epoch": 1.4877533039647577, + "grad_norm": 1.8336468945254887, + "learning_rate": 
3.383210965935093e-06, + "loss": 0.47159409523010254, + "step": 4222 + }, + { + "epoch": 1.4881057268722466, + "grad_norm": 1.6169320059495966, + "learning_rate": 3.3788435315201216e-06, + "loss": 0.5272495150566101, + "step": 4223 + }, + { + "epoch": 1.4884581497797358, + "grad_norm": 1.8268891947791475, + "learning_rate": 3.3744783447817177e-06, + "loss": 0.43847334384918213, + "step": 4224 + }, + { + "epoch": 1.4888105726872247, + "grad_norm": 1.7778298915864024, + "learning_rate": 3.370115407201724e-06, + "loss": 0.656914234161377, + "step": 4225 + }, + { + "epoch": 1.4891629955947137, + "grad_norm": 1.9329427480605288, + "learning_rate": 3.3657547202612128e-06, + "loss": 0.6379527449607849, + "step": 4226 + }, + { + "epoch": 1.4895154185022026, + "grad_norm": 2.0864597290427103, + "learning_rate": 3.3613962854405136e-06, + "loss": 0.6254120469093323, + "step": 4227 + }, + { + "epoch": 1.4898678414096915, + "grad_norm": 1.9356514762449182, + "learning_rate": 3.35704010421917e-06, + "loss": 0.6567566990852356, + "step": 4228 + }, + { + "epoch": 1.4902202643171807, + "grad_norm": 1.7537675986626187, + "learning_rate": 3.352686178075981e-06, + "loss": 0.5121499300003052, + "step": 4229 + }, + { + "epoch": 1.4905726872246696, + "grad_norm": 1.5674669685831402, + "learning_rate": 3.3483345084889595e-06, + "loss": 0.5727466344833374, + "step": 4230 + }, + { + "epoch": 1.4909251101321586, + "grad_norm": 2.005473410378335, + "learning_rate": 3.3439850969353614e-06, + "loss": 0.6013318300247192, + "step": 4231 + }, + { + "epoch": 1.4912775330396475, + "grad_norm": 1.8391832358116647, + "learning_rate": 3.3396379448916836e-06, + "loss": 0.6350653767585754, + "step": 4232 + }, + { + "epoch": 1.4916299559471367, + "grad_norm": 1.8543607360516363, + "learning_rate": 3.335293053833645e-06, + "loss": 0.7072123885154724, + "step": 4233 + }, + { + "epoch": 1.4919823788546256, + "grad_norm": 1.7154205163032374, + "learning_rate": 3.330950425236201e-06, + "loss": 
0.6208378076553345, + "step": 4234 + }, + { + "epoch": 1.4923348017621145, + "grad_norm": 2.0942315596519667, + "learning_rate": 3.3266100605735397e-06, + "loss": 0.656146764755249, + "step": 4235 + }, + { + "epoch": 1.4926872246696035, + "grad_norm": 1.953931788636606, + "learning_rate": 3.322271961319076e-06, + "loss": 0.6145347952842712, + "step": 4236 + }, + { + "epoch": 1.4930396475770924, + "grad_norm": 1.7528969029549952, + "learning_rate": 3.3179361289454694e-06, + "loss": 0.5876312255859375, + "step": 4237 + }, + { + "epoch": 1.4933920704845816, + "grad_norm": 1.601290490679199, + "learning_rate": 3.3136025649245897e-06, + "loss": 0.48365384340286255, + "step": 4238 + }, + { + "epoch": 1.4937444933920705, + "grad_norm": 2.189978154300805, + "learning_rate": 3.3092712707275467e-06, + "loss": 0.566576361656189, + "step": 4239 + }, + { + "epoch": 1.4940969162995594, + "grad_norm": 1.9878421762040837, + "learning_rate": 3.3049422478246886e-06, + "loss": 0.6982032060623169, + "step": 4240 + }, + { + "epoch": 1.4944493392070486, + "grad_norm": 1.9039317781349454, + "learning_rate": 3.3006154976855787e-06, + "loss": 0.5802686214447021, + "step": 4241 + }, + { + "epoch": 1.4948017621145375, + "grad_norm": 1.8292762393050834, + "learning_rate": 3.296291021779016e-06, + "loss": 0.6656724214553833, + "step": 4242 + }, + { + "epoch": 1.4951541850220265, + "grad_norm": 1.8194685949700777, + "learning_rate": 3.2919688215730227e-06, + "loss": 0.5081402063369751, + "step": 4243 + }, + { + "epoch": 1.4955066079295154, + "grad_norm": 1.858251792062496, + "learning_rate": 3.28764889853485e-06, + "loss": 0.6963785290718079, + "step": 4244 + }, + { + "epoch": 1.4958590308370043, + "grad_norm": 1.6076782907375928, + "learning_rate": 3.283331254130987e-06, + "loss": 0.4953869581222534, + "step": 4245 + }, + { + "epoch": 1.4962114537444933, + "grad_norm": 1.7009256372822803, + "learning_rate": 3.2790158898271283e-06, + "loss": 0.5495179295539856, + "step": 4246 + }, + { + 
"epoch": 1.4965638766519824, + "grad_norm": 1.5282320768079813, + "learning_rate": 3.274702807088208e-06, + "loss": 0.6238610148429871, + "step": 4247 + }, + { + "epoch": 1.4969162995594714, + "grad_norm": 1.7145305152154042, + "learning_rate": 3.270392007378389e-06, + "loss": 0.557083249092102, + "step": 4248 + }, + { + "epoch": 1.4972687224669603, + "grad_norm": 1.8458971793579602, + "learning_rate": 3.2660834921610495e-06, + "loss": 0.6317561864852905, + "step": 4249 + }, + { + "epoch": 1.4976211453744495, + "grad_norm": 2.025909664851984, + "learning_rate": 3.2617772628987974e-06, + "loss": 0.5957529544830322, + "step": 4250 + }, + { + "epoch": 1.4979735682819384, + "grad_norm": 1.8950835934769208, + "learning_rate": 3.2574733210534637e-06, + "loss": 0.7661205530166626, + "step": 4251 + }, + { + "epoch": 1.4983259911894273, + "grad_norm": 1.6920068443310292, + "learning_rate": 3.2531716680861024e-06, + "loss": 0.5616782903671265, + "step": 4252 + }, + { + "epoch": 1.4986784140969163, + "grad_norm": 1.770897830706882, + "learning_rate": 3.2488723054569905e-06, + "loss": 0.5679990649223328, + "step": 4253 + }, + { + "epoch": 1.4990308370044052, + "grad_norm": 1.8387315322882807, + "learning_rate": 3.2445752346256244e-06, + "loss": 0.6355923414230347, + "step": 4254 + }, + { + "epoch": 1.4993832599118941, + "grad_norm": 1.5734033631685667, + "learning_rate": 3.2402804570507316e-06, + "loss": 0.5050745010375977, + "step": 4255 + }, + { + "epoch": 1.4997356828193833, + "grad_norm": 1.9535070131295427, + "learning_rate": 3.2359879741902557e-06, + "loss": 0.6585286855697632, + "step": 4256 + }, + { + "epoch": 1.5000881057268722, + "grad_norm": 1.9369843836982625, + "learning_rate": 3.2316977875013567e-06, + "loss": 0.5108245611190796, + "step": 4257 + }, + { + "epoch": 1.5004405286343614, + "grad_norm": 1.7460361732263896, + "learning_rate": 3.2274098984404223e-06, + "loss": 0.5270702838897705, + "step": 4258 + }, + { + "epoch": 1.5007929515418503, + "grad_norm": 
2.09582870313145, + "learning_rate": 3.223124308463057e-06, + "loss": 0.6421051025390625, + "step": 4259 + }, + { + "epoch": 1.5011453744493393, + "grad_norm": 2.0173715825527454, + "learning_rate": 3.218841019024084e-06, + "loss": 0.6040945053100586, + "step": 4260 + }, + { + "epoch": 1.5014977973568282, + "grad_norm": 3.5488261180155023, + "learning_rate": 3.214560031577548e-06, + "loss": 0.6389988660812378, + "step": 4261 + }, + { + "epoch": 1.5018502202643171, + "grad_norm": 2.1449229280338096, + "learning_rate": 3.210281347576707e-06, + "loss": 0.6474273800849915, + "step": 4262 + }, + { + "epoch": 1.502202643171806, + "grad_norm": 2.0726789637634666, + "learning_rate": 3.206004968474048e-06, + "loss": 0.7020560503005981, + "step": 4263 + }, + { + "epoch": 1.502555066079295, + "grad_norm": 1.7677587583992656, + "learning_rate": 3.2017308957212644e-06, + "loss": 0.574647068977356, + "step": 4264 + }, + { + "epoch": 1.5029074889867842, + "grad_norm": 1.8152121117445819, + "learning_rate": 3.1974591307692724e-06, + "loss": 0.6912944316864014, + "step": 4265 + }, + { + "epoch": 1.503259911894273, + "grad_norm": 1.7825438750387144, + "learning_rate": 3.1931896750682036e-06, + "loss": 0.7738592028617859, + "step": 4266 + }, + { + "epoch": 1.5036123348017623, + "grad_norm": 1.7835054391965142, + "learning_rate": 3.188922530067402e-06, + "loss": 0.6418012380599976, + "step": 4267 + }, + { + "epoch": 1.5039647577092512, + "grad_norm": 2.0481798246782628, + "learning_rate": 3.1846576972154343e-06, + "loss": 0.639055609703064, + "step": 4268 + }, + { + "epoch": 1.5043171806167401, + "grad_norm": 1.8365579809471801, + "learning_rate": 3.1803951779600774e-06, + "loss": 0.5512406229972839, + "step": 4269 + }, + { + "epoch": 1.504669603524229, + "grad_norm": 1.7182877192220278, + "learning_rate": 3.1761349737483194e-06, + "loss": 0.5838354229927063, + "step": 4270 + }, + { + "epoch": 1.505022026431718, + "grad_norm": 1.5090233544437164, + "learning_rate": 
3.1718770860263747e-06, + "loss": 0.5903568267822266, + "step": 4271 + }, + { + "epoch": 1.505374449339207, + "grad_norm": 1.761348463041135, + "learning_rate": 3.1676215162396604e-06, + "loss": 0.5610073804855347, + "step": 4272 + }, + { + "epoch": 1.5057268722466959, + "grad_norm": 1.9899291186285208, + "learning_rate": 3.163368265832809e-06, + "loss": 0.6543136835098267, + "step": 4273 + }, + { + "epoch": 1.506079295154185, + "grad_norm": 1.9484911821126696, + "learning_rate": 3.1591173362496686e-06, + "loss": 0.6586440801620483, + "step": 4274 + }, + { + "epoch": 1.506431718061674, + "grad_norm": 1.7389367867721892, + "learning_rate": 3.1548687289332958e-06, + "loss": 0.5360713601112366, + "step": 4275 + }, + { + "epoch": 1.5067841409691631, + "grad_norm": 2.1157677007043243, + "learning_rate": 3.1506224453259615e-06, + "loss": 0.6695356369018555, + "step": 4276 + }, + { + "epoch": 1.507136563876652, + "grad_norm": 1.7594436585853632, + "learning_rate": 3.146378486869146e-06, + "loss": 0.5708016753196716, + "step": 4277 + }, + { + "epoch": 1.507488986784141, + "grad_norm": 1.997964983412431, + "learning_rate": 3.142136855003538e-06, + "loss": 0.5412342548370361, + "step": 4278 + }, + { + "epoch": 1.50784140969163, + "grad_norm": 1.645092688511499, + "learning_rate": 3.1378975511690468e-06, + "loss": 0.5392874479293823, + "step": 4279 + }, + { + "epoch": 1.5081938325991189, + "grad_norm": 2.1591157791946256, + "learning_rate": 3.133660576804781e-06, + "loss": 0.6559237241744995, + "step": 4280 + }, + { + "epoch": 1.5085462555066078, + "grad_norm": 1.6625372644532221, + "learning_rate": 3.1294259333490597e-06, + "loss": 0.49973511695861816, + "step": 4281 + }, + { + "epoch": 1.5088986784140968, + "grad_norm": 1.9292311285357981, + "learning_rate": 3.1251936222394152e-06, + "loss": 0.5458316206932068, + "step": 4282 + }, + { + "epoch": 1.509251101321586, + "grad_norm": 1.7771232071244591, + "learning_rate": 3.120963644912579e-06, + "loss": 0.628986656665802, + 
"step": 4283 + }, + { + "epoch": 1.5096035242290748, + "grad_norm": 1.956059007614116, + "learning_rate": 3.1167360028045103e-06, + "loss": 0.6234235167503357, + "step": 4284 + }, + { + "epoch": 1.509955947136564, + "grad_norm": 1.832929038299875, + "learning_rate": 3.112510697350348e-06, + "loss": 0.49892476201057434, + "step": 4285 + }, + { + "epoch": 1.510308370044053, + "grad_norm": 1.859590789761001, + "learning_rate": 3.1082877299844562e-06, + "loss": 0.42951709032058716, + "step": 4286 + }, + { + "epoch": 1.5106607929515419, + "grad_norm": 1.9658176092994974, + "learning_rate": 3.1040671021404045e-06, + "loss": 0.6392263770103455, + "step": 4287 + }, + { + "epoch": 1.5110132158590308, + "grad_norm": 1.9240075529588605, + "learning_rate": 3.099848815250964e-06, + "loss": 0.6198933124542236, + "step": 4288 + }, + { + "epoch": 1.5113656387665197, + "grad_norm": 2.6550374581713436, + "learning_rate": 3.0956328707481055e-06, + "loss": 0.7626048922538757, + "step": 4289 + }, + { + "epoch": 1.5117180616740087, + "grad_norm": 2.057470135822257, + "learning_rate": 3.0914192700630175e-06, + "loss": 0.5245747566223145, + "step": 4290 + }, + { + "epoch": 1.5120704845814978, + "grad_norm": 2.016409834872785, + "learning_rate": 3.0872080146260818e-06, + "loss": 0.6788556575775146, + "step": 4291 + }, + { + "epoch": 1.5124229074889868, + "grad_norm": 1.8970717527543317, + "learning_rate": 3.082999105866897e-06, + "loss": 0.6224241852760315, + "step": 4292 + }, + { + "epoch": 1.512775330396476, + "grad_norm": 1.8828342237083628, + "learning_rate": 3.0787925452142477e-06, + "loss": 0.706061840057373, + "step": 4293 + }, + { + "epoch": 1.5131277533039649, + "grad_norm": 1.8530285911040203, + "learning_rate": 3.07458833409613e-06, + "loss": 0.7075262665748596, + "step": 4294 + }, + { + "epoch": 1.5134801762114538, + "grad_norm": 1.8075779914700747, + "learning_rate": 3.0703864739397494e-06, + "loss": 0.4912101626396179, + "step": 4295 + }, + { + "epoch": 1.5138325991189427, + 
"grad_norm": 1.908543179959353, + "learning_rate": 3.066186966171507e-06, + "loss": 0.6530265808105469, + "step": 4296 + }, + { + "epoch": 1.5141850220264317, + "grad_norm": 2.12821134565194, + "learning_rate": 3.0619898122169946e-06, + "loss": 0.4905887246131897, + "step": 4297 + }, + { + "epoch": 1.5145374449339206, + "grad_norm": 1.626400447189927, + "learning_rate": 3.057795013501025e-06, + "loss": 0.5025225281715393, + "step": 4298 + }, + { + "epoch": 1.5148898678414096, + "grad_norm": 1.945132129374327, + "learning_rate": 3.0536025714475946e-06, + "loss": 0.5769479274749756, + "step": 4299 + }, + { + "epoch": 1.5152422907488987, + "grad_norm": 2.0281621255217526, + "learning_rate": 3.049412487479919e-06, + "loss": 0.6275384426116943, + "step": 4300 + }, + { + "epoch": 1.5155947136563876, + "grad_norm": 1.6860684781531563, + "learning_rate": 3.04522476302039e-06, + "loss": 0.5555096864700317, + "step": 4301 + }, + { + "epoch": 1.5159471365638768, + "grad_norm": 1.9260394424858205, + "learning_rate": 3.0410393994906096e-06, + "loss": 0.5605635643005371, + "step": 4302 + }, + { + "epoch": 1.5162995594713657, + "grad_norm": 2.127824876873509, + "learning_rate": 3.0368563983113864e-06, + "loss": 0.6006621718406677, + "step": 4303 + }, + { + "epoch": 1.5166519823788547, + "grad_norm": 2.012171091410243, + "learning_rate": 3.0326757609027147e-06, + "loss": 0.5288259983062744, + "step": 4304 + }, + { + "epoch": 1.5170044052863436, + "grad_norm": 1.7034257525965926, + "learning_rate": 3.0284974886837903e-06, + "loss": 0.5671676993370056, + "step": 4305 + }, + { + "epoch": 1.5173568281938326, + "grad_norm": 2.496889571382279, + "learning_rate": 3.0243215830730075e-06, + "loss": 0.6072134971618652, + "step": 4306 + }, + { + "epoch": 1.5177092511013215, + "grad_norm": 1.726261889224961, + "learning_rate": 3.020148045487953e-06, + "loss": 0.6010481119155884, + "step": 4307 + }, + { + "epoch": 1.5180616740088104, + "grad_norm": 1.6250908189476003, + "learning_rate": 
3.0159768773454225e-06, + "loss": 0.6126751899719238, + "step": 4308 + }, + { + "epoch": 1.5184140969162996, + "grad_norm": 1.6123380534859018, + "learning_rate": 3.011808080061387e-06, + "loss": 0.5408819317817688, + "step": 4309 + }, + { + "epoch": 1.5187665198237885, + "grad_norm": 1.6792977324898095, + "learning_rate": 3.0076416550510255e-06, + "loss": 0.6528562307357788, + "step": 4310 + }, + { + "epoch": 1.5191189427312777, + "grad_norm": 1.6431948485087644, + "learning_rate": 3.003477603728715e-06, + "loss": 0.6355241537094116, + "step": 4311 + }, + { + "epoch": 1.5194713656387666, + "grad_norm": 1.7630338655444058, + "learning_rate": 2.9993159275080174e-06, + "loss": 0.5511878728866577, + "step": 4312 + }, + { + "epoch": 1.5198237885462555, + "grad_norm": 1.9093354982688662, + "learning_rate": 2.9951566278016943e-06, + "loss": 0.5066816806793213, + "step": 4313 + }, + { + "epoch": 1.5201762114537445, + "grad_norm": 1.676344611272679, + "learning_rate": 2.9909997060216966e-06, + "loss": 0.5636533498764038, + "step": 4314 + }, + { + "epoch": 1.5205286343612334, + "grad_norm": 1.8885420705538216, + "learning_rate": 2.9868451635791706e-06, + "loss": 0.49742352962493896, + "step": 4315 + }, + { + "epoch": 1.5208810572687224, + "grad_norm": 2.013877525146858, + "learning_rate": 2.9826930018844533e-06, + "loss": 0.7264617681503296, + "step": 4316 + }, + { + "epoch": 1.5212334801762113, + "grad_norm": 1.8792043539230026, + "learning_rate": 2.978543222347076e-06, + "loss": 0.5342350006103516, + "step": 4317 + }, + { + "epoch": 1.5215859030837005, + "grad_norm": 1.7569176385310192, + "learning_rate": 2.9743958263757554e-06, + "loss": 0.4324883818626404, + "step": 4318 + }, + { + "epoch": 1.5219383259911894, + "grad_norm": 1.8546496052344164, + "learning_rate": 2.970250815378409e-06, + "loss": 0.5867510437965393, + "step": 4319 + }, + { + "epoch": 1.5222907488986785, + "grad_norm": 1.8415605839915816, + "learning_rate": 2.966108190762138e-06, + "loss": 
0.7176594734191895, + "step": 4320 + }, + { + "epoch": 1.5226431718061675, + "grad_norm": 1.967906535494615, + "learning_rate": 2.9619679539332337e-06, + "loss": 0.5810995101928711, + "step": 4321 + }, + { + "epoch": 1.5229955947136564, + "grad_norm": 2.478705006420029, + "learning_rate": 2.957830106297177e-06, + "loss": 0.6262675523757935, + "step": 4322 + }, + { + "epoch": 1.5233480176211454, + "grad_norm": 2.1743670559442245, + "learning_rate": 2.9536946492586383e-06, + "loss": 0.7743325233459473, + "step": 4323 + }, + { + "epoch": 1.5237004405286343, + "grad_norm": 1.7129659102014092, + "learning_rate": 2.9495615842214776e-06, + "loss": 0.7706553936004639, + "step": 4324 + }, + { + "epoch": 1.5240528634361232, + "grad_norm": 1.6835245148440698, + "learning_rate": 2.9454309125887405e-06, + "loss": 0.5982425808906555, + "step": 4325 + }, + { + "epoch": 1.5244052863436124, + "grad_norm": 1.8547174799711497, + "learning_rate": 2.9413026357626596e-06, + "loss": 0.5580830574035645, + "step": 4326 + }, + { + "epoch": 1.5247577092511013, + "grad_norm": 1.8954299514318398, + "learning_rate": 2.937176755144662e-06, + "loss": 0.5316063165664673, + "step": 4327 + }, + { + "epoch": 1.5251101321585903, + "grad_norm": 1.7578719545795178, + "learning_rate": 2.9330532721353523e-06, + "loss": 0.574161171913147, + "step": 4328 + }, + { + "epoch": 1.5254625550660794, + "grad_norm": 1.7055567103896054, + "learning_rate": 2.9289321881345257e-06, + "loss": 0.5339558720588684, + "step": 4329 + }, + { + "epoch": 1.5258149779735684, + "grad_norm": 1.7071106155323514, + "learning_rate": 2.9248135045411607e-06, + "loss": 0.594109296798706, + "step": 4330 + }, + { + "epoch": 1.5261674008810573, + "grad_norm": 2.0022142230843873, + "learning_rate": 2.9206972227534237e-06, + "loss": 0.5953024625778198, + "step": 4331 + }, + { + "epoch": 1.5265198237885462, + "grad_norm": 1.6369885387081085, + "learning_rate": 2.916583344168663e-06, + "loss": 0.5142296552658081, + "step": 4332 + }, + { + 
"epoch": 1.5268722466960352, + "grad_norm": 1.7205930689665365, + "learning_rate": 2.912471870183411e-06, + "loss": 0.5796314477920532, + "step": 4333 + }, + { + "epoch": 1.527224669603524, + "grad_norm": 1.984086822092815, + "learning_rate": 2.9083628021933886e-06, + "loss": 0.7202566862106323, + "step": 4334 + }, + { + "epoch": 1.5275770925110133, + "grad_norm": 2.2205082372485756, + "learning_rate": 2.9042561415934956e-06, + "loss": 0.6684188842773438, + "step": 4335 + }, + { + "epoch": 1.5279295154185022, + "grad_norm": 1.7000543577524454, + "learning_rate": 2.9001518897778147e-06, + "loss": 0.5377634763717651, + "step": 4336 + }, + { + "epoch": 1.5282819383259914, + "grad_norm": 1.7985805373418047, + "learning_rate": 2.8960500481396115e-06, + "loss": 0.5780486464500427, + "step": 4337 + }, + { + "epoch": 1.5286343612334803, + "grad_norm": 1.7528900086241466, + "learning_rate": 2.891950618071333e-06, + "loss": 0.6020476818084717, + "step": 4338 + }, + { + "epoch": 1.5289867841409692, + "grad_norm": 2.1939247460310303, + "learning_rate": 2.8878536009646106e-06, + "loss": 0.6076337099075317, + "step": 4339 + }, + { + "epoch": 1.5293392070484582, + "grad_norm": 1.9795227787355654, + "learning_rate": 2.883758998210251e-06, + "loss": 0.6370673179626465, + "step": 4340 + }, + { + "epoch": 1.529691629955947, + "grad_norm": 1.8686242611734982, + "learning_rate": 2.879666811198244e-06, + "loss": 0.41594892740249634, + "step": 4341 + }, + { + "epoch": 1.530044052863436, + "grad_norm": 1.9135398095116771, + "learning_rate": 2.8755770413177632e-06, + "loss": 0.4506857693195343, + "step": 4342 + }, + { + "epoch": 1.530396475770925, + "grad_norm": 1.730478313082556, + "learning_rate": 2.8714896899571575e-06, + "loss": 0.5883188247680664, + "step": 4343 + }, + { + "epoch": 1.5307488986784141, + "grad_norm": 1.9672614741507624, + "learning_rate": 2.8674047585039545e-06, + "loss": 0.6327757239341736, + "step": 4344 + }, + { + "epoch": 1.531101321585903, + "grad_norm": 
1.958310227558085, + "learning_rate": 2.863322248344862e-06, + "loss": 0.6241307258605957, + "step": 4345 + }, + { + "epoch": 1.5314537444933922, + "grad_norm": 1.9476460174005816, + "learning_rate": 2.859242160865764e-06, + "loss": 0.6982603669166565, + "step": 4346 + }, + { + "epoch": 1.5318061674008812, + "grad_norm": 1.8365437525119523, + "learning_rate": 2.8551644974517236e-06, + "loss": 0.6293624639511108, + "step": 4347 + }, + { + "epoch": 1.53215859030837, + "grad_norm": 1.8744498682554205, + "learning_rate": 2.85108925948698e-06, + "loss": 0.5630898475646973, + "step": 4348 + }, + { + "epoch": 1.532511013215859, + "grad_norm": 1.9156047998547734, + "learning_rate": 2.847016448354948e-06, + "loss": 0.5300726294517517, + "step": 4349 + }, + { + "epoch": 1.532863436123348, + "grad_norm": 1.763805411540431, + "learning_rate": 2.8429460654382257e-06, + "loss": 0.6302311420440674, + "step": 4350 + }, + { + "epoch": 1.533215859030837, + "grad_norm": 2.0169994553834467, + "learning_rate": 2.8388781121185815e-06, + "loss": 0.5063371658325195, + "step": 4351 + }, + { + "epoch": 1.5335682819383258, + "grad_norm": 1.995201540637565, + "learning_rate": 2.8348125897769496e-06, + "loss": 0.6116877198219299, + "step": 4352 + }, + { + "epoch": 1.533920704845815, + "grad_norm": 1.7792408625607217, + "learning_rate": 2.830749499793458e-06, + "loss": 0.5671982169151306, + "step": 4353 + }, + { + "epoch": 1.534273127753304, + "grad_norm": 1.927509688688397, + "learning_rate": 2.826688843547395e-06, + "loss": 0.6537752747535706, + "step": 4354 + }, + { + "epoch": 1.534625550660793, + "grad_norm": 2.0558781996543805, + "learning_rate": 2.8226306224172283e-06, + "loss": 0.6608545780181885, + "step": 4355 + }, + { + "epoch": 1.534977973568282, + "grad_norm": 2.2266474146630655, + "learning_rate": 2.8185748377805977e-06, + "loss": 0.7038587331771851, + "step": 4356 + }, + { + "epoch": 1.535330396475771, + "grad_norm": 2.084521400671401, + "learning_rate": 2.8145214910143128e-06, + 
"loss": 0.7422336339950562, + "step": 4357 + }, + { + "epoch": 1.53568281938326, + "grad_norm": 1.7545985325343467, + "learning_rate": 2.8104705834943625e-06, + "loss": 0.5739270448684692, + "step": 4358 + }, + { + "epoch": 1.5360352422907488, + "grad_norm": 1.8063052751864486, + "learning_rate": 2.8064221165959073e-06, + "loss": 0.6429908275604248, + "step": 4359 + }, + { + "epoch": 1.5363876651982378, + "grad_norm": 2.071223390835828, + "learning_rate": 2.802376091693264e-06, + "loss": 0.5660578012466431, + "step": 4360 + }, + { + "epoch": 1.5367400881057267, + "grad_norm": 1.7313945668822706, + "learning_rate": 2.798332510159942e-06, + "loss": 0.4507398009300232, + "step": 4361 + }, + { + "epoch": 1.5370925110132159, + "grad_norm": 1.638677595892734, + "learning_rate": 2.7942913733686063e-06, + "loss": 0.5107634663581848, + "step": 4362 + }, + { + "epoch": 1.5374449339207048, + "grad_norm": 1.8351099655043759, + "learning_rate": 2.790252682691106e-06, + "loss": 0.505529522895813, + "step": 4363 + }, + { + "epoch": 1.537797356828194, + "grad_norm": 1.7171378177734038, + "learning_rate": 2.7862164394984405e-06, + "loss": 0.459098219871521, + "step": 4364 + }, + { + "epoch": 1.538149779735683, + "grad_norm": 1.6256824509042396, + "learning_rate": 2.782182645160789e-06, + "loss": 0.5200169086456299, + "step": 4365 + }, + { + "epoch": 1.5385022026431718, + "grad_norm": 2.1593491644665908, + "learning_rate": 2.778151301047506e-06, + "loss": 0.6723796725273132, + "step": 4366 + }, + { + "epoch": 1.5388546255506608, + "grad_norm": 1.7552269414614434, + "learning_rate": 2.7741224085271067e-06, + "loss": 0.5385584831237793, + "step": 4367 + }, + { + "epoch": 1.5392070484581497, + "grad_norm": 1.6358174263890735, + "learning_rate": 2.770095968967267e-06, + "loss": 0.5766934156417847, + "step": 4368 + }, + { + "epoch": 1.5395594713656386, + "grad_norm": 1.6116842273066272, + "learning_rate": 2.766071983734845e-06, + "loss": 0.6303011178970337, + "step": 4369 + }, + { + 
"epoch": 1.5399118942731278, + "grad_norm": 2.0294439046284736, + "learning_rate": 2.7620504541958525e-06, + "loss": 0.6192827224731445, + "step": 4370 + }, + { + "epoch": 1.5402643171806167, + "grad_norm": 1.9731545400175885, + "learning_rate": 2.758031381715485e-06, + "loss": 0.543215811252594, + "step": 4371 + }, + { + "epoch": 1.5406167400881057, + "grad_norm": 1.8102023756492311, + "learning_rate": 2.7540147676580808e-06, + "loss": 0.6364312171936035, + "step": 4372 + }, + { + "epoch": 1.5409691629955948, + "grad_norm": 1.7440307883728075, + "learning_rate": 2.750000613387157e-06, + "loss": 0.5625254511833191, + "step": 4373 + }, + { + "epoch": 1.5413215859030838, + "grad_norm": 2.1646055145888377, + "learning_rate": 2.7459889202654e-06, + "loss": 0.7304128408432007, + "step": 4374 + }, + { + "epoch": 1.5416740088105727, + "grad_norm": 2.1627384337401296, + "learning_rate": 2.7419796896546536e-06, + "loss": 0.676097571849823, + "step": 4375 + }, + { + "epoch": 1.5420264317180616, + "grad_norm": 1.9373952441867042, + "learning_rate": 2.7379729229159193e-06, + "loss": 0.7024539709091187, + "step": 4376 + }, + { + "epoch": 1.5423788546255506, + "grad_norm": 1.6778176206961017, + "learning_rate": 2.7339686214093774e-06, + "loss": 0.6357964277267456, + "step": 4377 + }, + { + "epoch": 1.5427312775330395, + "grad_norm": 1.8606991682829432, + "learning_rate": 2.729966786494361e-06, + "loss": 0.5254555940628052, + "step": 4378 + }, + { + "epoch": 1.5430837004405287, + "grad_norm": 1.527570009912515, + "learning_rate": 2.7259674195293697e-06, + "loss": 0.4899883270263672, + "step": 4379 + }, + { + "epoch": 1.5434361233480176, + "grad_norm": 1.725531709071361, + "learning_rate": 2.721970521872063e-06, + "loss": 0.5750056505203247, + "step": 4380 + }, + { + "epoch": 1.5437885462555068, + "grad_norm": 1.8900737960638598, + "learning_rate": 2.71797609487926e-06, + "loss": 0.5852059125900269, + "step": 4381 + }, + { + "epoch": 1.5441409691629957, + "grad_norm": 
1.8258629839457563, + "learning_rate": 2.71398413990695e-06, + "loss": 0.6360914707183838, + "step": 4382 + }, + { + "epoch": 1.5444933920704846, + "grad_norm": 1.7586915096989222, + "learning_rate": 2.7099946583102764e-06, + "loss": 0.5120062828063965, + "step": 4383 + }, + { + "epoch": 1.5448458149779736, + "grad_norm": 2.068877348919367, + "learning_rate": 2.706007651443543e-06, + "loss": 0.5798901319503784, + "step": 4384 + }, + { + "epoch": 1.5451982378854625, + "grad_norm": 2.014366735127449, + "learning_rate": 2.702023120660213e-06, + "loss": 0.5112065076828003, + "step": 4385 + }, + { + "epoch": 1.5455506607929514, + "grad_norm": 1.9281653354114374, + "learning_rate": 2.6980410673129133e-06, + "loss": 0.6136611700057983, + "step": 4386 + }, + { + "epoch": 1.5459030837004404, + "grad_norm": 1.6841076662412324, + "learning_rate": 2.694061492753426e-06, + "loss": 0.5944457054138184, + "step": 4387 + }, + { + "epoch": 1.5462555066079295, + "grad_norm": 1.9404009079173157, + "learning_rate": 2.690084398332692e-06, + "loss": 0.5931667685508728, + "step": 4388 + }, + { + "epoch": 1.5466079295154185, + "grad_norm": 1.607840859056915, + "learning_rate": 2.686109785400809e-06, + "loss": 0.6112217307090759, + "step": 4389 + }, + { + "epoch": 1.5469603524229076, + "grad_norm": 1.9090904865448288, + "learning_rate": 2.68213765530704e-06, + "loss": 0.549437940120697, + "step": 4390 + }, + { + "epoch": 1.5473127753303966, + "grad_norm": 1.9826888565576624, + "learning_rate": 2.6781680093997965e-06, + "loss": 0.674758791923523, + "step": 4391 + }, + { + "epoch": 1.5476651982378855, + "grad_norm": 1.874293916028551, + "learning_rate": 2.6742008490266504e-06, + "loss": 0.6015446186065674, + "step": 4392 + }, + { + "epoch": 1.5480176211453744, + "grad_norm": 2.2556212033260223, + "learning_rate": 2.6702361755343278e-06, + "loss": 0.5512514710426331, + "step": 4393 + }, + { + "epoch": 1.5483700440528634, + "grad_norm": 2.607818594949077, + "learning_rate": 
2.666273990268713e-06, + "loss": 0.6443158984184265, + "step": 4394 + }, + { + "epoch": 1.5487224669603523, + "grad_norm": 1.840692212890546, + "learning_rate": 2.6623142945748447e-06, + "loss": 0.5682512521743774, + "step": 4395 + }, + { + "epoch": 1.5490748898678413, + "grad_norm": 2.173690182254911, + "learning_rate": 2.658357089796917e-06, + "loss": 0.5544074773788452, + "step": 4396 + }, + { + "epoch": 1.5494273127753304, + "grad_norm": 1.726846155573174, + "learning_rate": 2.6544023772782736e-06, + "loss": 0.5811636447906494, + "step": 4397 + }, + { + "epoch": 1.5497797356828193, + "grad_norm": 1.790573455353959, + "learning_rate": 2.650450158361422e-06, + "loss": 0.4696553647518158, + "step": 4398 + }, + { + "epoch": 1.5501321585903085, + "grad_norm": 1.918353319441468, + "learning_rate": 2.6465004343880153e-06, + "loss": 0.6897521615028381, + "step": 4399 + }, + { + "epoch": 1.5504845814977974, + "grad_norm": 1.9780672696205217, + "learning_rate": 2.6425532066988613e-06, + "loss": 0.6154924631118774, + "step": 4400 + }, + { + "epoch": 1.5508370044052864, + "grad_norm": 2.0803038103367815, + "learning_rate": 2.6386084766339214e-06, + "loss": 0.5333596467971802, + "step": 4401 + }, + { + "epoch": 1.5511894273127753, + "grad_norm": 1.8190921046801005, + "learning_rate": 2.634666245532309e-06, + "loss": 0.6633985042572021, + "step": 4402 + }, + { + "epoch": 1.5515418502202643, + "grad_norm": 1.6722563074159322, + "learning_rate": 2.630726514732289e-06, + "loss": 0.7913509607315063, + "step": 4403 + }, + { + "epoch": 1.5518942731277532, + "grad_norm": 1.8118598393520884, + "learning_rate": 2.6267892855712763e-06, + "loss": 0.5776455402374268, + "step": 4404 + }, + { + "epoch": 1.5522466960352423, + "grad_norm": 1.68862603841886, + "learning_rate": 2.6228545593858357e-06, + "loss": 0.5912357568740845, + "step": 4405 + }, + { + "epoch": 1.5525991189427313, + "grad_norm": 2.1281645633634274, + "learning_rate": 2.618922337511689e-06, + "loss": 0.49319127202033997, + 
"step": 4406 + }, + { + "epoch": 1.5529515418502202, + "grad_norm": 1.9165140700223777, + "learning_rate": 2.6149926212837016e-06, + "loss": 0.5805023908615112, + "step": 4407 + }, + { + "epoch": 1.5533039647577094, + "grad_norm": 1.8889646736612442, + "learning_rate": 2.6110654120358902e-06, + "loss": 0.5635806918144226, + "step": 4408 + }, + { + "epoch": 1.5536563876651983, + "grad_norm": 1.6927734818193383, + "learning_rate": 2.6071407111014178e-06, + "loss": 0.5006709694862366, + "step": 4409 + }, + { + "epoch": 1.5540088105726872, + "grad_norm": 2.1152391301202695, + "learning_rate": 2.6032185198126005e-06, + "loss": 0.6035311818122864, + "step": 4410 + }, + { + "epoch": 1.5543612334801762, + "grad_norm": 1.7863466908276826, + "learning_rate": 2.599298839500899e-06, + "loss": 0.5978977680206299, + "step": 4411 + }, + { + "epoch": 1.5547136563876651, + "grad_norm": 1.8741259739913476, + "learning_rate": 2.5953816714969194e-06, + "loss": 0.6330617070198059, + "step": 4412 + }, + { + "epoch": 1.555066079295154, + "grad_norm": 2.090756152665107, + "learning_rate": 2.591467017130426e-06, + "loss": 0.6541750431060791, + "step": 4413 + }, + { + "epoch": 1.5554185022026432, + "grad_norm": 2.229850729984303, + "learning_rate": 2.5875548777303204e-06, + "loss": 0.5503655076026917, + "step": 4414 + }, + { + "epoch": 1.5557709251101322, + "grad_norm": 1.7715926792210983, + "learning_rate": 2.583645254624645e-06, + "loss": 0.5117509365081787, + "step": 4415 + }, + { + "epoch": 1.5561233480176213, + "grad_norm": 2.2945620883910953, + "learning_rate": 2.5797381491406027e-06, + "loss": 0.6699894070625305, + "step": 4416 + }, + { + "epoch": 1.5564757709251102, + "grad_norm": 2.0695182526571765, + "learning_rate": 2.5758335626045308e-06, + "loss": 0.6870071291923523, + "step": 4417 + }, + { + "epoch": 1.5568281938325992, + "grad_norm": 1.7821456199762375, + "learning_rate": 2.571931496341916e-06, + "loss": 0.7680954933166504, + "step": 4418 + }, + { + "epoch": 
1.5571806167400881, + "grad_norm": 2.2345593449058203, + "learning_rate": 2.568031951677389e-06, + "loss": 0.6504727602005005, + "step": 4419 + }, + { + "epoch": 1.557533039647577, + "grad_norm": 1.6796846625470907, + "learning_rate": 2.5641349299347196e-06, + "loss": 0.7101249098777771, + "step": 4420 + }, + { + "epoch": 1.557885462555066, + "grad_norm": 1.6800594114237326, + "learning_rate": 2.560240432436831e-06, + "loss": 0.5734864473342896, + "step": 4421 + }, + { + "epoch": 1.558237885462555, + "grad_norm": 2.065356657851052, + "learning_rate": 2.5563484605057854e-06, + "loss": 0.48660311102867126, + "step": 4422 + }, + { + "epoch": 1.558590308370044, + "grad_norm": 2.0678483817870847, + "learning_rate": 2.552459015462776e-06, + "loss": 0.6442986726760864, + "step": 4423 + }, + { + "epoch": 1.558942731277533, + "grad_norm": 1.721146259770593, + "learning_rate": 2.548572098628158e-06, + "loss": 0.5871995091438293, + "step": 4424 + }, + { + "epoch": 1.5592951541850222, + "grad_norm": 1.8900651182173844, + "learning_rate": 2.544687711321415e-06, + "loss": 0.5899579524993896, + "step": 4425 + }, + { + "epoch": 1.5596475770925111, + "grad_norm": 1.589021195095579, + "learning_rate": 2.540805854861177e-06, + "loss": 0.571341872215271, + "step": 4426 + }, + { + "epoch": 1.56, + "grad_norm": 2.074587814486514, + "learning_rate": 2.5369265305652112e-06, + "loss": 0.6297308206558228, + "step": 4427 + }, + { + "epoch": 1.560352422907489, + "grad_norm": 1.957815284803115, + "learning_rate": 2.5330497397504274e-06, + "loss": 0.6277692317962646, + "step": 4428 + }, + { + "epoch": 1.560704845814978, + "grad_norm": 1.8075270549654299, + "learning_rate": 2.5291754837328787e-06, + "loss": 0.5124595165252686, + "step": 4429 + }, + { + "epoch": 1.5610572687224669, + "grad_norm": 2.027466093132035, + "learning_rate": 2.5253037638277557e-06, + "loss": 0.6777669191360474, + "step": 4430 + }, + { + "epoch": 1.5614096916299558, + "grad_norm": 2.299371691906574, + "learning_rate": 
2.521434581349378e-06, + "loss": 0.7380247116088867, + "step": 4431 + }, + { + "epoch": 1.561762114537445, + "grad_norm": 2.0566157739817825, + "learning_rate": 2.5175679376112206e-06, + "loss": 0.6605849266052246, + "step": 4432 + }, + { + "epoch": 1.562114537444934, + "grad_norm": 1.7899790415054606, + "learning_rate": 2.5137038339258837e-06, + "loss": 0.5688329935073853, + "step": 4433 + }, + { + "epoch": 1.562466960352423, + "grad_norm": 2.1227992795896258, + "learning_rate": 2.5098422716051197e-06, + "loss": 0.6731508374214172, + "step": 4434 + }, + { + "epoch": 1.562819383259912, + "grad_norm": 1.766889438914358, + "learning_rate": 2.505983251959798e-06, + "loss": 0.5177330374717712, + "step": 4435 + }, + { + "epoch": 1.563171806167401, + "grad_norm": 1.793841264632356, + "learning_rate": 2.502126776299938e-06, + "loss": 0.5307918787002563, + "step": 4436 + }, + { + "epoch": 1.5635242290748899, + "grad_norm": 1.8402321267228738, + "learning_rate": 2.4982728459346974e-06, + "loss": 0.59647536277771, + "step": 4437 + }, + { + "epoch": 1.5638766519823788, + "grad_norm": 2.049156650890273, + "learning_rate": 2.494421462172365e-06, + "loss": 0.6215553283691406, + "step": 4438 + }, + { + "epoch": 1.5642290748898677, + "grad_norm": 1.7976631043220852, + "learning_rate": 2.490572626320359e-06, + "loss": 0.49461615085601807, + "step": 4439 + }, + { + "epoch": 1.5645814977973567, + "grad_norm": 2.4138380625358757, + "learning_rate": 2.486726339685247e-06, + "loss": 0.6625338196754456, + "step": 4440 + }, + { + "epoch": 1.5649339207048458, + "grad_norm": 1.5979739892152505, + "learning_rate": 2.4828826035727214e-06, + "loss": 0.4059983193874359, + "step": 4441 + }, + { + "epoch": 1.5652863436123348, + "grad_norm": 1.7298713789472393, + "learning_rate": 2.47904141928761e-06, + "loss": 0.6234895586967468, + "step": 4442 + }, + { + "epoch": 1.565638766519824, + "grad_norm": 1.8282339040044808, + "learning_rate": 2.4752027881338757e-06, + "loss": 0.513421893119812, + 
"step": 4443 + }, + { + "epoch": 1.5659911894273129, + "grad_norm": 2.0213648562049693, + "learning_rate": 2.4713667114146123e-06, + "loss": 0.6168510913848877, + "step": 4444 + }, + { + "epoch": 1.5663436123348018, + "grad_norm": 1.8904853102151467, + "learning_rate": 2.4675331904320533e-06, + "loss": 0.5474672317504883, + "step": 4445 + }, + { + "epoch": 1.5666960352422907, + "grad_norm": 2.020157324166176, + "learning_rate": 2.46370222648756e-06, + "loss": 0.7464281916618347, + "step": 4446 + }, + { + "epoch": 1.5670484581497797, + "grad_norm": 1.8187430699226648, + "learning_rate": 2.4598738208816155e-06, + "loss": 0.5890274047851562, + "step": 4447 + }, + { + "epoch": 1.5674008810572686, + "grad_norm": 2.0160604417207293, + "learning_rate": 2.4560479749138554e-06, + "loss": 0.7577700018882751, + "step": 4448 + }, + { + "epoch": 1.5677533039647578, + "grad_norm": 1.6711759350664435, + "learning_rate": 2.4522246898830302e-06, + "loss": 0.5374037027359009, + "step": 4449 + }, + { + "epoch": 1.5681057268722467, + "grad_norm": 1.7947512315133625, + "learning_rate": 2.4484039670870286e-06, + "loss": 0.44840407371520996, + "step": 4450 + }, + { + "epoch": 1.5684581497797356, + "grad_norm": 1.8087906354095658, + "learning_rate": 2.4445858078228647e-06, + "loss": 0.5144427418708801, + "step": 4451 + }, + { + "epoch": 1.5688105726872248, + "grad_norm": 1.7889124821216469, + "learning_rate": 2.440770213386684e-06, + "loss": 0.39119952917099, + "step": 4452 + }, + { + "epoch": 1.5691629955947137, + "grad_norm": 1.6376212389282347, + "learning_rate": 2.436957185073766e-06, + "loss": 0.5287434458732605, + "step": 4453 + }, + { + "epoch": 1.5695154185022027, + "grad_norm": 2.2578778571267315, + "learning_rate": 2.4331467241785157e-06, + "loss": 0.568587064743042, + "step": 4454 + }, + { + "epoch": 1.5698678414096916, + "grad_norm": 3.2977149916111608, + "learning_rate": 2.429338831994458e-06, + "loss": 0.5522792339324951, + "step": 4455 + }, + { + "epoch": 
1.5702202643171805, + "grad_norm": 1.7594156491061212, + "learning_rate": 2.425533509814262e-06, + "loss": 0.48070845007896423, + "step": 4456 + }, + { + "epoch": 1.5705726872246695, + "grad_norm": 1.631888097687176, + "learning_rate": 2.4217307589297135e-06, + "loss": 0.44293439388275146, + "step": 4457 + }, + { + "epoch": 1.5709251101321586, + "grad_norm": 1.933449446432769, + "learning_rate": 2.4179305806317266e-06, + "loss": 0.5753301382064819, + "step": 4458 + }, + { + "epoch": 1.5712775330396476, + "grad_norm": 1.9958241636570169, + "learning_rate": 2.414132976210346e-06, + "loss": 0.5873000025749207, + "step": 4459 + }, + { + "epoch": 1.5716299559471367, + "grad_norm": 2.2068877987049955, + "learning_rate": 2.410337946954736e-06, + "loss": 0.6084823608398438, + "step": 4460 + }, + { + "epoch": 1.5719823788546257, + "grad_norm": 1.743876311662913, + "learning_rate": 2.4065454941531963e-06, + "loss": 0.541124165058136, + "step": 4461 + }, + { + "epoch": 1.5723348017621146, + "grad_norm": 1.8080812306830252, + "learning_rate": 2.4027556190931446e-06, + "loss": 0.5170080661773682, + "step": 4462 + }, + { + "epoch": 1.5726872246696035, + "grad_norm": 1.817245899938438, + "learning_rate": 2.398968323061125e-06, + "loss": 0.5613514184951782, + "step": 4463 + }, + { + "epoch": 1.5730396475770925, + "grad_norm": 1.7097401781842303, + "learning_rate": 2.395183607342807e-06, + "loss": 0.6645728349685669, + "step": 4464 + }, + { + "epoch": 1.5733920704845814, + "grad_norm": 1.8730205237982336, + "learning_rate": 2.391401473222983e-06, + "loss": 0.7077093124389648, + "step": 4465 + }, + { + "epoch": 1.5737444933920703, + "grad_norm": 1.7460518248753176, + "learning_rate": 2.387621921985571e-06, + "loss": 0.5687523484230042, + "step": 4466 + }, + { + "epoch": 1.5740969162995595, + "grad_norm": 1.9850945169232843, + "learning_rate": 2.38384495491361e-06, + "loss": 0.5837362408638, + "step": 4467 + }, + { + "epoch": 1.5744493392070484, + "grad_norm": 2.051593268912329, + 
"learning_rate": 2.3800705732892615e-06, + "loss": 0.5552037358283997, + "step": 4468 + }, + { + "epoch": 1.5748017621145376, + "grad_norm": 1.8128967121473578, + "learning_rate": 2.376298778393814e-06, + "loss": 0.5502952337265015, + "step": 4469 + }, + { + "epoch": 1.5751541850220265, + "grad_norm": 1.958629504700592, + "learning_rate": 2.3725295715076734e-06, + "loss": 0.5621509552001953, + "step": 4470 + }, + { + "epoch": 1.5755066079295155, + "grad_norm": 2.20917213599842, + "learning_rate": 2.3687629539103676e-06, + "loss": 0.6703782081604004, + "step": 4471 + }, + { + "epoch": 1.5758590308370044, + "grad_norm": 1.6659443121840707, + "learning_rate": 2.3649989268805453e-06, + "loss": 0.5681235194206238, + "step": 4472 + }, + { + "epoch": 1.5762114537444933, + "grad_norm": 1.6009126465101926, + "learning_rate": 2.361237491695978e-06, + "loss": 0.611667811870575, + "step": 4473 + }, + { + "epoch": 1.5765638766519823, + "grad_norm": 1.7200740539010873, + "learning_rate": 2.3574786496335546e-06, + "loss": 0.5758671760559082, + "step": 4474 + }, + { + "epoch": 1.5769162995594712, + "grad_norm": 1.9125579541010735, + "learning_rate": 2.3537224019692863e-06, + "loss": 0.4865596294403076, + "step": 4475 + }, + { + "epoch": 1.5772687224669604, + "grad_norm": 1.8564502689111453, + "learning_rate": 2.3499687499782976e-06, + "loss": 0.6356204152107239, + "step": 4476 + }, + { + "epoch": 1.5776211453744493, + "grad_norm": 2.1421860610476022, + "learning_rate": 2.346217694934847e-06, + "loss": 0.7177166938781738, + "step": 4477 + }, + { + "epoch": 1.5779735682819385, + "grad_norm": 1.5480906826266605, + "learning_rate": 2.3424692381122882e-06, + "loss": 0.5727916955947876, + "step": 4478 + }, + { + "epoch": 1.5783259911894274, + "grad_norm": 1.8719733775312895, + "learning_rate": 2.3387233807831144e-06, + "loss": 0.4904511570930481, + "step": 4479 + }, + { + "epoch": 1.5786784140969163, + "grad_norm": 1.781780296857209, + "learning_rate": 2.3349801242189262e-06, + "loss": 
0.6029622554779053, + "step": 4480 + }, + { + "epoch": 1.5790308370044053, + "grad_norm": 1.7377028122196188, + "learning_rate": 2.3312394696904404e-06, + "loss": 0.6462864875793457, + "step": 4481 + }, + { + "epoch": 1.5793832599118942, + "grad_norm": 2.2050402923740555, + "learning_rate": 2.327501418467495e-06, + "loss": 0.6000367403030396, + "step": 4482 + }, + { + "epoch": 1.5797356828193831, + "grad_norm": 1.8056795992302546, + "learning_rate": 2.3237659718190398e-06, + "loss": 0.5498829483985901, + "step": 4483 + }, + { + "epoch": 1.580088105726872, + "grad_norm": 1.9193344841770834, + "learning_rate": 2.320033131013142e-06, + "loss": 0.5445006489753723, + "step": 4484 + }, + { + "epoch": 1.5804405286343612, + "grad_norm": 1.737360484366453, + "learning_rate": 2.316302897316992e-06, + "loss": 0.4878338575363159, + "step": 4485 + }, + { + "epoch": 1.5807929515418502, + "grad_norm": 1.9395301127212525, + "learning_rate": 2.3125752719968763e-06, + "loss": 0.473583459854126, + "step": 4486 + }, + { + "epoch": 1.5811453744493393, + "grad_norm": 1.885736275905952, + "learning_rate": 2.308850256318218e-06, + "loss": 0.6530570983886719, + "step": 4487 + }, + { + "epoch": 1.5814977973568283, + "grad_norm": 1.9957270393411881, + "learning_rate": 2.30512785154554e-06, + "loss": 0.6925215721130371, + "step": 4488 + }, + { + "epoch": 1.5818502202643172, + "grad_norm": 2.319012517660613, + "learning_rate": 2.3014080589424837e-06, + "loss": 0.6210705637931824, + "step": 4489 + }, + { + "epoch": 1.5822026431718061, + "grad_norm": 1.9814470349632005, + "learning_rate": 2.2976908797718013e-06, + "loss": 0.5843231678009033, + "step": 4490 + }, + { + "epoch": 1.582555066079295, + "grad_norm": 1.8411432529202023, + "learning_rate": 2.2939763152953576e-06, + "loss": 0.7014307379722595, + "step": 4491 + }, + { + "epoch": 1.582907488986784, + "grad_norm": 2.432500927945977, + "learning_rate": 2.2902643667741386e-06, + "loss": 0.563744843006134, + "step": 4492 + }, + { + "epoch": 
1.5832599118942732, + "grad_norm": 2.0467865020897227, + "learning_rate": 2.286555035468233e-06, + "loss": 0.6067275404930115, + "step": 4493 + }, + { + "epoch": 1.5836123348017621, + "grad_norm": 1.4471777617782167, + "learning_rate": 2.282848322636836e-06, + "loss": 0.5471328496932983, + "step": 4494 + }, + { + "epoch": 1.583964757709251, + "grad_norm": 1.8188988721843682, + "learning_rate": 2.2791442295382693e-06, + "loss": 0.4994550943374634, + "step": 4495 + }, + { + "epoch": 1.5843171806167402, + "grad_norm": 1.9672025899108128, + "learning_rate": 2.275442757429954e-06, + "loss": 0.6064262390136719, + "step": 4496 + }, + { + "epoch": 1.5846696035242291, + "grad_norm": 1.8109350365291292, + "learning_rate": 2.2717439075684268e-06, + "loss": 0.5119039416313171, + "step": 4497 + }, + { + "epoch": 1.585022026431718, + "grad_norm": 2.2031235285356883, + "learning_rate": 2.26804768120933e-06, + "loss": 0.7276502251625061, + "step": 4498 + }, + { + "epoch": 1.585374449339207, + "grad_norm": 2.0480046358265827, + "learning_rate": 2.264354079607416e-06, + "loss": 0.6175409555435181, + "step": 4499 + }, + { + "epoch": 1.585726872246696, + "grad_norm": 2.165546737643913, + "learning_rate": 2.2606631040165517e-06, + "loss": 0.6289592981338501, + "step": 4500 + }, + { + "epoch": 1.5860792951541849, + "grad_norm": 1.626913781336784, + "learning_rate": 2.2569747556897103e-06, + "loss": 0.5802761316299438, + "step": 4501 + }, + { + "epoch": 1.586431718061674, + "grad_norm": 1.6717876401169283, + "learning_rate": 2.2532890358789604e-06, + "loss": 0.5883978605270386, + "step": 4502 + }, + { + "epoch": 1.586784140969163, + "grad_norm": 1.7433478934489002, + "learning_rate": 2.2496059458355e-06, + "loss": 0.6915061473846436, + "step": 4503 + }, + { + "epoch": 1.5871365638766521, + "grad_norm": 1.7904879000491816, + "learning_rate": 2.2459254868096194e-06, + "loss": 0.6255539655685425, + "step": 4504 + }, + { + "epoch": 1.587488986784141, + "grad_norm": 2.0290072373401706, + 
"learning_rate": 2.2422476600507203e-06, + "loss": 0.6788307428359985, + "step": 4505 + }, + { + "epoch": 1.58784140969163, + "grad_norm": 1.8646329547804459, + "learning_rate": 2.2385724668073104e-06, + "loss": 0.5651443004608154, + "step": 4506 + }, + { + "epoch": 1.588193832599119, + "grad_norm": 1.6858252262208455, + "learning_rate": 2.2348999083270005e-06, + "loss": 0.5308901071548462, + "step": 4507 + }, + { + "epoch": 1.5885462555066079, + "grad_norm": 2.3264820621642084, + "learning_rate": 2.2312299858565156e-06, + "loss": 0.60570228099823, + "step": 4508 + }, + { + "epoch": 1.5888986784140968, + "grad_norm": 1.8330509972931788, + "learning_rate": 2.22756270064168e-06, + "loss": 0.6544185876846313, + "step": 4509 + }, + { + "epoch": 1.5892511013215858, + "grad_norm": 1.7565673285953047, + "learning_rate": 2.2238980539274156e-06, + "loss": 0.667883038520813, + "step": 4510 + }, + { + "epoch": 1.589603524229075, + "grad_norm": 1.7707733782287267, + "learning_rate": 2.2202360469577622e-06, + "loss": 0.647671103477478, + "step": 4511 + }, + { + "epoch": 1.5899559471365639, + "grad_norm": 1.8031539733499908, + "learning_rate": 2.216576680975856e-06, + "loss": 0.6990867257118225, + "step": 4512 + }, + { + "epoch": 1.590308370044053, + "grad_norm": 1.6913080596921681, + "learning_rate": 2.212919957223938e-06, + "loss": 0.6292023658752441, + "step": 4513 + }, + { + "epoch": 1.590660792951542, + "grad_norm": 2.0512598736304763, + "learning_rate": 2.2092658769433504e-06, + "loss": 0.638721227645874, + "step": 4514 + }, + { + "epoch": 1.5910132158590309, + "grad_norm": 2.0710919586830365, + "learning_rate": 2.2056144413745396e-06, + "loss": 0.5622225403785706, + "step": 4515 + }, + { + "epoch": 1.5913656387665198, + "grad_norm": 1.9225600729192178, + "learning_rate": 2.2019656517570576e-06, + "loss": 0.44093507528305054, + "step": 4516 + }, + { + "epoch": 1.5917180616740088, + "grad_norm": 1.9689195876449703, + "learning_rate": 2.198319509329556e-06, + "loss": 
0.6889619827270508, + "step": 4517 + }, + { + "epoch": 1.5920704845814977, + "grad_norm": 1.8723694409082583, + "learning_rate": 2.1946760153297773e-06, + "loss": 0.5873552560806274, + "step": 4518 + }, + { + "epoch": 1.5924229074889866, + "grad_norm": 2.3733819724747245, + "learning_rate": 2.191035170994584e-06, + "loss": 0.7172325849533081, + "step": 4519 + }, + { + "epoch": 1.5927753303964758, + "grad_norm": 1.5631566998768178, + "learning_rate": 2.187396977559927e-06, + "loss": 0.520845890045166, + "step": 4520 + }, + { + "epoch": 1.5931277533039647, + "grad_norm": 1.5657344992000655, + "learning_rate": 2.1837614362608574e-06, + "loss": 0.5241606831550598, + "step": 4521 + }, + { + "epoch": 1.5934801762114539, + "grad_norm": 2.0290302307971433, + "learning_rate": 2.1801285483315303e-06, + "loss": 0.583808422088623, + "step": 4522 + }, + { + "epoch": 1.5938325991189428, + "grad_norm": 1.829890026298915, + "learning_rate": 2.1764983150051955e-06, + "loss": 0.4648814797401428, + "step": 4523 + }, + { + "epoch": 1.5941850220264318, + "grad_norm": 1.9603824667877958, + "learning_rate": 2.1728707375142087e-06, + "loss": 0.590090274810791, + "step": 4524 + }, + { + "epoch": 1.5945374449339207, + "grad_norm": 2.0292397946897527, + "learning_rate": 2.16924581709002e-06, + "loss": 0.6554102897644043, + "step": 4525 + }, + { + "epoch": 1.5948898678414096, + "grad_norm": 2.011864917811992, + "learning_rate": 2.1656235549631677e-06, + "loss": 0.5880511999130249, + "step": 4526 + }, + { + "epoch": 1.5952422907488986, + "grad_norm": 1.6246832017365502, + "learning_rate": 2.1620039523633074e-06, + "loss": 0.5779908299446106, + "step": 4527 + }, + { + "epoch": 1.5955947136563877, + "grad_norm": 1.9147900218294176, + "learning_rate": 2.1583870105191775e-06, + "loss": 0.5030412673950195, + "step": 4528 + }, + { + "epoch": 1.5959471365638767, + "grad_norm": 1.9632795275127009, + "learning_rate": 2.1547727306586173e-06, + "loss": 0.5667461156845093, + "step": 4529 + }, + { + 
"epoch": 1.5962995594713656, + "grad_norm": 2.3190730605108882, + "learning_rate": 2.151161114008563e-06, + "loss": 0.6820607781410217, + "step": 4530 + }, + { + "epoch": 1.5966519823788548, + "grad_norm": 1.7640709477354637, + "learning_rate": 2.1475521617950425e-06, + "loss": 0.6165209412574768, + "step": 4531 + }, + { + "epoch": 1.5970044052863437, + "grad_norm": 1.897918487033638, + "learning_rate": 2.1439458752431887e-06, + "loss": 0.5987168550491333, + "step": 4532 + }, + { + "epoch": 1.5973568281938326, + "grad_norm": 1.8946893490374197, + "learning_rate": 2.1403422555772226e-06, + "loss": 0.5161086320877075, + "step": 4533 + }, + { + "epoch": 1.5977092511013216, + "grad_norm": 1.817150642667859, + "learning_rate": 2.1367413040204543e-06, + "loss": 0.5216903686523438, + "step": 4534 + }, + { + "epoch": 1.5980616740088105, + "grad_norm": 1.7820775067820096, + "learning_rate": 2.133143021795302e-06, + "loss": 0.5664666891098022, + "step": 4535 + }, + { + "epoch": 1.5984140969162994, + "grad_norm": 1.8205676682468495, + "learning_rate": 2.129547410123268e-06, + "loss": 0.501051127910614, + "step": 4536 + }, + { + "epoch": 1.5987665198237886, + "grad_norm": 1.5799563385798543, + "learning_rate": 2.1259544702249515e-06, + "loss": 0.5466792583465576, + "step": 4537 + }, + { + "epoch": 1.5991189427312775, + "grad_norm": 1.9007615560911546, + "learning_rate": 2.122364203320043e-06, + "loss": 0.5295613408088684, + "step": 4538 + }, + { + "epoch": 1.5994713656387667, + "grad_norm": 1.6670646942482272, + "learning_rate": 2.1187766106273224e-06, + "loss": 0.5406922101974487, + "step": 4539 + }, + { + "epoch": 1.5998237885462556, + "grad_norm": 2.0700620230157125, + "learning_rate": 2.1151916933646764e-06, + "loss": 0.5908178687095642, + "step": 4540 + }, + { + "epoch": 1.6001762114537446, + "grad_norm": 1.8405525752725544, + "learning_rate": 2.1116094527490594e-06, + "loss": 0.6207743883132935, + "step": 4541 + }, + { + "epoch": 1.6005286343612335, + "grad_norm": 
2.7642600887250652, + "learning_rate": 2.1080298899965413e-06, + "loss": 0.5655614137649536, + "step": 4542 + }, + { + "epoch": 1.6008810572687224, + "grad_norm": 1.5764846584358823, + "learning_rate": 2.104453006322268e-06, + "loss": 0.6019319295883179, + "step": 4543 + }, + { + "epoch": 1.6012334801762114, + "grad_norm": 1.8499785252270624, + "learning_rate": 2.1008788029404794e-06, + "loss": 0.6109766364097595, + "step": 4544 + }, + { + "epoch": 1.6015859030837003, + "grad_norm": 1.8285934792669327, + "learning_rate": 2.0973072810645078e-06, + "loss": 0.5309078693389893, + "step": 4545 + }, + { + "epoch": 1.6019383259911895, + "grad_norm": 1.7116030885611606, + "learning_rate": 2.093738441906774e-06, + "loss": 0.5440298318862915, + "step": 4546 + }, + { + "epoch": 1.6022907488986784, + "grad_norm": 1.6012955775631803, + "learning_rate": 2.0901722866787842e-06, + "loss": 0.46502384543418884, + "step": 4547 + }, + { + "epoch": 1.6026431718061676, + "grad_norm": 1.7999501734847188, + "learning_rate": 2.086608816591146e-06, + "loss": 0.4822906255722046, + "step": 4548 + }, + { + "epoch": 1.6029955947136565, + "grad_norm": 1.8169323717501906, + "learning_rate": 2.083048032853534e-06, + "loss": 0.6382625699043274, + "step": 4549 + }, + { + "epoch": 1.6033480176211454, + "grad_norm": 1.7542851479568786, + "learning_rate": 2.0794899366747334e-06, + "loss": 0.6070914268493652, + "step": 4550 + }, + { + "epoch": 1.6037004405286344, + "grad_norm": 1.8496689505105712, + "learning_rate": 2.0759345292626045e-06, + "loss": 0.5953283309936523, + "step": 4551 + }, + { + "epoch": 1.6040528634361233, + "grad_norm": 1.6448363622587787, + "learning_rate": 2.0723818118240958e-06, + "loss": 0.47553640604019165, + "step": 4552 + }, + { + "epoch": 1.6044052863436122, + "grad_norm": 2.007835441279153, + "learning_rate": 2.0688317855652463e-06, + "loss": 0.7020712494850159, + "step": 4553 + }, + { + "epoch": 1.6047577092511012, + "grad_norm": 1.739770344308816, + "learning_rate": 
2.0652844516911776e-06, + "loss": 0.5998836159706116, + "step": 4554 + }, + { + "epoch": 1.6051101321585903, + "grad_norm": 1.7690620328907303, + "learning_rate": 2.0617398114060983e-06, + "loss": 0.6501786708831787, + "step": 4555 + }, + { + "epoch": 1.6054625550660793, + "grad_norm": 1.7628232586759778, + "learning_rate": 2.0581978659133097e-06, + "loss": 0.6444278955459595, + "step": 4556 + }, + { + "epoch": 1.6058149779735684, + "grad_norm": 1.8812364367093761, + "learning_rate": 2.0546586164151827e-06, + "loss": 0.6756579875946045, + "step": 4557 + }, + { + "epoch": 1.6061674008810574, + "grad_norm": 1.9541887465796286, + "learning_rate": 2.051122064113189e-06, + "loss": 0.6043737530708313, + "step": 4558 + }, + { + "epoch": 1.6065198237885463, + "grad_norm": 1.7992795463772795, + "learning_rate": 2.047588210207877e-06, + "loss": 0.6504104137420654, + "step": 4559 + }, + { + "epoch": 1.6068722466960352, + "grad_norm": 1.8447157864854533, + "learning_rate": 2.044057055898879e-06, + "loss": 0.6586685180664062, + "step": 4560 + }, + { + "epoch": 1.6072246696035242, + "grad_norm": 1.6895598009184531, + "learning_rate": 2.0405286023849125e-06, + "loss": 0.4463368058204651, + "step": 4561 + }, + { + "epoch": 1.607577092511013, + "grad_norm": 1.626067629091748, + "learning_rate": 2.037002850863777e-06, + "loss": 0.5208157896995544, + "step": 4562 + }, + { + "epoch": 1.607929515418502, + "grad_norm": 2.325947552099387, + "learning_rate": 2.033479802532354e-06, + "loss": 0.612602174282074, + "step": 4563 + }, + { + "epoch": 1.6082819383259912, + "grad_norm": 1.8677335810734068, + "learning_rate": 2.0299594585866166e-06, + "loss": 0.6871482133865356, + "step": 4564 + }, + { + "epoch": 1.6086343612334801, + "grad_norm": 2.1450630320575863, + "learning_rate": 2.0264418202215998e-06, + "loss": 0.5770177245140076, + "step": 4565 + }, + { + "epoch": 1.6089867841409693, + "grad_norm": 2.0018570918486263, + "learning_rate": 2.0229268886314413e-06, + "loss": 0.600841224193573, 
+ "step": 4566 + }, + { + "epoch": 1.6093392070484582, + "grad_norm": 1.4951834973656204, + "learning_rate": 2.0194146650093494e-06, + "loss": 0.47742071747779846, + "step": 4567 + }, + { + "epoch": 1.6096916299559472, + "grad_norm": 1.932667797658379, + "learning_rate": 2.015905150547612e-06, + "loss": 0.5528711080551147, + "step": 4568 + }, + { + "epoch": 1.610044052863436, + "grad_norm": 1.7893968437532208, + "learning_rate": 2.0123983464376028e-06, + "loss": 0.6892603635787964, + "step": 4569 + }, + { + "epoch": 1.610396475770925, + "grad_norm": 2.0432539431091405, + "learning_rate": 2.0088942538697687e-06, + "loss": 0.593653678894043, + "step": 4570 + }, + { + "epoch": 1.610748898678414, + "grad_norm": 1.913622035178548, + "learning_rate": 2.005392874033646e-06, + "loss": 0.5570813417434692, + "step": 4571 + }, + { + "epoch": 1.6111013215859031, + "grad_norm": 1.7912413841249368, + "learning_rate": 2.0018942081178426e-06, + "loss": 0.6723357439041138, + "step": 4572 + }, + { + "epoch": 1.611453744493392, + "grad_norm": 1.8833118579628767, + "learning_rate": 1.9983982573100413e-06, + "loss": 0.5333940982818604, + "step": 4573 + }, + { + "epoch": 1.611806167400881, + "grad_norm": 2.1881508790927358, + "learning_rate": 1.9949050227970148e-06, + "loss": 0.6404193043708801, + "step": 4574 + }, + { + "epoch": 1.6121585903083702, + "grad_norm": 1.9103565569987608, + "learning_rate": 1.991414505764605e-06, + "loss": 0.6831241250038147, + "step": 4575 + }, + { + "epoch": 1.612511013215859, + "grad_norm": 2.3229832844307063, + "learning_rate": 1.9879267073977337e-06, + "loss": 0.6741847991943359, + "step": 4576 + }, + { + "epoch": 1.612863436123348, + "grad_norm": 1.843434925588856, + "learning_rate": 1.9844416288804004e-06, + "loss": 0.5234787464141846, + "step": 4577 + }, + { + "epoch": 1.613215859030837, + "grad_norm": 1.931234115746558, + "learning_rate": 1.9809592713956782e-06, + "loss": 0.6462803483009338, + "step": 4578 + }, + { + "epoch": 1.613568281938326, + 
"grad_norm": 2.495392945939654, + "learning_rate": 1.977479636125724e-06, + "loss": 0.612025797367096, + "step": 4579 + }, + { + "epoch": 1.6139207048458148, + "grad_norm": 1.6414504893846202, + "learning_rate": 1.9740027242517668e-06, + "loss": 0.5065322518348694, + "step": 4580 + }, + { + "epoch": 1.614273127753304, + "grad_norm": 1.9613495904560583, + "learning_rate": 1.9705285369540994e-06, + "loss": 0.4986911714076996, + "step": 4581 + }, + { + "epoch": 1.614625550660793, + "grad_norm": 2.1185650604413926, + "learning_rate": 1.967057075412111e-06, + "loss": 0.6030969619750977, + "step": 4582 + }, + { + "epoch": 1.614977973568282, + "grad_norm": 1.8032946015429019, + "learning_rate": 1.963588340804251e-06, + "loss": 0.6116718649864197, + "step": 4583 + }, + { + "epoch": 1.615330396475771, + "grad_norm": 1.9008591407855147, + "learning_rate": 1.960122334308047e-06, + "loss": 0.8064850568771362, + "step": 4584 + }, + { + "epoch": 1.61568281938326, + "grad_norm": 2.130250646945173, + "learning_rate": 1.9566590571000997e-06, + "loss": 0.7416974306106567, + "step": 4585 + }, + { + "epoch": 1.616035242290749, + "grad_norm": 2.0285944926888604, + "learning_rate": 1.9531985103560813e-06, + "loss": 0.48169833421707153, + "step": 4586 + }, + { + "epoch": 1.6163876651982378, + "grad_norm": 8.08226040018375, + "learning_rate": 1.949740695250746e-06, + "loss": 0.7766422629356384, + "step": 4587 + }, + { + "epoch": 1.6167400881057268, + "grad_norm": 1.6227557131714891, + "learning_rate": 1.9462856129579144e-06, + "loss": 0.3793888986110687, + "step": 4588 + }, + { + "epoch": 1.6170925110132157, + "grad_norm": 1.6662726387585254, + "learning_rate": 1.94283326465047e-06, + "loss": 0.6129955053329468, + "step": 4589 + }, + { + "epoch": 1.6174449339207049, + "grad_norm": 1.927411767174183, + "learning_rate": 1.9393836515003874e-06, + "loss": 0.7420347929000854, + "step": 4590 + }, + { + "epoch": 1.6177973568281938, + "grad_norm": 1.810002162071199, + "learning_rate": 
1.9359367746786993e-06, + "loss": 0.49013108015060425, + "step": 4591 + }, + { + "epoch": 1.618149779735683, + "grad_norm": 1.8150752517575908, + "learning_rate": 1.932492635355513e-06, + "loss": 0.5198413133621216, + "step": 4592 + }, + { + "epoch": 1.618502202643172, + "grad_norm": 1.9402976415289777, + "learning_rate": 1.929051234700007e-06, + "loss": 0.6031092405319214, + "step": 4593 + }, + { + "epoch": 1.6188546255506608, + "grad_norm": 2.041490312444486, + "learning_rate": 1.9256125738804264e-06, + "loss": 0.6269406080245972, + "step": 4594 + }, + { + "epoch": 1.6192070484581498, + "grad_norm": 1.801972947869227, + "learning_rate": 1.922176654064096e-06, + "loss": 0.4518774747848511, + "step": 4595 + }, + { + "epoch": 1.6195594713656387, + "grad_norm": 1.8680481961289441, + "learning_rate": 1.9187434764174027e-06, + "loss": 0.6199424862861633, + "step": 4596 + }, + { + "epoch": 1.6199118942731277, + "grad_norm": 2.634014207343412, + "learning_rate": 1.9153130421057955e-06, + "loss": 0.5155355930328369, + "step": 4597 + }, + { + "epoch": 1.6202643171806166, + "grad_norm": 1.8081505074484028, + "learning_rate": 1.9118853522938087e-06, + "loss": 0.6188424229621887, + "step": 4598 + }, + { + "epoch": 1.6206167400881057, + "grad_norm": 1.8999856535081827, + "learning_rate": 1.908460408145033e-06, + "loss": 0.5807337164878845, + "step": 4599 + }, + { + "epoch": 1.6209691629955947, + "grad_norm": 1.6142171687185456, + "learning_rate": 1.9050382108221311e-06, + "loss": 0.5258378982543945, + "step": 4600 + }, + { + "epoch": 1.6213215859030838, + "grad_norm": 1.9194714558474444, + "learning_rate": 1.9016187614868308e-06, + "loss": 0.6612311601638794, + "step": 4601 + }, + { + "epoch": 1.6216740088105728, + "grad_norm": 1.7849999472385678, + "learning_rate": 1.8982020612999285e-06, + "loss": 0.611383855342865, + "step": 4602 + }, + { + "epoch": 1.6220264317180617, + "grad_norm": 1.7599275323638883, + "learning_rate": 1.894788111421294e-06, + "loss": 0.6111105680465698, 
+ "step": 4603 + }, + { + "epoch": 1.6223788546255506, + "grad_norm": 2.061255928544227, + "learning_rate": 1.8913769130098504e-06, + "loss": 0.7554557919502258, + "step": 4604 + }, + { + "epoch": 1.6227312775330396, + "grad_norm": 1.7818402726516558, + "learning_rate": 1.887968467223591e-06, + "loss": 0.597324013710022, + "step": 4605 + }, + { + "epoch": 1.6230837004405285, + "grad_norm": 2.3192399293978014, + "learning_rate": 1.8845627752195839e-06, + "loss": 0.6232750415802002, + "step": 4606 + }, + { + "epoch": 1.6234361233480175, + "grad_norm": 1.7697166073683794, + "learning_rate": 1.8811598381539543e-06, + "loss": 0.45699936151504517, + "step": 4607 + }, + { + "epoch": 1.6237885462555066, + "grad_norm": 1.9980768091261172, + "learning_rate": 1.87775965718189e-06, + "loss": 0.5307953953742981, + "step": 4608 + }, + { + "epoch": 1.6241409691629956, + "grad_norm": 1.8817640717556428, + "learning_rate": 1.8743622334576495e-06, + "loss": 0.6013764142990112, + "step": 4609 + }, + { + "epoch": 1.6244933920704847, + "grad_norm": 2.0614740198183066, + "learning_rate": 1.8709675681345485e-06, + "loss": 0.5143340826034546, + "step": 4610 + }, + { + "epoch": 1.6248458149779736, + "grad_norm": 1.6895900050976231, + "learning_rate": 1.8675756623649788e-06, + "loss": 0.506861686706543, + "step": 4611 + }, + { + "epoch": 1.6251982378854626, + "grad_norm": 2.223885866703504, + "learning_rate": 1.8641865173003793e-06, + "loss": 0.6807849407196045, + "step": 4612 + }, + { + "epoch": 1.6255506607929515, + "grad_norm": 1.8930990565263293, + "learning_rate": 1.8608001340912573e-06, + "loss": 0.592629075050354, + "step": 4613 + }, + { + "epoch": 1.6259030837004405, + "grad_norm": 2.032831166123834, + "learning_rate": 1.8574165138871925e-06, + "loss": 0.5669249296188354, + "step": 4614 + }, + { + "epoch": 1.6262555066079294, + "grad_norm": 1.9071887451281335, + "learning_rate": 1.8540356578368135e-06, + "loss": 0.7123057246208191, + "step": 4615 + }, + { + "epoch": 
1.6266079295154185, + "grad_norm": 1.7499585996323015, + "learning_rate": 1.8506575670878168e-06, + "loss": 0.5844429731369019, + "step": 4616 + }, + { + "epoch": 1.6269603524229075, + "grad_norm": 1.8176797951508414, + "learning_rate": 1.8472822427869597e-06, + "loss": 0.661457359790802, + "step": 4617 + }, + { + "epoch": 1.6273127753303964, + "grad_norm": 1.9714232511915755, + "learning_rate": 1.8439096860800565e-06, + "loss": 0.6944575905799866, + "step": 4618 + }, + { + "epoch": 1.6276651982378856, + "grad_norm": 1.9471855664955058, + "learning_rate": 1.8405398981119927e-06, + "loss": 0.5818712115287781, + "step": 4619 + }, + { + "epoch": 1.6280176211453745, + "grad_norm": 1.8573981084806426, + "learning_rate": 1.8371728800266964e-06, + "loss": 0.6373921632766724, + "step": 4620 + }, + { + "epoch": 1.6283700440528635, + "grad_norm": 1.8455409169726698, + "learning_rate": 1.8338086329671734e-06, + "loss": 0.4629862904548645, + "step": 4621 + }, + { + "epoch": 1.6287224669603524, + "grad_norm": 2.1547215929268306, + "learning_rate": 1.8304471580754779e-06, + "loss": 0.6537790894508362, + "step": 4622 + }, + { + "epoch": 1.6290748898678413, + "grad_norm": 1.9071168587624383, + "learning_rate": 1.8270884564927272e-06, + "loss": 0.527474582195282, + "step": 4623 + }, + { + "epoch": 1.6294273127753303, + "grad_norm": 1.9134019886674338, + "learning_rate": 1.8237325293590934e-06, + "loss": 0.48941463232040405, + "step": 4624 + }, + { + "epoch": 1.6297797356828194, + "grad_norm": 1.7797372995747724, + "learning_rate": 1.8203793778138123e-06, + "loss": 0.6276243925094604, + "step": 4625 + }, + { + "epoch": 1.6301321585903084, + "grad_norm": 2.175835170708709, + "learning_rate": 1.8170290029951708e-06, + "loss": 0.6339844465255737, + "step": 4626 + }, + { + "epoch": 1.6304845814977975, + "grad_norm": 1.8667689453086813, + "learning_rate": 1.813681406040524e-06, + "loss": 0.517188549041748, + "step": 4627 + }, + { + "epoch": 1.6308370044052865, + "grad_norm": 
1.8956914399941025, + "learning_rate": 1.8103365880862667e-06, + "loss": 0.576552152633667, + "step": 4628 + }, + { + "epoch": 1.6311894273127754, + "grad_norm": 1.7936413452903872, + "learning_rate": 1.8069945502678688e-06, + "loss": 0.5703557729721069, + "step": 4629 + }, + { + "epoch": 1.6315418502202643, + "grad_norm": 1.9048409586347532, + "learning_rate": 1.8036552937198447e-06, + "loss": 0.538072943687439, + "step": 4630 + }, + { + "epoch": 1.6318942731277533, + "grad_norm": 1.6721149802212347, + "learning_rate": 1.8003188195757693e-06, + "loss": 0.4144761562347412, + "step": 4631 + }, + { + "epoch": 1.6322466960352422, + "grad_norm": 2.056410628146389, + "learning_rate": 1.7969851289682704e-06, + "loss": 0.5357951521873474, + "step": 4632 + }, + { + "epoch": 1.6325991189427311, + "grad_norm": 1.9601913826257962, + "learning_rate": 1.7936542230290333e-06, + "loss": 0.6158766746520996, + "step": 4633 + }, + { + "epoch": 1.6329515418502203, + "grad_norm": 2.018782202231636, + "learning_rate": 1.790326102888794e-06, + "loss": 0.7278525233268738, + "step": 4634 + }, + { + "epoch": 1.6333039647577092, + "grad_norm": 1.8937378067838377, + "learning_rate": 1.787000769677354e-06, + "loss": 0.5113881230354309, + "step": 4635 + }, + { + "epoch": 1.6336563876651984, + "grad_norm": 2.2218997592930987, + "learning_rate": 1.7836782245235485e-06, + "loss": 0.6247432827949524, + "step": 4636 + }, + { + "epoch": 1.6340088105726873, + "grad_norm": 1.9409043558834718, + "learning_rate": 1.7803584685552877e-06, + "loss": 0.513325572013855, + "step": 4637 + }, + { + "epoch": 1.6343612334801763, + "grad_norm": 2.023194297584799, + "learning_rate": 1.7770415028995213e-06, + "loss": 0.4980276823043823, + "step": 4638 + }, + { + "epoch": 1.6347136563876652, + "grad_norm": 1.8669544509684106, + "learning_rate": 1.7737273286822565e-06, + "loss": 0.5832515954971313, + "step": 4639 + }, + { + "epoch": 1.6350660792951541, + "grad_norm": 1.7519671458346908, + "learning_rate": 
1.7704159470285532e-06, + "loss": 0.6030116081237793, + "step": 4640 + }, + { + "epoch": 1.635418502202643, + "grad_norm": 2.26980120712081, + "learning_rate": 1.7671073590625188e-06, + "loss": 0.5494866371154785, + "step": 4641 + }, + { + "epoch": 1.635770925110132, + "grad_norm": 1.8803060042220399, + "learning_rate": 1.7638015659073216e-06, + "loss": 0.617791473865509, + "step": 4642 + }, + { + "epoch": 1.6361233480176212, + "grad_norm": 1.8809591920257003, + "learning_rate": 1.760498568685175e-06, + "loss": 0.5213589668273926, + "step": 4643 + }, + { + "epoch": 1.63647577092511, + "grad_norm": 1.7835752431606857, + "learning_rate": 1.7571983685173367e-06, + "loss": 0.5114192962646484, + "step": 4644 + }, + { + "epoch": 1.6368281938325993, + "grad_norm": 1.8264916856765907, + "learning_rate": 1.7539009665241291e-06, + "loss": 0.6207156181335449, + "step": 4645 + }, + { + "epoch": 1.6371806167400882, + "grad_norm": 1.7037955383522276, + "learning_rate": 1.750606363824915e-06, + "loss": 0.5893350839614868, + "step": 4646 + }, + { + "epoch": 1.6375330396475771, + "grad_norm": 2.0239756750398077, + "learning_rate": 1.7473145615381092e-06, + "loss": 0.6453898549079895, + "step": 4647 + }, + { + "epoch": 1.637885462555066, + "grad_norm": 1.623565893456343, + "learning_rate": 1.7440255607811773e-06, + "loss": 0.5098680853843689, + "step": 4648 + }, + { + "epoch": 1.638237885462555, + "grad_norm": 1.9009179186379688, + "learning_rate": 1.7407393626706305e-06, + "loss": 0.5841408967971802, + "step": 4649 + }, + { + "epoch": 1.638590308370044, + "grad_norm": 1.8903189372223002, + "learning_rate": 1.7374559683220337e-06, + "loss": 0.5593127012252808, + "step": 4650 + }, + { + "epoch": 1.638942731277533, + "grad_norm": 1.9192509501465884, + "learning_rate": 1.7341753788499983e-06, + "loss": 0.6885190606117249, + "step": 4651 + }, + { + "epoch": 1.639295154185022, + "grad_norm": 2.019948918382337, + "learning_rate": 1.730897595368175e-06, + "loss": 0.6271092891693115, + 
"step": 4652 + }, + { + "epoch": 1.639647577092511, + "grad_norm": 1.8193728432309102, + "learning_rate": 1.7276226189892763e-06, + "loss": 0.6035536527633667, + "step": 4653 + }, + { + "epoch": 1.6400000000000001, + "grad_norm": 1.876741558260643, + "learning_rate": 1.724350450825052e-06, + "loss": 0.49980080127716064, + "step": 4654 + }, + { + "epoch": 1.640352422907489, + "grad_norm": 1.945483701689467, + "learning_rate": 1.721081091986303e-06, + "loss": 0.6056489944458008, + "step": 4655 + }, + { + "epoch": 1.640704845814978, + "grad_norm": 1.998934183218588, + "learning_rate": 1.717814543582873e-06, + "loss": 0.5611459016799927, + "step": 4656 + }, + { + "epoch": 1.641057268722467, + "grad_norm": 1.8501618159787931, + "learning_rate": 1.7145508067236515e-06, + "loss": 0.5655262470245361, + "step": 4657 + }, + { + "epoch": 1.6414096916299559, + "grad_norm": 2.17470073262635, + "learning_rate": 1.7112898825165814e-06, + "loss": 0.7793601751327515, + "step": 4658 + }, + { + "epoch": 1.6417621145374448, + "grad_norm": 1.8078904709838137, + "learning_rate": 1.7080317720686434e-06, + "loss": 0.6587018370628357, + "step": 4659 + }, + { + "epoch": 1.642114537444934, + "grad_norm": 2.0052578395520313, + "learning_rate": 1.7047764764858598e-06, + "loss": 0.5546305775642395, + "step": 4660 + }, + { + "epoch": 1.642466960352423, + "grad_norm": 2.2168924782846844, + "learning_rate": 1.7015239968733066e-06, + "loss": 0.6215736865997314, + "step": 4661 + }, + { + "epoch": 1.642819383259912, + "grad_norm": 1.95167913439103, + "learning_rate": 1.6982743343350983e-06, + "loss": 0.5772532224655151, + "step": 4662 + }, + { + "epoch": 1.643171806167401, + "grad_norm": 1.9049742666250684, + "learning_rate": 1.6950274899743947e-06, + "loss": 0.567034900188446, + "step": 4663 + }, + { + "epoch": 1.64352422907489, + "grad_norm": 1.6486603082479945, + "learning_rate": 1.6917834648933985e-06, + "loss": 0.5306716561317444, + "step": 4664 + }, + { + "epoch": 1.6438766519823789, + 
"grad_norm": 1.923372734442966, + "learning_rate": 1.688542260193351e-06, + "loss": 0.6691634654998779, + "step": 4665 + }, + { + "epoch": 1.6442290748898678, + "grad_norm": 1.9073972200097022, + "learning_rate": 1.6853038769745466e-06, + "loss": 0.6071977615356445, + "step": 4666 + }, + { + "epoch": 1.6445814977973567, + "grad_norm": 3.0113580201176355, + "learning_rate": 1.6820683163363161e-06, + "loss": 0.743544340133667, + "step": 4667 + }, + { + "epoch": 1.6449339207048457, + "grad_norm": 2.2198521832647864, + "learning_rate": 1.6788355793770238e-06, + "loss": 0.5745127201080322, + "step": 4668 + }, + { + "epoch": 1.6452863436123348, + "grad_norm": 2.083730313741091, + "learning_rate": 1.6756056671940902e-06, + "loss": 0.5153336524963379, + "step": 4669 + }, + { + "epoch": 1.6456387665198238, + "grad_norm": 1.888215895134721, + "learning_rate": 1.6723785808839666e-06, + "loss": 0.5780388116836548, + "step": 4670 + }, + { + "epoch": 1.645991189427313, + "grad_norm": 1.975333041709577, + "learning_rate": 1.6691543215421513e-06, + "loss": 0.601921796798706, + "step": 4671 + }, + { + "epoch": 1.6463436123348019, + "grad_norm": 1.8402715148458082, + "learning_rate": 1.6659328902631766e-06, + "loss": 0.6636123657226562, + "step": 4672 + }, + { + "epoch": 1.6466960352422908, + "grad_norm": 1.804292320266694, + "learning_rate": 1.6627142881406188e-06, + "loss": 0.45225393772125244, + "step": 4673 + }, + { + "epoch": 1.6470484581497797, + "grad_norm": 1.862693343451114, + "learning_rate": 1.6594985162670984e-06, + "loss": 0.6406756043434143, + "step": 4674 + }, + { + "epoch": 1.6474008810572687, + "grad_norm": 2.11645792406816, + "learning_rate": 1.6562855757342632e-06, + "loss": 0.6735906600952148, + "step": 4675 + }, + { + "epoch": 1.6477533039647576, + "grad_norm": 1.9503356292211693, + "learning_rate": 1.6530754676328064e-06, + "loss": 0.515188992023468, + "step": 4676 + }, + { + "epoch": 1.6481057268722465, + "grad_norm": 1.721977079638204, + "learning_rate": 
1.6498681930524652e-06, + "loss": 0.5976129174232483, + "step": 4677 + }, + { + "epoch": 1.6484581497797357, + "grad_norm": 1.9285425022468947, + "learning_rate": 1.6466637530820074e-06, + "loss": 0.7367427945137024, + "step": 4678 + }, + { + "epoch": 1.6488105726872246, + "grad_norm": 2.073959448612198, + "learning_rate": 1.6434621488092385e-06, + "loss": 0.5173717737197876, + "step": 4679 + }, + { + "epoch": 1.6491629955947138, + "grad_norm": 2.1289983497571745, + "learning_rate": 1.6402633813210056e-06, + "loss": 0.7961066961288452, + "step": 4680 + }, + { + "epoch": 1.6495154185022027, + "grad_norm": 2.1150632325299488, + "learning_rate": 1.637067451703187e-06, + "loss": 0.8271595239639282, + "step": 4681 + }, + { + "epoch": 1.6498678414096917, + "grad_norm": 1.9513356704584446, + "learning_rate": 1.6338743610407103e-06, + "loss": 0.6818888783454895, + "step": 4682 + }, + { + "epoch": 1.6502202643171806, + "grad_norm": 2.182931567425792, + "learning_rate": 1.6306841104175219e-06, + "loss": 0.5168178677558899, + "step": 4683 + }, + { + "epoch": 1.6505726872246695, + "grad_norm": 1.8122401400933128, + "learning_rate": 1.627496700916612e-06, + "loss": 0.5792043209075928, + "step": 4684 + }, + { + "epoch": 1.6509251101321585, + "grad_norm": 1.8346977982265331, + "learning_rate": 1.624312133620013e-06, + "loss": 0.6099069118499756, + "step": 4685 + }, + { + "epoch": 1.6512775330396474, + "grad_norm": 1.7489569966562013, + "learning_rate": 1.6211304096087832e-06, + "loss": 0.4562867283821106, + "step": 4686 + }, + { + "epoch": 1.6516299559471366, + "grad_norm": 1.767208393167573, + "learning_rate": 1.61795152996302e-06, + "loss": 0.48648780584335327, + "step": 4687 + }, + { + "epoch": 1.6519823788546255, + "grad_norm": 1.619888597224146, + "learning_rate": 1.6147754957618533e-06, + "loss": 0.5351820588111877, + "step": 4688 + }, + { + "epoch": 1.6523348017621147, + "grad_norm": 1.5845894367063569, + "learning_rate": 1.6116023080834442e-06, + "loss": 
0.4646923542022705, + "step": 4689 + }, + { + "epoch": 1.6526872246696036, + "grad_norm": 2.0533783205545304, + "learning_rate": 1.608431968005001e-06, + "loss": 0.6257984638214111, + "step": 4690 + }, + { + "epoch": 1.6530396475770925, + "grad_norm": 1.6714052981831828, + "learning_rate": 1.605264476602747e-06, + "loss": 0.5646224021911621, + "step": 4691 + }, + { + "epoch": 1.6533920704845815, + "grad_norm": 1.9704920715227376, + "learning_rate": 1.6020998349519457e-06, + "loss": 0.6074661612510681, + "step": 4692 + }, + { + "epoch": 1.6537444933920704, + "grad_norm": 2.119532209280586, + "learning_rate": 1.598938044126901e-06, + "loss": 0.703096866607666, + "step": 4693 + }, + { + "epoch": 1.6540969162995593, + "grad_norm": 2.0281924961410436, + "learning_rate": 1.5957791052009397e-06, + "loss": 0.6677542924880981, + "step": 4694 + }, + { + "epoch": 1.6544493392070485, + "grad_norm": 2.2554606939897313, + "learning_rate": 1.5926230192464232e-06, + "loss": 0.755639910697937, + "step": 4695 + }, + { + "epoch": 1.6548017621145374, + "grad_norm": 2.0543326225263705, + "learning_rate": 1.5894697873347442e-06, + "loss": 0.7008202075958252, + "step": 4696 + }, + { + "epoch": 1.6551541850220264, + "grad_norm": 1.7892378339322623, + "learning_rate": 1.5863194105363244e-06, + "loss": 0.5049681067466736, + "step": 4697 + }, + { + "epoch": 1.6555066079295155, + "grad_norm": 1.8394208195845667, + "learning_rate": 1.583171889920626e-06, + "loss": 0.7415407299995422, + "step": 4698 + }, + { + "epoch": 1.6558590308370045, + "grad_norm": 2.0103355889821404, + "learning_rate": 1.5800272265561256e-06, + "loss": 0.7949470281600952, + "step": 4699 + }, + { + "epoch": 1.6562114537444934, + "grad_norm": 2.2401604191268456, + "learning_rate": 1.5768854215103435e-06, + "loss": 0.5892510414123535, + "step": 4700 + }, + { + "epoch": 1.6565638766519823, + "grad_norm": 2.1732638193025076, + "learning_rate": 1.5737464758498243e-06, + "loss": 0.5357394814491272, + "step": 4701 + }, + { + 
"epoch": 1.6569162995594713, + "grad_norm": 1.92797804038562, + "learning_rate": 1.5706103906401416e-06, + "loss": 0.6078016757965088, + "step": 4702 + }, + { + "epoch": 1.6572687224669602, + "grad_norm": 2.019695184899454, + "learning_rate": 1.5674771669459e-06, + "loss": 0.5858899354934692, + "step": 4703 + }, + { + "epoch": 1.6576211453744494, + "grad_norm": 1.819706102358174, + "learning_rate": 1.5643468058307287e-06, + "loss": 0.5783329010009766, + "step": 4704 + }, + { + "epoch": 1.6579735682819383, + "grad_norm": 1.8104985438999985, + "learning_rate": 1.561219308357288e-06, + "loss": 0.5412800312042236, + "step": 4705 + }, + { + "epoch": 1.6583259911894275, + "grad_norm": 1.5193820753894371, + "learning_rate": 1.5580946755872727e-06, + "loss": 0.5609365701675415, + "step": 4706 + }, + { + "epoch": 1.6586784140969164, + "grad_norm": 2.2157168701611427, + "learning_rate": 1.554972908581388e-06, + "loss": 0.45193177461624146, + "step": 4707 + }, + { + "epoch": 1.6590308370044053, + "grad_norm": 1.885008861796424, + "learning_rate": 1.5518540083993838e-06, + "loss": 0.6402257680892944, + "step": 4708 + }, + { + "epoch": 1.6593832599118943, + "grad_norm": 1.906792902482494, + "learning_rate": 1.5487379761000276e-06, + "loss": 0.5956071615219116, + "step": 4709 + }, + { + "epoch": 1.6597356828193832, + "grad_norm": 1.5229764109639101, + "learning_rate": 1.5456248127411156e-06, + "loss": 0.5975273847579956, + "step": 4710 + }, + { + "epoch": 1.6600881057268722, + "grad_norm": 2.2860844716103514, + "learning_rate": 1.54251451937947e-06, + "loss": 0.6914929151535034, + "step": 4711 + }, + { + "epoch": 1.660440528634361, + "grad_norm": 2.141875122923791, + "learning_rate": 1.5394070970709384e-06, + "loss": 0.5867592096328735, + "step": 4712 + }, + { + "epoch": 1.6607929515418502, + "grad_norm": 1.96612759503979, + "learning_rate": 1.5363025468703917e-06, + "loss": 0.6448687314987183, + "step": 4713 + }, + { + "epoch": 1.6611453744493392, + "grad_norm": 
1.8452223088884994, + "learning_rate": 1.5332008698317348e-06, + "loss": 0.5870485305786133, + "step": 4714 + }, + { + "epoch": 1.6614977973568283, + "grad_norm": 1.9043935409080608, + "learning_rate": 1.5301020670078803e-06, + "loss": 0.6336855888366699, + "step": 4715 + }, + { + "epoch": 1.6618502202643173, + "grad_norm": 1.9247468731228787, + "learning_rate": 1.527006139450784e-06, + "loss": 0.5924787521362305, + "step": 4716 + }, + { + "epoch": 1.6622026431718062, + "grad_norm": 1.5860620334804822, + "learning_rate": 1.523913088211415e-06, + "loss": 0.5817830562591553, + "step": 4717 + }, + { + "epoch": 1.6625550660792952, + "grad_norm": 1.8285246452015176, + "learning_rate": 1.5208229143397657e-06, + "loss": 0.5836390852928162, + "step": 4718 + }, + { + "epoch": 1.662907488986784, + "grad_norm": 1.5094327417455158, + "learning_rate": 1.5177356188848558e-06, + "loss": 0.47110515832901, + "step": 4719 + }, + { + "epoch": 1.663259911894273, + "grad_norm": 2.018838906344594, + "learning_rate": 1.5146512028947225e-06, + "loss": 0.6376635432243347, + "step": 4720 + }, + { + "epoch": 1.663612334801762, + "grad_norm": 1.7847798861513196, + "learning_rate": 1.5115696674164349e-06, + "loss": 0.6399784088134766, + "step": 4721 + }, + { + "epoch": 1.6639647577092511, + "grad_norm": 2.2125247577405127, + "learning_rate": 1.5084910134960773e-06, + "loss": 0.5891954898834229, + "step": 4722 + }, + { + "epoch": 1.66431718061674, + "grad_norm": 1.5827717360956535, + "learning_rate": 1.5054152421787505e-06, + "loss": 0.6358054876327515, + "step": 4723 + }, + { + "epoch": 1.6646696035242292, + "grad_norm": 1.855029533228232, + "learning_rate": 1.5023423545085892e-06, + "loss": 0.5072367191314697, + "step": 4724 + }, + { + "epoch": 1.6650220264317181, + "grad_norm": 1.8866512172900913, + "learning_rate": 1.4992723515287423e-06, + "loss": 0.5549881458282471, + "step": 4725 + }, + { + "epoch": 1.665374449339207, + "grad_norm": 1.5386308243299962, + "learning_rate": 
1.4962052342813804e-06, + "loss": 0.4833364188671112, + "step": 4726 + }, + { + "epoch": 1.665726872246696, + "grad_norm": 1.837283227568624, + "learning_rate": 1.4931410038076922e-06, + "loss": 0.6183017492294312, + "step": 4727 + }, + { + "epoch": 1.666079295154185, + "grad_norm": 1.8220970545699078, + "learning_rate": 1.4900796611478885e-06, + "loss": 0.4956335127353668, + "step": 4728 + }, + { + "epoch": 1.666431718061674, + "grad_norm": 2.0221134241832552, + "learning_rate": 1.4870212073412027e-06, + "loss": 0.7345337271690369, + "step": 4729 + }, + { + "epoch": 1.6667841409691628, + "grad_norm": 1.5143426871240313, + "learning_rate": 1.4839656434258864e-06, + "loss": 0.5324833393096924, + "step": 4730 + }, + { + "epoch": 1.667136563876652, + "grad_norm": 1.7373474898452002, + "learning_rate": 1.4809129704392e-06, + "loss": 0.5702322125434875, + "step": 4731 + }, + { + "epoch": 1.667488986784141, + "grad_norm": 1.7374551868496027, + "learning_rate": 1.4778631894174389e-06, + "loss": 0.46188884973526, + "step": 4732 + }, + { + "epoch": 1.66784140969163, + "grad_norm": 1.76911142349316, + "learning_rate": 1.474816301395906e-06, + "loss": 0.5505924224853516, + "step": 4733 + }, + { + "epoch": 1.668193832599119, + "grad_norm": 1.9422422566247162, + "learning_rate": 1.4717723074089251e-06, + "loss": 0.5889710187911987, + "step": 4734 + }, + { + "epoch": 1.668546255506608, + "grad_norm": 2.1059796200005825, + "learning_rate": 1.4687312084898387e-06, + "loss": 0.5794551372528076, + "step": 4735 + }, + { + "epoch": 1.668898678414097, + "grad_norm": 1.8650983467603144, + "learning_rate": 1.4656930056710006e-06, + "loss": 0.567146897315979, + "step": 4736 + }, + { + "epoch": 1.6692511013215858, + "grad_norm": 2.0850787713289067, + "learning_rate": 1.4626576999837938e-06, + "loss": 0.5330451130867004, + "step": 4737 + }, + { + "epoch": 1.6696035242290748, + "grad_norm": 1.868870689701364, + "learning_rate": 1.459625292458604e-06, + "loss": 0.5631227493286133, + "step": 
4738 + }, + { + "epoch": 1.669955947136564, + "grad_norm": 1.8773546830623118, + "learning_rate": 1.456595784124839e-06, + "loss": 0.5145374536514282, + "step": 4739 + }, + { + "epoch": 1.6703083700440529, + "grad_norm": 1.925388120075487, + "learning_rate": 1.453569176010927e-06, + "loss": 0.59378582239151, + "step": 4740 + }, + { + "epoch": 1.6706607929515418, + "grad_norm": 2.053494266916917, + "learning_rate": 1.4505454691443043e-06, + "loss": 0.5827980041503906, + "step": 4741 + }, + { + "epoch": 1.671013215859031, + "grad_norm": 1.6369511357690396, + "learning_rate": 1.4475246645514274e-06, + "loss": 0.5270858407020569, + "step": 4742 + }, + { + "epoch": 1.67136563876652, + "grad_norm": 2.2160806515473186, + "learning_rate": 1.4445067632577625e-06, + "loss": 0.4708535373210907, + "step": 4743 + }, + { + "epoch": 1.6717180616740088, + "grad_norm": 1.6498079624073576, + "learning_rate": 1.4414917662877924e-06, + "loss": 0.5505239963531494, + "step": 4744 + }, + { + "epoch": 1.6720704845814978, + "grad_norm": 1.8451834665357323, + "learning_rate": 1.4384796746650221e-06, + "loss": 0.5841302871704102, + "step": 4745 + }, + { + "epoch": 1.6724229074889867, + "grad_norm": 1.8665624096794386, + "learning_rate": 1.4354704894119554e-06, + "loss": 0.627534031867981, + "step": 4746 + }, + { + "epoch": 1.6727753303964756, + "grad_norm": 1.9526983627618448, + "learning_rate": 1.4324642115501165e-06, + "loss": 0.6160094738006592, + "step": 4747 + }, + { + "epoch": 1.6731277533039648, + "grad_norm": 1.6909104461316946, + "learning_rate": 1.4294608421000489e-06, + "loss": 0.5420609712600708, + "step": 4748 + }, + { + "epoch": 1.6734801762114537, + "grad_norm": 1.9597720364889828, + "learning_rate": 1.4264603820813006e-06, + "loss": 0.7729714512825012, + "step": 4749 + }, + { + "epoch": 1.6738325991189429, + "grad_norm": 1.8780592513411432, + "learning_rate": 1.4234628325124345e-06, + "loss": 0.6458747386932373, + "step": 4750 + }, + { + "epoch": 1.6741850220264318, + 
"grad_norm": 1.717642350217617, + "learning_rate": 1.4204681944110242e-06, + "loss": 0.5250670313835144, + "step": 4751 + }, + { + "epoch": 1.6745374449339208, + "grad_norm": 1.9945004637909651, + "learning_rate": 1.4174764687936548e-06, + "loss": 0.4985695779323578, + "step": 4752 + }, + { + "epoch": 1.6748898678414097, + "grad_norm": 2.1058731387570253, + "learning_rate": 1.4144876566759303e-06, + "loss": 0.6401104927062988, + "step": 4753 + }, + { + "epoch": 1.6752422907488986, + "grad_norm": 1.8242149794974472, + "learning_rate": 1.4115017590724534e-06, + "loss": 0.5991432666778564, + "step": 4754 + }, + { + "epoch": 1.6755947136563876, + "grad_norm": 1.8977892116048576, + "learning_rate": 1.4085187769968433e-06, + "loss": 0.7787071466445923, + "step": 4755 + }, + { + "epoch": 1.6759471365638765, + "grad_norm": 1.8915051082154768, + "learning_rate": 1.4055387114617336e-06, + "loss": 0.6977101564407349, + "step": 4756 + }, + { + "epoch": 1.6762995594713657, + "grad_norm": 1.7182871374874729, + "learning_rate": 1.4025615634787616e-06, + "loss": 0.541371762752533, + "step": 4757 + }, + { + "epoch": 1.6766519823788546, + "grad_norm": 1.8269477268502086, + "learning_rate": 1.3995873340585765e-06, + "loss": 0.5548759698867798, + "step": 4758 + }, + { + "epoch": 1.6770044052863438, + "grad_norm": 1.947083457475871, + "learning_rate": 1.3966160242108373e-06, + "loss": 0.6022241115570068, + "step": 4759 + }, + { + "epoch": 1.6773568281938327, + "grad_norm": 2.1542376603491946, + "learning_rate": 1.3936476349442074e-06, + "loss": 0.4965083599090576, + "step": 4760 + }, + { + "epoch": 1.6777092511013216, + "grad_norm": 2.4193138120349227, + "learning_rate": 1.3906821672663706e-06, + "loss": 0.6712369918823242, + "step": 4761 + }, + { + "epoch": 1.6780616740088106, + "grad_norm": 1.8527401573304754, + "learning_rate": 1.3877196221840038e-06, + "loss": 0.6236127614974976, + "step": 4762 + }, + { + "epoch": 1.6784140969162995, + "grad_norm": 1.9836915293869917, + 
"learning_rate": 1.3847600007027996e-06, + "loss": 0.7144246101379395, + "step": 4763 + }, + { + "epoch": 1.6787665198237884, + "grad_norm": 1.926900514093349, + "learning_rate": 1.3818033038274602e-06, + "loss": 0.650489091873169, + "step": 4764 + }, + { + "epoch": 1.6791189427312774, + "grad_norm": 1.896615210676468, + "learning_rate": 1.3788495325616912e-06, + "loss": 0.6711791157722473, + "step": 4765 + }, + { + "epoch": 1.6794713656387665, + "grad_norm": 1.6640253715487854, + "learning_rate": 1.375898687908206e-06, + "loss": 0.49629515409469604, + "step": 4766 + }, + { + "epoch": 1.6798237885462555, + "grad_norm": 2.3032521123469727, + "learning_rate": 1.372950770868724e-06, + "loss": 0.5843443870544434, + "step": 4767 + }, + { + "epoch": 1.6801762114537446, + "grad_norm": 1.7269921421841483, + "learning_rate": 1.3700057824439694e-06, + "loss": 0.6201150417327881, + "step": 4768 + }, + { + "epoch": 1.6805286343612336, + "grad_norm": 2.2518096795033746, + "learning_rate": 1.3670637236336815e-06, + "loss": 0.6149473190307617, + "step": 4769 + }, + { + "epoch": 1.6808810572687225, + "grad_norm": 1.8297389667337718, + "learning_rate": 1.3641245954365868e-06, + "loss": 0.476188987493515, + "step": 4770 + }, + { + "epoch": 1.6812334801762114, + "grad_norm": 1.6086104459565809, + "learning_rate": 1.361188398850436e-06, + "loss": 0.4850924015045166, + "step": 4771 + }, + { + "epoch": 1.6815859030837004, + "grad_norm": 2.8146145731538676, + "learning_rate": 1.3582551348719741e-06, + "loss": 0.6008634567260742, + "step": 4772 + }, + { + "epoch": 1.6819383259911893, + "grad_norm": 1.6382847925926618, + "learning_rate": 1.3553248044969525e-06, + "loss": 0.5383377075195312, + "step": 4773 + }, + { + "epoch": 1.6822907488986785, + "grad_norm": 1.966985389102481, + "learning_rate": 1.3523974087201274e-06, + "loss": 0.5711555480957031, + "step": 4774 + }, + { + "epoch": 1.6826431718061674, + "grad_norm": 1.915810750390724, + "learning_rate": 1.3494729485352586e-06, + "loss": 
0.5267810821533203, + "step": 4775 + }, + { + "epoch": 1.6829955947136563, + "grad_norm": 1.968063769811982, + "learning_rate": 1.3465514249351076e-06, + "loss": 0.6203084588050842, + "step": 4776 + }, + { + "epoch": 1.6833480176211455, + "grad_norm": 1.604432029465195, + "learning_rate": 1.3436328389114473e-06, + "loss": 0.46672314405441284, + "step": 4777 + }, + { + "epoch": 1.6837004405286344, + "grad_norm": 2.175917964334397, + "learning_rate": 1.3407171914550366e-06, + "loss": 0.6375850439071655, + "step": 4778 + }, + { + "epoch": 1.6840528634361234, + "grad_norm": 1.7467776544405884, + "learning_rate": 1.337804483555656e-06, + "loss": 0.6162304282188416, + "step": 4779 + }, + { + "epoch": 1.6844052863436123, + "grad_norm": 2.0769560048267817, + "learning_rate": 1.3348947162020741e-06, + "loss": 0.7814363241195679, + "step": 4780 + }, + { + "epoch": 1.6847577092511012, + "grad_norm": 1.4969648698838118, + "learning_rate": 1.3319878903820682e-06, + "loss": 0.47330981492996216, + "step": 4781 + }, + { + "epoch": 1.6851101321585902, + "grad_norm": 2.184365435582337, + "learning_rate": 1.3290840070824163e-06, + "loss": 0.759529173374176, + "step": 4782 + }, + { + "epoch": 1.6854625550660793, + "grad_norm": 2.039688209679945, + "learning_rate": 1.326183067288893e-06, + "loss": 0.7727639675140381, + "step": 4783 + }, + { + "epoch": 1.6858149779735683, + "grad_norm": 1.9567066145193837, + "learning_rate": 1.3232850719862789e-06, + "loss": 0.6429058313369751, + "step": 4784 + }, + { + "epoch": 1.6861674008810574, + "grad_norm": 2.394172291442893, + "learning_rate": 1.3203900221583565e-06, + "loss": 0.5895540714263916, + "step": 4785 + }, + { + "epoch": 1.6865198237885464, + "grad_norm": 1.835232130498821, + "learning_rate": 1.317497918787899e-06, + "loss": 0.5410366058349609, + "step": 4786 + }, + { + "epoch": 1.6868722466960353, + "grad_norm": 2.025742052316709, + "learning_rate": 1.3146087628566894e-06, + "loss": 0.5144281387329102, + "step": 4787 + }, + { + 
"epoch": 1.6872246696035242, + "grad_norm": 2.3397936718705967, + "learning_rate": 1.311722555345506e-06, + "loss": 0.7025437355041504, + "step": 4788 + }, + { + "epoch": 1.6875770925110132, + "grad_norm": 1.7878267984100036, + "learning_rate": 1.3088392972341256e-06, + "loss": 0.6000303030014038, + "step": 4789 + }, + { + "epoch": 1.6879295154185021, + "grad_norm": 2.057293191164908, + "learning_rate": 1.3059589895013248e-06, + "loss": 0.5732883214950562, + "step": 4790 + }, + { + "epoch": 1.688281938325991, + "grad_norm": 1.83266529432573, + "learning_rate": 1.3030816331248785e-06, + "loss": 0.5908600091934204, + "step": 4791 + }, + { + "epoch": 1.6886343612334802, + "grad_norm": 1.7317933799112057, + "learning_rate": 1.3002072290815571e-06, + "loss": 0.5579310655593872, + "step": 4792 + }, + { + "epoch": 1.6889867841409691, + "grad_norm": 1.9713002881427846, + "learning_rate": 1.2973357783471385e-06, + "loss": 0.6439195871353149, + "step": 4793 + }, + { + "epoch": 1.6893392070484583, + "grad_norm": 2.1068548457431437, + "learning_rate": 1.2944672818963822e-06, + "loss": 0.6213329434394836, + "step": 4794 + }, + { + "epoch": 1.6896916299559472, + "grad_norm": 2.271454462540911, + "learning_rate": 1.291601740703059e-06, + "loss": 0.5875385999679565, + "step": 4795 + }, + { + "epoch": 1.6900440528634362, + "grad_norm": 1.8170542219049632, + "learning_rate": 1.2887391557399287e-06, + "loss": 0.7071372270584106, + "step": 4796 + }, + { + "epoch": 1.690396475770925, + "grad_norm": 1.9467140791895143, + "learning_rate": 1.2858795279787517e-06, + "loss": 0.504144549369812, + "step": 4797 + }, + { + "epoch": 1.690748898678414, + "grad_norm": 1.9410537548952953, + "learning_rate": 1.2830228583902816e-06, + "loss": 0.7081021070480347, + "step": 4798 + }, + { + "epoch": 1.691101321585903, + "grad_norm": 1.679906270518348, + "learning_rate": 1.2801691479442658e-06, + "loss": 0.5843057632446289, + "step": 4799 + }, + { + "epoch": 1.691453744493392, + "grad_norm": 
2.366764710202919, + "learning_rate": 1.2773183976094571e-06, + "loss": 0.6437872052192688, + "step": 4800 + }, + { + "epoch": 1.691806167400881, + "grad_norm": 2.0784896951955125, + "learning_rate": 1.2744706083535907e-06, + "loss": 0.6945379972457886, + "step": 4801 + }, + { + "epoch": 1.69215859030837, + "grad_norm": 1.5919193491775196, + "learning_rate": 1.2716257811434019e-06, + "loss": 0.5827867984771729, + "step": 4802 + }, + { + "epoch": 1.6925110132158592, + "grad_norm": 2.6733992431993228, + "learning_rate": 1.2687839169446259e-06, + "loss": 0.5298784971237183, + "step": 4803 + }, + { + "epoch": 1.692863436123348, + "grad_norm": 2.069188268688187, + "learning_rate": 1.2659450167219834e-06, + "loss": 0.6885675191879272, + "step": 4804 + }, + { + "epoch": 1.693215859030837, + "grad_norm": 1.8639055588125417, + "learning_rate": 1.2631090814391945e-06, + "loss": 0.6902546882629395, + "step": 4805 + }, + { + "epoch": 1.693568281938326, + "grad_norm": 1.7529564321244235, + "learning_rate": 1.2602761120589713e-06, + "loss": 0.5972022414207458, + "step": 4806 + }, + { + "epoch": 1.693920704845815, + "grad_norm": 1.676709469876984, + "learning_rate": 1.2574461095430145e-06, + "loss": 0.5527150630950928, + "step": 4807 + }, + { + "epoch": 1.6942731277533039, + "grad_norm": 1.6073136228470446, + "learning_rate": 1.2546190748520294e-06, + "loss": 0.5898724794387817, + "step": 4808 + }, + { + "epoch": 1.6946255506607928, + "grad_norm": 1.7947012597219074, + "learning_rate": 1.2517950089457e-06, + "loss": 0.7023364901542664, + "step": 4809 + }, + { + "epoch": 1.694977973568282, + "grad_norm": 1.6620582383673839, + "learning_rate": 1.2489739127827083e-06, + "loss": 0.6016935706138611, + "step": 4810 + }, + { + "epoch": 1.6953303964757709, + "grad_norm": 1.8540813823422424, + "learning_rate": 1.246155787320733e-06, + "loss": 0.5724658966064453, + "step": 4811 + }, + { + "epoch": 1.69568281938326, + "grad_norm": 1.799661687922518, + "learning_rate": 
1.2433406335164389e-06, + "loss": 0.5886126160621643, + "step": 4812 + }, + { + "epoch": 1.696035242290749, + "grad_norm": 1.7467523658249466, + "learning_rate": 1.2405284523254823e-06, + "loss": 0.6656844615936279, + "step": 4813 + }, + { + "epoch": 1.696387665198238, + "grad_norm": 1.9261091270854245, + "learning_rate": 1.237719244702511e-06, + "loss": 0.6208533644676208, + "step": 4814 + }, + { + "epoch": 1.6967400881057269, + "grad_norm": 2.00988865324314, + "learning_rate": 1.234913011601162e-06, + "loss": 0.6895248889923096, + "step": 4815 + }, + { + "epoch": 1.6970925110132158, + "grad_norm": 1.755568469718746, + "learning_rate": 1.23210975397407e-06, + "loss": 0.5525833368301392, + "step": 4816 + }, + { + "epoch": 1.6974449339207047, + "grad_norm": 1.9922303788563613, + "learning_rate": 1.2293094727728471e-06, + "loss": 0.5958225727081299, + "step": 4817 + }, + { + "epoch": 1.6977973568281939, + "grad_norm": 1.8538893137799548, + "learning_rate": 1.226512168948103e-06, + "loss": 0.7570905089378357, + "step": 4818 + }, + { + "epoch": 1.6981497797356828, + "grad_norm": 1.9122257264522353, + "learning_rate": 1.2237178434494379e-06, + "loss": 0.586568295955658, + "step": 4819 + }, + { + "epoch": 1.6985022026431718, + "grad_norm": 2.0787640518450257, + "learning_rate": 1.2209264972254365e-06, + "loss": 0.574797511100769, + "step": 4820 + }, + { + "epoch": 1.698854625550661, + "grad_norm": 1.8487761386635184, + "learning_rate": 1.2181381312236751e-06, + "loss": 0.6277909278869629, + "step": 4821 + }, + { + "epoch": 1.6992070484581498, + "grad_norm": 1.6733284685909569, + "learning_rate": 1.2153527463907155e-06, + "loss": 0.6308181285858154, + "step": 4822 + }, + { + "epoch": 1.6995594713656388, + "grad_norm": 1.493809970809451, + "learning_rate": 1.2125703436721091e-06, + "loss": 0.5076426267623901, + "step": 4823 + }, + { + "epoch": 1.6999118942731277, + "grad_norm": 1.9773484387601714, + "learning_rate": 1.2097909240123994e-06, + "loss": 0.7712167501449585, + 
"step": 4824 + }, + { + "epoch": 1.7002643171806167, + "grad_norm": 2.0486043522302517, + "learning_rate": 1.2070144883551072e-06, + "loss": 0.5964892506599426, + "step": 4825 + }, + { + "epoch": 1.7006167400881056, + "grad_norm": 2.112366587786807, + "learning_rate": 1.2042410376427472e-06, + "loss": 0.7473628520965576, + "step": 4826 + }, + { + "epoch": 1.7009691629955948, + "grad_norm": 2.17212484756118, + "learning_rate": 1.2014705728168219e-06, + "loss": 0.6897937059402466, + "step": 4827 + }, + { + "epoch": 1.7013215859030837, + "grad_norm": 1.734585395626036, + "learning_rate": 1.198703094817817e-06, + "loss": 0.5955557823181152, + "step": 4828 + }, + { + "epoch": 1.7016740088105728, + "grad_norm": 1.8689895875293312, + "learning_rate": 1.195938604585205e-06, + "loss": 0.7051092386245728, + "step": 4829 + }, + { + "epoch": 1.7020264317180618, + "grad_norm": 3.0972606123503836, + "learning_rate": 1.1931771030574446e-06, + "loss": 0.584572434425354, + "step": 4830 + }, + { + "epoch": 1.7023788546255507, + "grad_norm": 1.9375690474214398, + "learning_rate": 1.1904185911719768e-06, + "loss": 0.5691804885864258, + "step": 4831 + }, + { + "epoch": 1.7027312775330397, + "grad_norm": 2.088088383810667, + "learning_rate": 1.187663069865237e-06, + "loss": 0.5539572834968567, + "step": 4832 + }, + { + "epoch": 1.7030837004405286, + "grad_norm": 2.0666580745541956, + "learning_rate": 1.1849105400726324e-06, + "loss": 0.6229352951049805, + "step": 4833 + }, + { + "epoch": 1.7034361233480175, + "grad_norm": 1.9668453557048677, + "learning_rate": 1.1821610027285613e-06, + "loss": 0.7302919626235962, + "step": 4834 + }, + { + "epoch": 1.7037885462555065, + "grad_norm": 1.6339139133298357, + "learning_rate": 1.1794144587664113e-06, + "loss": 0.6802065372467041, + "step": 4835 + }, + { + "epoch": 1.7041409691629956, + "grad_norm": 1.8504754714684983, + "learning_rate": 1.1766709091185447e-06, + "loss": 0.6448635458946228, + "step": 4836 + }, + { + "epoch": 1.7044933920704846, 
+ "grad_norm": 1.9824663488252348, + "learning_rate": 1.1739303547163138e-06, + "loss": 0.6141834259033203, + "step": 4837 + }, + { + "epoch": 1.7048458149779737, + "grad_norm": 1.8134017158002862, + "learning_rate": 1.1711927964900482e-06, + "loss": 0.5634737014770508, + "step": 4838 + }, + { + "epoch": 1.7051982378854627, + "grad_norm": 1.7474752293775022, + "learning_rate": 1.1684582353690642e-06, + "loss": 0.6786668300628662, + "step": 4839 + }, + { + "epoch": 1.7055506607929516, + "grad_norm": 1.8867560516103576, + "learning_rate": 1.1657266722816663e-06, + "loss": 0.6117962002754211, + "step": 4840 + }, + { + "epoch": 1.7059030837004405, + "grad_norm": 1.8617179395667027, + "learning_rate": 1.1629981081551278e-06, + "loss": 0.6115782260894775, + "step": 4841 + }, + { + "epoch": 1.7062555066079295, + "grad_norm": 1.676805324865966, + "learning_rate": 1.1602725439157114e-06, + "loss": 0.6526266932487488, + "step": 4842 + }, + { + "epoch": 1.7066079295154184, + "grad_norm": 1.8958276581556894, + "learning_rate": 1.1575499804886658e-06, + "loss": 0.5449249148368835, + "step": 4843 + }, + { + "epoch": 1.7069603524229073, + "grad_norm": 1.7291534323462554, + "learning_rate": 1.1548304187982152e-06, + "loss": 0.5793930292129517, + "step": 4844 + }, + { + "epoch": 1.7073127753303965, + "grad_norm": 1.9655642851245159, + "learning_rate": 1.152113859767565e-06, + "loss": 0.5133150815963745, + "step": 4845 + }, + { + "epoch": 1.7076651982378854, + "grad_norm": 2.3523116804029973, + "learning_rate": 1.1494003043189028e-06, + "loss": 0.6771460771560669, + "step": 4846 + }, + { + "epoch": 1.7080176211453746, + "grad_norm": 1.651478684492262, + "learning_rate": 1.1466897533733945e-06, + "loss": 0.5680071115493774, + "step": 4847 + }, + { + "epoch": 1.7083700440528635, + "grad_norm": 1.5749772965105935, + "learning_rate": 1.1439822078511941e-06, + "loss": 0.58878493309021, + "step": 4848 + }, + { + "epoch": 1.7087224669603525, + "grad_norm": 2.108145787297885, + 
"learning_rate": 1.14127766867142e-06, + "loss": 0.6441792249679565, + "step": 4849 + }, + { + "epoch": 1.7090748898678414, + "grad_norm": 1.98601005576605, + "learning_rate": 1.1385761367521865e-06, + "loss": 0.4910963773727417, + "step": 4850 + }, + { + "epoch": 1.7094273127753303, + "grad_norm": 1.860202988226145, + "learning_rate": 1.1358776130105765e-06, + "loss": 0.5878154635429382, + "step": 4851 + }, + { + "epoch": 1.7097797356828193, + "grad_norm": 1.8203129566458394, + "learning_rate": 1.133182098362654e-06, + "loss": 0.5835394859313965, + "step": 4852 + }, + { + "epoch": 1.7101321585903082, + "grad_norm": 2.552503327854629, + "learning_rate": 1.130489593723465e-06, + "loss": 0.6612577438354492, + "step": 4853 + }, + { + "epoch": 1.7104845814977974, + "grad_norm": 1.6914248252090902, + "learning_rate": 1.1278001000070282e-06, + "loss": 0.5892096757888794, + "step": 4854 + }, + { + "epoch": 1.7108370044052863, + "grad_norm": 1.597330243290686, + "learning_rate": 1.1251136181263433e-06, + "loss": 0.6196271181106567, + "step": 4855 + }, + { + "epoch": 1.7111894273127755, + "grad_norm": 1.7525774926334832, + "learning_rate": 1.122430148993392e-06, + "loss": 0.5365586280822754, + "step": 4856 + }, + { + "epoch": 1.7115418502202644, + "grad_norm": 1.8547440453110882, + "learning_rate": 1.119749693519121e-06, + "loss": 0.6006353497505188, + "step": 4857 + }, + { + "epoch": 1.7118942731277533, + "grad_norm": 1.7445383281871432, + "learning_rate": 1.117072252613467e-06, + "loss": 0.5645362138748169, + "step": 4858 + }, + { + "epoch": 1.7122466960352423, + "grad_norm": 1.928827114023792, + "learning_rate": 1.1143978271853362e-06, + "loss": 0.46408799290657043, + "step": 4859 + }, + { + "epoch": 1.7125991189427312, + "grad_norm": 1.9357772553695842, + "learning_rate": 1.1117264181426134e-06, + "loss": 0.7798513770103455, + "step": 4860 + }, + { + "epoch": 1.7129515418502201, + "grad_norm": 2.234058868113385, + "learning_rate": 1.109058026392158e-06, + "loss": 
0.739770770072937, + "step": 4861 + }, + { + "epoch": 1.7133039647577093, + "grad_norm": 1.8311645792398603, + "learning_rate": 1.1063926528398062e-06, + "loss": 0.567306637763977, + "step": 4862 + }, + { + "epoch": 1.7136563876651982, + "grad_norm": 1.8983872649225184, + "learning_rate": 1.1037302983903686e-06, + "loss": 0.5730962753295898, + "step": 4863 + }, + { + "epoch": 1.7140088105726872, + "grad_norm": 2.0428299761075186, + "learning_rate": 1.1010709639476335e-06, + "loss": 0.6311475038528442, + "step": 4864 + }, + { + "epoch": 1.7143612334801763, + "grad_norm": 2.074080543967214, + "learning_rate": 1.098414650414359e-06, + "loss": 0.5867577791213989, + "step": 4865 + }, + { + "epoch": 1.7147136563876653, + "grad_norm": 1.7945849101921227, + "learning_rate": 1.0957613586922844e-06, + "loss": 0.6291393637657166, + "step": 4866 + }, + { + "epoch": 1.7150660792951542, + "grad_norm": 2.221825931925125, + "learning_rate": 1.0931110896821184e-06, + "loss": 0.5811575651168823, + "step": 4867 + }, + { + "epoch": 1.7154185022026431, + "grad_norm": 1.8041589779612486, + "learning_rate": 1.0904638442835459e-06, + "loss": 0.6340835690498352, + "step": 4868 + }, + { + "epoch": 1.715770925110132, + "grad_norm": 2.1324283591729696, + "learning_rate": 1.087819623395222e-06, + "loss": 0.6543419361114502, + "step": 4869 + }, + { + "epoch": 1.716123348017621, + "grad_norm": 1.7815282855404584, + "learning_rate": 1.0851784279147793e-06, + "loss": 0.5669729709625244, + "step": 4870 + }, + { + "epoch": 1.7164757709251102, + "grad_norm": 1.7880383242870224, + "learning_rate": 1.08254025873882e-06, + "loss": 0.5422554612159729, + "step": 4871 + }, + { + "epoch": 1.716828193832599, + "grad_norm": 2.1378220532284646, + "learning_rate": 1.0799051167629215e-06, + "loss": 0.6154215335845947, + "step": 4872 + }, + { + "epoch": 1.7171806167400883, + "grad_norm": 1.6926103915620132, + "learning_rate": 1.0772730028816304e-06, + "loss": 0.6306319236755371, + "step": 4873 + }, + { + "epoch": 
1.7175330396475772, + "grad_norm": 1.8857595594505687, + "learning_rate": 1.0746439179884716e-06, + "loss": 0.6301003694534302, + "step": 4874 + }, + { + "epoch": 1.7178854625550661, + "grad_norm": 2.02854056964172, + "learning_rate": 1.0720178629759347e-06, + "loss": 0.5730071067810059, + "step": 4875 + }, + { + "epoch": 1.718237885462555, + "grad_norm": 1.911878436689674, + "learning_rate": 1.0693948387354836e-06, + "loss": 0.5330506563186646, + "step": 4876 + }, + { + "epoch": 1.718590308370044, + "grad_norm": 2.2472032788534033, + "learning_rate": 1.0667748461575544e-06, + "loss": 0.7724611759185791, + "step": 4877 + }, + { + "epoch": 1.718942731277533, + "grad_norm": 1.8072854503281317, + "learning_rate": 1.0641578861315517e-06, + "loss": 0.5415126085281372, + "step": 4878 + }, + { + "epoch": 1.7192951541850219, + "grad_norm": 1.7491717586336433, + "learning_rate": 1.0615439595458554e-06, + "loss": 0.4895828664302826, + "step": 4879 + }, + { + "epoch": 1.719647577092511, + "grad_norm": 2.1761761181791757, + "learning_rate": 1.0589330672878084e-06, + "loss": 0.6049074530601501, + "step": 4880 + }, + { + "epoch": 1.72, + "grad_norm": 1.8120989551683504, + "learning_rate": 1.056325210243726e-06, + "loss": 0.5733378529548645, + "step": 4881 + }, + { + "epoch": 1.7203524229074891, + "grad_norm": 1.842989042937231, + "learning_rate": 1.0537203892989e-06, + "loss": 0.6034674644470215, + "step": 4882 + }, + { + "epoch": 1.720704845814978, + "grad_norm": 1.873470428701205, + "learning_rate": 1.0511186053375833e-06, + "loss": 0.5282200574874878, + "step": 4883 + }, + { + "epoch": 1.721057268722467, + "grad_norm": 1.7879116674889537, + "learning_rate": 1.0485198592430001e-06, + "loss": 0.6331876516342163, + "step": 4884 + }, + { + "epoch": 1.721409691629956, + "grad_norm": 1.9365084560803385, + "learning_rate": 1.045924151897344e-06, + "loss": 0.5194844007492065, + "step": 4885 + }, + { + "epoch": 1.7217621145374449, + "grad_norm": 1.9087945355709668, + "learning_rate": 
1.0433314841817755e-06, + "loss": 0.5496135354042053, + "step": 4886 + }, + { + "epoch": 1.7221145374449338, + "grad_norm": 2.009884434911672, + "learning_rate": 1.0407418569764304e-06, + "loss": 0.7871953248977661, + "step": 4887 + }, + { + "epoch": 1.7224669603524227, + "grad_norm": 2.040889138785673, + "learning_rate": 1.0381552711604004e-06, + "loss": 0.7199628353118896, + "step": 4888 + }, + { + "epoch": 1.722819383259912, + "grad_norm": 2.248999014584043, + "learning_rate": 1.0355717276117506e-06, + "loss": 0.5876469612121582, + "step": 4889 + }, + { + "epoch": 1.7231718061674008, + "grad_norm": 2.0349261607011, + "learning_rate": 1.0329912272075181e-06, + "loss": 0.5543426275253296, + "step": 4890 + }, + { + "epoch": 1.72352422907489, + "grad_norm": 1.583533546685778, + "learning_rate": 1.0304137708236994e-06, + "loss": 0.5118540525436401, + "step": 4891 + }, + { + "epoch": 1.723876651982379, + "grad_norm": 1.82424017683773, + "learning_rate": 1.0278393593352631e-06, + "loss": 0.628477931022644, + "step": 4892 + }, + { + "epoch": 1.7242290748898679, + "grad_norm": 1.9109773060364437, + "learning_rate": 1.0252679936161392e-06, + "loss": 0.6119322776794434, + "step": 4893 + }, + { + "epoch": 1.7245814977973568, + "grad_norm": 1.8450217827392812, + "learning_rate": 1.0226996745392259e-06, + "loss": 0.7661763429641724, + "step": 4894 + }, + { + "epoch": 1.7249339207048457, + "grad_norm": 2.1201139928861394, + "learning_rate": 1.0201344029763927e-06, + "loss": 0.6431440114974976, + "step": 4895 + }, + { + "epoch": 1.7252863436123347, + "grad_norm": 2.0387248477928503, + "learning_rate": 1.0175721797984639e-06, + "loss": 0.7295387983322144, + "step": 4896 + }, + { + "epoch": 1.7256387665198238, + "grad_norm": 1.9561833203401287, + "learning_rate": 1.015013005875235e-06, + "loss": 0.58225017786026, + "step": 4897 + }, + { + "epoch": 1.7259911894273128, + "grad_norm": 1.9211243008184207, + "learning_rate": 1.0124568820754689e-06, + "loss": 0.5467473864555359, + 
"step": 4898 + }, + { + "epoch": 1.7263436123348017, + "grad_norm": 2.2453442964094967, + "learning_rate": 1.00990380926689e-06, + "loss": 0.7637814283370972, + "step": 4899 + }, + { + "epoch": 1.7266960352422909, + "grad_norm": 2.13267606796778, + "learning_rate": 1.0073537883161821e-06, + "loss": 0.5354464650154114, + "step": 4900 + }, + { + "epoch": 1.7270484581497798, + "grad_norm": 1.91187833906973, + "learning_rate": 1.0048068200890037e-06, + "loss": 0.5213606357574463, + "step": 4901 + }, + { + "epoch": 1.7274008810572687, + "grad_norm": 1.8770841550484265, + "learning_rate": 1.0022629054499678e-06, + "loss": 0.6073330640792847, + "step": 4902 + }, + { + "epoch": 1.7277533039647577, + "grad_norm": 2.1663053459498283, + "learning_rate": 9.997220452626587e-07, + "loss": 0.5711998343467712, + "step": 4903 + }, + { + "epoch": 1.7281057268722466, + "grad_norm": 1.8823259072141711, + "learning_rate": 9.971842403896137e-07, + "loss": 0.6824701428413391, + "step": 4904 + }, + { + "epoch": 1.7284581497797356, + "grad_norm": 1.844862593672041, + "learning_rate": 9.9464949169234e-07, + "loss": 0.528059184551239, + "step": 4905 + }, + { + "epoch": 1.7288105726872247, + "grad_norm": 1.7519423160504919, + "learning_rate": 9.92117800031308e-07, + "loss": 0.45617133378982544, + "step": 4906 + }, + { + "epoch": 1.7291629955947136, + "grad_norm": 2.095891000231315, + "learning_rate": 9.895891662659485e-07, + "loss": 0.6186379194259644, + "step": 4907 + }, + { + "epoch": 1.7295154185022028, + "grad_norm": 1.8933361504308706, + "learning_rate": 9.870635912546511e-07, + "loss": 0.622776985168457, + "step": 4908 + }, + { + "epoch": 1.7298678414096917, + "grad_norm": 2.1556634846751073, + "learning_rate": 9.845410758547724e-07, + "loss": 0.6322426199913025, + "step": 4909 + }, + { + "epoch": 1.7302202643171807, + "grad_norm": 1.8637079254212523, + "learning_rate": 9.82021620922624e-07, + "loss": 0.565685510635376, + "step": 4910 + }, + { + "epoch": 1.7305726872246696, + 
"grad_norm": 1.9032887733300228, + "learning_rate": 9.795052273134908e-07, + "loss": 0.670723557472229, + "step": 4911 + }, + { + "epoch": 1.7309251101321586, + "grad_norm": 1.818317953069921, + "learning_rate": 9.769918958816017e-07, + "loss": 0.627914309501648, + "step": 4912 + }, + { + "epoch": 1.7312775330396475, + "grad_norm": 1.8142433277320784, + "learning_rate": 9.74481627480156e-07, + "loss": 0.613754391670227, + "step": 4913 + }, + { + "epoch": 1.7316299559471364, + "grad_norm": 1.6146673255290158, + "learning_rate": 9.719744229613148e-07, + "loss": 0.7128336429595947, + "step": 4914 + }, + { + "epoch": 1.7319823788546256, + "grad_norm": 2.07516307915708, + "learning_rate": 9.694702831761937e-07, + "loss": 0.692448079586029, + "step": 4915 + }, + { + "epoch": 1.7323348017621145, + "grad_norm": 1.8379288210737326, + "learning_rate": 9.669692089748717e-07, + "loss": 0.5722585916519165, + "step": 4916 + }, + { + "epoch": 1.7326872246696037, + "grad_norm": 2.880722779651987, + "learning_rate": 9.64471201206385e-07, + "loss": 0.5267904996871948, + "step": 4917 + }, + { + "epoch": 1.7330396475770926, + "grad_norm": 1.8098448963152955, + "learning_rate": 9.619762607187277e-07, + "loss": 0.6290950179100037, + "step": 4918 + }, + { + "epoch": 1.7333920704845815, + "grad_norm": 1.6991585212089806, + "learning_rate": 9.594843883588588e-07, + "loss": 0.5137144327163696, + "step": 4919 + }, + { + "epoch": 1.7337444933920705, + "grad_norm": 2.0101083451482067, + "learning_rate": 9.569955849726875e-07, + "loss": 0.6110765337944031, + "step": 4920 + }, + { + "epoch": 1.7340969162995594, + "grad_norm": 1.805820390142787, + "learning_rate": 9.545098514050844e-07, + "loss": 0.5097514390945435, + "step": 4921 + }, + { + "epoch": 1.7344493392070484, + "grad_norm": 1.9204009410934093, + "learning_rate": 9.520271884998822e-07, + "loss": 0.7220968008041382, + "step": 4922 + }, + { + "epoch": 1.7348017621145373, + "grad_norm": 1.816061125504689, + "learning_rate": 
9.495475970998669e-07, + "loss": 0.4790550470352173, + "step": 4923 + }, + { + "epoch": 1.7351541850220265, + "grad_norm": 1.8878679441443287, + "learning_rate": 9.470710780467818e-07, + "loss": 0.5440540909767151, + "step": 4924 + }, + { + "epoch": 1.7355066079295154, + "grad_norm": 1.8420075371513611, + "learning_rate": 9.445976321813277e-07, + "loss": 0.6351054310798645, + "step": 4925 + }, + { + "epoch": 1.7358590308370045, + "grad_norm": 1.8685391189030902, + "learning_rate": 9.421272603431619e-07, + "loss": 0.597430944442749, + "step": 4926 + }, + { + "epoch": 1.7362114537444935, + "grad_norm": 1.8993591697635552, + "learning_rate": 9.396599633709013e-07, + "loss": 0.5826110243797302, + "step": 4927 + }, + { + "epoch": 1.7365638766519824, + "grad_norm": 1.9528322527669026, + "learning_rate": 9.371957421021116e-07, + "loss": 0.61531662940979, + "step": 4928 + }, + { + "epoch": 1.7369162995594714, + "grad_norm": 1.7976479809998938, + "learning_rate": 9.347345973733257e-07, + "loss": 0.5286549925804138, + "step": 4929 + }, + { + "epoch": 1.7372687224669603, + "grad_norm": 2.051327926584316, + "learning_rate": 9.322765300200209e-07, + "loss": 0.6923980712890625, + "step": 4930 + }, + { + "epoch": 1.7376211453744492, + "grad_norm": 1.8765754964403032, + "learning_rate": 9.298215408766376e-07, + "loss": 0.5408697128295898, + "step": 4931 + }, + { + "epoch": 1.7379735682819382, + "grad_norm": 1.9428832757254997, + "learning_rate": 9.273696307765656e-07, + "loss": 0.6360228061676025, + "step": 4932 + }, + { + "epoch": 1.7383259911894273, + "grad_norm": 1.5478222777536266, + "learning_rate": 9.249208005521538e-07, + "loss": 0.46559634804725647, + "step": 4933 + }, + { + "epoch": 1.7386784140969163, + "grad_norm": 2.0814940983294465, + "learning_rate": 9.224750510347036e-07, + "loss": 0.6065478324890137, + "step": 4934 + }, + { + "epoch": 1.7390308370044054, + "grad_norm": 2.197942688439507, + "learning_rate": 9.2003238305447e-07, + "loss": 0.6777745485305786, + 
"step": 4935 + }, + { + "epoch": 1.7393832599118944, + "grad_norm": 1.764242470379209, + "learning_rate": 9.175927974406607e-07, + "loss": 0.568982720375061, + "step": 4936 + }, + { + "epoch": 1.7397356828193833, + "grad_norm": 1.9082270198240563, + "learning_rate": 9.151562950214443e-07, + "loss": 0.6014461517333984, + "step": 4937 + }, + { + "epoch": 1.7400881057268722, + "grad_norm": 1.9463215063568118, + "learning_rate": 9.127228766239349e-07, + "loss": 0.6312133073806763, + "step": 4938 + }, + { + "epoch": 1.7404405286343612, + "grad_norm": 1.9066118382891128, + "learning_rate": 9.102925430742015e-07, + "loss": 0.5440298318862915, + "step": 4939 + }, + { + "epoch": 1.74079295154185, + "grad_norm": 1.9115402376997355, + "learning_rate": 9.078652951972688e-07, + "loss": 0.6599005460739136, + "step": 4940 + }, + { + "epoch": 1.7411453744493393, + "grad_norm": 1.8987879122247575, + "learning_rate": 9.054411338171099e-07, + "loss": 0.6719228625297546, + "step": 4941 + }, + { + "epoch": 1.7414977973568282, + "grad_norm": 1.7692389966879711, + "learning_rate": 9.030200597566529e-07, + "loss": 0.5771356821060181, + "step": 4942 + }, + { + "epoch": 1.7418502202643171, + "grad_norm": 2.0029197465912936, + "learning_rate": 9.006020738377764e-07, + "loss": 0.5066591501235962, + "step": 4943 + }, + { + "epoch": 1.7422026431718063, + "grad_norm": 1.754361693598564, + "learning_rate": 8.981871768813111e-07, + "loss": 0.5091663002967834, + "step": 4944 + }, + { + "epoch": 1.7425550660792952, + "grad_norm": 1.9092674317256029, + "learning_rate": 8.957753697070415e-07, + "loss": 0.6594514846801758, + "step": 4945 + }, + { + "epoch": 1.7429074889867842, + "grad_norm": 1.8033652679865708, + "learning_rate": 8.933666531337004e-07, + "loss": 0.5485379695892334, + "step": 4946 + }, + { + "epoch": 1.743259911894273, + "grad_norm": 2.2602019905537913, + "learning_rate": 8.909610279789716e-07, + "loss": 0.6079416871070862, + "step": 4947 + }, + { + "epoch": 1.743612334801762, + 
"grad_norm": 1.8415960205262154, + "learning_rate": 8.885584950594894e-07, + "loss": 0.4980606436729431, + "step": 4948 + }, + { + "epoch": 1.743964757709251, + "grad_norm": 1.5880176897451332, + "learning_rate": 8.861590551908405e-07, + "loss": 0.47701022028923035, + "step": 4949 + }, + { + "epoch": 1.7443171806167401, + "grad_norm": 1.7223149872435417, + "learning_rate": 8.837627091875578e-07, + "loss": 0.5041281580924988, + "step": 4950 + }, + { + "epoch": 1.744669603524229, + "grad_norm": 1.9666236461253934, + "learning_rate": 8.813694578631283e-07, + "loss": 0.5477255582809448, + "step": 4951 + }, + { + "epoch": 1.7450220264317182, + "grad_norm": 1.883766477051188, + "learning_rate": 8.78979302029983e-07, + "loss": 0.6377973556518555, + "step": 4952 + }, + { + "epoch": 1.7453744493392072, + "grad_norm": 1.940207867324299, + "learning_rate": 8.76592242499511e-07, + "loss": 0.6688166856765747, + "step": 4953 + }, + { + "epoch": 1.745726872246696, + "grad_norm": 2.0031898505950907, + "learning_rate": 8.742082800820406e-07, + "loss": 0.6236848831176758, + "step": 4954 + }, + { + "epoch": 1.746079295154185, + "grad_norm": 1.7582600318717108, + "learning_rate": 8.718274155868545e-07, + "loss": 0.653768002986908, + "step": 4955 + }, + { + "epoch": 1.746431718061674, + "grad_norm": 1.844534933556578, + "learning_rate": 8.694496498221805e-07, + "loss": 0.5647604465484619, + "step": 4956 + }, + { + "epoch": 1.746784140969163, + "grad_norm": 1.781932697931349, + "learning_rate": 8.670749835951964e-07, + "loss": 0.4960663914680481, + "step": 4957 + }, + { + "epoch": 1.7471365638766518, + "grad_norm": 1.6873484879529697, + "learning_rate": 8.647034177120317e-07, + "loss": 0.6271536350250244, + "step": 4958 + }, + { + "epoch": 1.747488986784141, + "grad_norm": 2.0059254125224757, + "learning_rate": 8.623349529777525e-07, + "loss": 0.6323459148406982, + "step": 4959 + }, + { + "epoch": 1.74784140969163, + "grad_norm": 1.9564636362517054, + "learning_rate": 
8.599695901963811e-07, + "loss": 0.6084197163581848, + "step": 4960 + }, + { + "epoch": 1.748193832599119, + "grad_norm": 1.8913653459936526, + "learning_rate": 8.576073301708876e-07, + "loss": 0.48974379897117615, + "step": 4961 + }, + { + "epoch": 1.748546255506608, + "grad_norm": 1.8735173678444992, + "learning_rate": 8.552481737031859e-07, + "loss": 0.5985081195831299, + "step": 4962 + }, + { + "epoch": 1.748898678414097, + "grad_norm": 1.6360789306706147, + "learning_rate": 8.528921215941299e-07, + "loss": 0.507872998714447, + "step": 4963 + }, + { + "epoch": 1.749251101321586, + "grad_norm": 1.5251403239052872, + "learning_rate": 8.50539174643531e-07, + "loss": 0.5772356986999512, + "step": 4964 + }, + { + "epoch": 1.7496035242290748, + "grad_norm": 2.222117569410965, + "learning_rate": 8.48189333650139e-07, + "loss": 0.675100564956665, + "step": 4965 + }, + { + "epoch": 1.7499559471365638, + "grad_norm": 1.9356078104678653, + "learning_rate": 8.458425994116582e-07, + "loss": 0.5571645498275757, + "step": 4966 + }, + { + "epoch": 1.7503083700440527, + "grad_norm": 1.807660183683072, + "learning_rate": 8.434989727247233e-07, + "loss": 0.5842185020446777, + "step": 4967 + }, + { + "epoch": 1.7506607929515419, + "grad_norm": 1.7960899956397995, + "learning_rate": 8.41158454384925e-07, + "loss": 0.5693016648292542, + "step": 4968 + }, + { + "epoch": 1.7510132158590308, + "grad_norm": 1.808037504366546, + "learning_rate": 8.388210451868006e-07, + "loss": 0.5791449546813965, + "step": 4969 + }, + { + "epoch": 1.75136563876652, + "grad_norm": 2.1439820497437516, + "learning_rate": 8.364867459238257e-07, + "loss": 0.4873960018157959, + "step": 4970 + }, + { + "epoch": 1.751718061674009, + "grad_norm": 1.6712365329059415, + "learning_rate": 8.341555573884175e-07, + "loss": 0.609403669834137, + "step": 4971 + }, + { + "epoch": 1.7520704845814978, + "grad_norm": 2.0664225342752327, + "learning_rate": 8.318274803719483e-07, + "loss": 0.5676242113113403, + "step": 4972 + 
}, + { + "epoch": 1.7524229074889868, + "grad_norm": 2.2550971825464026, + "learning_rate": 8.29502515664723e-07, + "loss": 0.7692728638648987, + "step": 4973 + }, + { + "epoch": 1.7527753303964757, + "grad_norm": 2.318073308236361, + "learning_rate": 8.27180664056001e-07, + "loss": 0.7940253019332886, + "step": 4974 + }, + { + "epoch": 1.7531277533039646, + "grad_norm": 2.021077548315, + "learning_rate": 8.24861926333973e-07, + "loss": 0.5784735083580017, + "step": 4975 + }, + { + "epoch": 1.7534801762114536, + "grad_norm": 2.106016882372918, + "learning_rate": 8.225463032857783e-07, + "loss": 0.6493539810180664, + "step": 4976 + }, + { + "epoch": 1.7538325991189427, + "grad_norm": 1.6893816606485224, + "learning_rate": 8.202337956975026e-07, + "loss": 0.615519106388092, + "step": 4977 + }, + { + "epoch": 1.7541850220264317, + "grad_norm": 2.4337358559529587, + "learning_rate": 8.179244043541678e-07, + "loss": 0.5369104146957397, + "step": 4978 + }, + { + "epoch": 1.7545374449339208, + "grad_norm": 1.8845170170566812, + "learning_rate": 8.156181300397414e-07, + "loss": 0.5527158975601196, + "step": 4979 + }, + { + "epoch": 1.7548898678414098, + "grad_norm": 2.1597753145956786, + "learning_rate": 8.133149735371316e-07, + "loss": 0.5870147943496704, + "step": 4980 + }, + { + "epoch": 1.7552422907488987, + "grad_norm": 2.0333589118991497, + "learning_rate": 8.110149356281848e-07, + "loss": 0.7235025763511658, + "step": 4981 + }, + { + "epoch": 1.7555947136563876, + "grad_norm": 1.9283097758260628, + "learning_rate": 8.087180170937004e-07, + "loss": 0.5630521774291992, + "step": 4982 + }, + { + "epoch": 1.7559471365638766, + "grad_norm": 2.015740627515862, + "learning_rate": 8.06424218713403e-07, + "loss": 0.5005021691322327, + "step": 4983 + }, + { + "epoch": 1.7562995594713655, + "grad_norm": 2.0683486617790066, + "learning_rate": 8.041335412659679e-07, + "loss": 0.7267229557037354, + "step": 4984 + }, + { + "epoch": 1.7566519823788547, + "grad_norm": 
2.2397406108409834, + "learning_rate": 8.018459855290107e-07, + "loss": 0.6494802236557007, + "step": 4985 + }, + { + "epoch": 1.7570044052863436, + "grad_norm": 1.8012009390187627, + "learning_rate": 7.995615522790845e-07, + "loss": 0.5637267827987671, + "step": 4986 + }, + { + "epoch": 1.7573568281938328, + "grad_norm": 1.807872858711751, + "learning_rate": 7.972802422916826e-07, + "loss": 0.5143958330154419, + "step": 4987 + }, + { + "epoch": 1.7577092511013217, + "grad_norm": 1.7925007157989583, + "learning_rate": 7.950020563412398e-07, + "loss": 0.607841968536377, + "step": 4988 + }, + { + "epoch": 1.7580616740088106, + "grad_norm": 1.9011698158798267, + "learning_rate": 7.927269952011285e-07, + "loss": 0.6066895723342896, + "step": 4989 + }, + { + "epoch": 1.7584140969162996, + "grad_norm": 2.293924542695718, + "learning_rate": 7.904550596436611e-07, + "loss": 0.6686232686042786, + "step": 4990 + }, + { + "epoch": 1.7587665198237885, + "grad_norm": 1.7540251789370713, + "learning_rate": 7.881862504400884e-07, + "loss": 0.589708685874939, + "step": 4991 + }, + { + "epoch": 1.7591189427312774, + "grad_norm": 1.9346002211307631, + "learning_rate": 7.859205683606008e-07, + "loss": 0.7008450031280518, + "step": 4992 + }, + { + "epoch": 1.7594713656387664, + "grad_norm": 1.5488386957340947, + "learning_rate": 7.836580141743289e-07, + "loss": 0.5754648447036743, + "step": 4993 + }, + { + "epoch": 1.7598237885462555, + "grad_norm": 1.8204543329281522, + "learning_rate": 7.81398588649338e-07, + "loss": 0.5756049156188965, + "step": 4994 + }, + { + "epoch": 1.7601762114537445, + "grad_norm": 1.8754803653843481, + "learning_rate": 7.791422925526326e-07, + "loss": 0.6143715381622314, + "step": 4995 + }, + { + "epoch": 1.7605286343612336, + "grad_norm": 1.9795958910244131, + "learning_rate": 7.768891266501544e-07, + "loss": 0.700069010257721, + "step": 4996 + }, + { + "epoch": 1.7608810572687226, + "grad_norm": 1.8030282940418303, + "learning_rate": 7.746390917067847e-07, 
+ "loss": 0.5200002193450928, + "step": 4997 + }, + { + "epoch": 1.7612334801762115, + "grad_norm": 2.0811179040330483, + "learning_rate": 7.723921884863395e-07, + "loss": 0.6963525414466858, + "step": 4998 + }, + { + "epoch": 1.7615859030837004, + "grad_norm": 1.9255908471526815, + "learning_rate": 7.701484177515717e-07, + "loss": 0.6329556703567505, + "step": 4999 + }, + { + "epoch": 1.7619383259911894, + "grad_norm": 2.0796773022688213, + "learning_rate": 7.67907780264171e-07, + "loss": 0.6980677247047424, + "step": 5000 + }, + { + "epoch": 1.7622907488986783, + "grad_norm": 1.95091452058077, + "learning_rate": 7.656702767847679e-07, + "loss": 0.5244314670562744, + "step": 5001 + }, + { + "epoch": 1.7626431718061673, + "grad_norm": 1.937585844549177, + "learning_rate": 7.634359080729215e-07, + "loss": 0.6679523587226868, + "step": 5002 + }, + { + "epoch": 1.7629955947136564, + "grad_norm": 1.7698344536731299, + "learning_rate": 7.612046748871327e-07, + "loss": 0.6168316602706909, + "step": 5003 + }, + { + "epoch": 1.7633480176211453, + "grad_norm": 1.8295319189191592, + "learning_rate": 7.589765779848346e-07, + "loss": 0.5892738699913025, + "step": 5004 + }, + { + "epoch": 1.7637004405286345, + "grad_norm": 1.8270406797726577, + "learning_rate": 7.567516181223966e-07, + "loss": 0.6714082956314087, + "step": 5005 + }, + { + "epoch": 1.7640528634361234, + "grad_norm": 1.7798086214061835, + "learning_rate": 7.545297960551245e-07, + "loss": 0.6327016353607178, + "step": 5006 + }, + { + "epoch": 1.7644052863436124, + "grad_norm": 1.8272907155681217, + "learning_rate": 7.52311112537254e-07, + "loss": 0.5114126205444336, + "step": 5007 + }, + { + "epoch": 1.7647577092511013, + "grad_norm": 1.9198067827489789, + "learning_rate": 7.500955683219646e-07, + "loss": 0.5701695084571838, + "step": 5008 + }, + { + "epoch": 1.7651101321585903, + "grad_norm": 1.7304483866926885, + "learning_rate": 7.478831641613616e-07, + "loss": 0.5966283082962036, + "step": 5009 + }, + { + 
"epoch": 1.7654625550660792, + "grad_norm": 1.7690414353003558, + "learning_rate": 7.456739008064883e-07, + "loss": 0.6219101548194885, + "step": 5010 + }, + { + "epoch": 1.7658149779735681, + "grad_norm": 2.1971226449232804, + "learning_rate": 7.434677790073197e-07, + "loss": 0.6516324877738953, + "step": 5011 + }, + { + "epoch": 1.7661674008810573, + "grad_norm": 2.0945250680543395, + "learning_rate": 7.412647995127664e-07, + "loss": 0.4623621106147766, + "step": 5012 + }, + { + "epoch": 1.7665198237885462, + "grad_norm": 1.7568345992089816, + "learning_rate": 7.390649630706703e-07, + "loss": 0.5661109685897827, + "step": 5013 + }, + { + "epoch": 1.7668722466960354, + "grad_norm": 2.0070117088967154, + "learning_rate": 7.368682704278096e-07, + "loss": 0.47063148021698, + "step": 5014 + }, + { + "epoch": 1.7672246696035243, + "grad_norm": 1.636187219475051, + "learning_rate": 7.346747223298889e-07, + "loss": 0.5684597492218018, + "step": 5015 + }, + { + "epoch": 1.7675770925110132, + "grad_norm": 1.872749765270047, + "learning_rate": 7.324843195215548e-07, + "loss": 0.5614477396011353, + "step": 5016 + }, + { + "epoch": 1.7679295154185022, + "grad_norm": 1.9944667195924293, + "learning_rate": 7.302970627463779e-07, + "loss": 0.508664608001709, + "step": 5017 + }, + { + "epoch": 1.7682819383259911, + "grad_norm": 1.9918093815103546, + "learning_rate": 7.281129527468645e-07, + "loss": 0.5348209142684937, + "step": 5018 + }, + { + "epoch": 1.76863436123348, + "grad_norm": 2.2774118234615695, + "learning_rate": 7.259319902644513e-07, + "loss": 0.6441121101379395, + "step": 5019 + }, + { + "epoch": 1.7689867841409692, + "grad_norm": 1.7776640162425583, + "learning_rate": 7.237541760395083e-07, + "loss": 0.6454842686653137, + "step": 5020 + }, + { + "epoch": 1.7693392070484582, + "grad_norm": 1.818033997112941, + "learning_rate": 7.215795108113343e-07, + "loss": 0.4822286367416382, + "step": 5021 + }, + { + "epoch": 1.769691629955947, + "grad_norm": 2.2519074742911775, 
+ "learning_rate": 7.19407995318162e-07, + "loss": 0.6078327894210815, + "step": 5022 + }, + { + "epoch": 1.7700440528634362, + "grad_norm": 1.9964867958416748, + "learning_rate": 7.172396302971507e-07, + "loss": 0.6394459009170532, + "step": 5023 + }, + { + "epoch": 1.7703964757709252, + "grad_norm": 1.919321953608054, + "learning_rate": 7.150744164843959e-07, + "loss": 0.646416425704956, + "step": 5024 + }, + { + "epoch": 1.7707488986784141, + "grad_norm": 1.743918601710363, + "learning_rate": 7.129123546149208e-07, + "loss": 0.6265356540679932, + "step": 5025 + }, + { + "epoch": 1.771101321585903, + "grad_norm": 1.717725969603381, + "learning_rate": 7.107534454226728e-07, + "loss": 0.5074717998504639, + "step": 5026 + }, + { + "epoch": 1.771453744493392, + "grad_norm": 1.9181838757933405, + "learning_rate": 7.0859768964054e-07, + "loss": 0.7036402821540833, + "step": 5027 + }, + { + "epoch": 1.771806167400881, + "grad_norm": 1.7638856276686163, + "learning_rate": 7.064450880003327e-07, + "loss": 0.6098893880844116, + "step": 5028 + }, + { + "epoch": 1.77215859030837, + "grad_norm": 2.005026773406909, + "learning_rate": 7.042956412327917e-07, + "loss": 0.582880973815918, + "step": 5029 + }, + { + "epoch": 1.772511013215859, + "grad_norm": 2.013313109536588, + "learning_rate": 7.021493500675869e-07, + "loss": 0.6003242135047913, + "step": 5030 + }, + { + "epoch": 1.7728634361233482, + "grad_norm": 1.9319887994625418, + "learning_rate": 7.000062152333165e-07, + "loss": 0.4999944865703583, + "step": 5031 + }, + { + "epoch": 1.7732158590308371, + "grad_norm": 1.8450299102376384, + "learning_rate": 6.978662374575107e-07, + "loss": 0.5569149255752563, + "step": 5032 + }, + { + "epoch": 1.773568281938326, + "grad_norm": 1.9277460192299252, + "learning_rate": 6.957294174666263e-07, + "loss": 0.5600287914276123, + "step": 5033 + }, + { + "epoch": 1.773920704845815, + "grad_norm": 1.8890013971887576, + "learning_rate": 6.935957559860418e-07, + "loss": 0.5412951707839966, + 
"step": 5034 + }, + { + "epoch": 1.774273127753304, + "grad_norm": 1.7378105888388657, + "learning_rate": 6.914652537400735e-07, + "loss": 0.5881151556968689, + "step": 5035 + }, + { + "epoch": 1.7746255506607929, + "grad_norm": 1.8829243382985155, + "learning_rate": 6.893379114519572e-07, + "loss": 0.5975406169891357, + "step": 5036 + }, + { + "epoch": 1.7749779735682818, + "grad_norm": 1.7883517993987919, + "learning_rate": 6.872137298438653e-07, + "loss": 0.6266802549362183, + "step": 5037 + }, + { + "epoch": 1.775330396475771, + "grad_norm": 2.279148556628154, + "learning_rate": 6.850927096368854e-07, + "loss": 0.6825709939002991, + "step": 5038 + }, + { + "epoch": 1.77568281938326, + "grad_norm": 1.6068572613194736, + "learning_rate": 6.829748515510381e-07, + "loss": 0.6035742163658142, + "step": 5039 + }, + { + "epoch": 1.776035242290749, + "grad_norm": 1.901514453732062, + "learning_rate": 6.808601563052742e-07, + "loss": 0.6665611267089844, + "step": 5040 + }, + { + "epoch": 1.776387665198238, + "grad_norm": 2.334324554300087, + "learning_rate": 6.787486246174657e-07, + "loss": 0.8202367424964905, + "step": 5041 + }, + { + "epoch": 1.776740088105727, + "grad_norm": 1.8080635950130315, + "learning_rate": 6.766402572044084e-07, + "loss": 0.6516656875610352, + "step": 5042 + }, + { + "epoch": 1.7770925110132159, + "grad_norm": 1.6361942373114873, + "learning_rate": 6.745350547818307e-07, + "loss": 0.663591742515564, + "step": 5043 + }, + { + "epoch": 1.7774449339207048, + "grad_norm": 2.0460511379273716, + "learning_rate": 6.724330180643824e-07, + "loss": 0.6025142669677734, + "step": 5044 + }, + { + "epoch": 1.7777973568281937, + "grad_norm": 1.6332878492082579, + "learning_rate": 6.703341477656422e-07, + "loss": 0.5704027414321899, + "step": 5045 + }, + { + "epoch": 1.7781497797356827, + "grad_norm": 2.0053343984683534, + "learning_rate": 6.682384445981071e-07, + "loss": 0.6518473625183105, + "step": 5046 + }, + { + "epoch": 1.7785022026431718, + 
"grad_norm": 1.6878153153712165, + "learning_rate": 6.661459092732037e-07, + "loss": 0.5547574758529663, + "step": 5047 + }, + { + "epoch": 1.7788546255506608, + "grad_norm": 1.8096814000573205, + "learning_rate": 6.640565425012846e-07, + "loss": 0.6248831748962402, + "step": 5048 + }, + { + "epoch": 1.77920704845815, + "grad_norm": 1.8747085080187502, + "learning_rate": 6.619703449916259e-07, + "loss": 0.5899701118469238, + "step": 5049 + }, + { + "epoch": 1.7795594713656389, + "grad_norm": 1.9253293216058311, + "learning_rate": 6.598873174524223e-07, + "loss": 0.41864174604415894, + "step": 5050 + }, + { + "epoch": 1.7799118942731278, + "grad_norm": 2.2457701854009025, + "learning_rate": 6.578074605908002e-07, + "loss": 0.7473436594009399, + "step": 5051 + }, + { + "epoch": 1.7802643171806167, + "grad_norm": 1.6599111795216646, + "learning_rate": 6.557307751128051e-07, + "loss": 0.49480879306793213, + "step": 5052 + }, + { + "epoch": 1.7806167400881057, + "grad_norm": 1.8257078701065834, + "learning_rate": 6.536572617234082e-07, + "loss": 0.5619323253631592, + "step": 5053 + }, + { + "epoch": 1.7809691629955946, + "grad_norm": 1.8566139978409217, + "learning_rate": 6.515869211265013e-07, + "loss": 0.5271984338760376, + "step": 5054 + }, + { + "epoch": 1.7813215859030835, + "grad_norm": 1.967436768949709, + "learning_rate": 6.495197540248999e-07, + "loss": 0.6544383764266968, + "step": 5055 + }, + { + "epoch": 1.7816740088105727, + "grad_norm": 2.157946298106486, + "learning_rate": 6.474557611203458e-07, + "loss": 0.6525388956069946, + "step": 5056 + }, + { + "epoch": 1.7820264317180616, + "grad_norm": 2.0314482863762735, + "learning_rate": 6.453949431134987e-07, + "loss": 0.5509910583496094, + "step": 5057 + }, + { + "epoch": 1.7823788546255508, + "grad_norm": 1.6067790596532618, + "learning_rate": 6.433373007039412e-07, + "loss": 0.5030776262283325, + "step": 5058 + }, + { + "epoch": 1.7827312775330397, + "grad_norm": 1.875686429811456, + "learning_rate": 
6.412828345901811e-07, + "loss": 0.6743696331977844, + "step": 5059 + }, + { + "epoch": 1.7830837004405287, + "grad_norm": 1.9399780429001139, + "learning_rate": 6.392315454696452e-07, + "loss": 0.5395437479019165, + "step": 5060 + }, + { + "epoch": 1.7834361233480176, + "grad_norm": 1.7657846282567238, + "learning_rate": 6.371834340386807e-07, + "loss": 0.5773402452468872, + "step": 5061 + }, + { + "epoch": 1.7837885462555065, + "grad_norm": 1.920136830142019, + "learning_rate": 6.351385009925582e-07, + "loss": 0.6014268398284912, + "step": 5062 + }, + { + "epoch": 1.7841409691629955, + "grad_norm": 1.9465884411051106, + "learning_rate": 6.33096747025469e-07, + "loss": 0.5519139170646667, + "step": 5063 + }, + { + "epoch": 1.7844933920704846, + "grad_norm": 3.0085962631929752, + "learning_rate": 6.310581728305254e-07, + "loss": 0.5407502055168152, + "step": 5064 + }, + { + "epoch": 1.7848458149779736, + "grad_norm": 1.5371833099084395, + "learning_rate": 6.290227790997605e-07, + "loss": 0.61688232421875, + "step": 5065 + }, + { + "epoch": 1.7851982378854625, + "grad_norm": 2.002396471657761, + "learning_rate": 6.269905665241271e-07, + "loss": 0.5212849974632263, + "step": 5066 + }, + { + "epoch": 1.7855506607929517, + "grad_norm": 1.7684490871986807, + "learning_rate": 6.249615357934968e-07, + "loss": 0.6827710866928101, + "step": 5067 + }, + { + "epoch": 1.7859030837004406, + "grad_norm": 2.016669351586175, + "learning_rate": 6.22935687596663e-07, + "loss": 0.6907633543014526, + "step": 5068 + }, + { + "epoch": 1.7862555066079295, + "grad_norm": 2.045834595721204, + "learning_rate": 6.209130226213378e-07, + "loss": 0.5707769989967346, + "step": 5069 + }, + { + "epoch": 1.7866079295154185, + "grad_norm": 1.9432188628486171, + "learning_rate": 6.188935415541541e-07, + "loss": 0.6062690019607544, + "step": 5070 + }, + { + "epoch": 1.7869603524229074, + "grad_norm": 1.8744219034756735, + "learning_rate": 6.168772450806604e-07, + "loss": 0.5291163921356201, + "step": 
5071 + }, + { + "epoch": 1.7873127753303963, + "grad_norm": 1.8892054954511246, + "learning_rate": 6.148641338853301e-07, + "loss": 0.6324198246002197, + "step": 5072 + }, + { + "epoch": 1.7876651982378855, + "grad_norm": 1.7030219876612867, + "learning_rate": 6.128542086515499e-07, + "loss": 0.5516111850738525, + "step": 5073 + }, + { + "epoch": 1.7880176211453744, + "grad_norm": 2.1800478368143232, + "learning_rate": 6.108474700616263e-07, + "loss": 0.6384079456329346, + "step": 5074 + }, + { + "epoch": 1.7883700440528636, + "grad_norm": 1.777234944410244, + "learning_rate": 6.088439187967865e-07, + "loss": 0.5699876546859741, + "step": 5075 + }, + { + "epoch": 1.7887224669603525, + "grad_norm": 2.081274535023766, + "learning_rate": 6.06843555537171e-07, + "loss": 0.6068697571754456, + "step": 5076 + }, + { + "epoch": 1.7890748898678415, + "grad_norm": 2.1233392160842066, + "learning_rate": 6.048463809618444e-07, + "loss": 0.6254304647445679, + "step": 5077 + }, + { + "epoch": 1.7894273127753304, + "grad_norm": 2.0059926594667914, + "learning_rate": 6.02852395748782e-07, + "loss": 0.6779477596282959, + "step": 5078 + }, + { + "epoch": 1.7897797356828193, + "grad_norm": 1.8024145072939486, + "learning_rate": 6.008616005748802e-07, + "loss": 0.6139817833900452, + "step": 5079 + }, + { + "epoch": 1.7901321585903083, + "grad_norm": 2.042935872875493, + "learning_rate": 5.988739961159539e-07, + "loss": 0.553310215473175, + "step": 5080 + }, + { + "epoch": 1.7904845814977972, + "grad_norm": 1.9543566497010472, + "learning_rate": 5.968895830467325e-07, + "loss": 0.6093542575836182, + "step": 5081 + }, + { + "epoch": 1.7908370044052864, + "grad_norm": 1.8231021161772492, + "learning_rate": 5.949083620408614e-07, + "loss": 0.6224432587623596, + "step": 5082 + }, + { + "epoch": 1.7911894273127753, + "grad_norm": 1.881995664144807, + "learning_rate": 5.929303337709047e-07, + "loss": 0.6155597567558289, + "step": 5083 + }, + { + "epoch": 1.7915418502202645, + "grad_norm": 
1.7127795559170356, + "learning_rate": 5.909554989083411e-07, + "loss": 0.5742098093032837, + "step": 5084 + }, + { + "epoch": 1.7918942731277534, + "grad_norm": 2.1579790645115886, + "learning_rate": 5.889838581235641e-07, + "loss": 0.7427949905395508, + "step": 5085 + }, + { + "epoch": 1.7922466960352423, + "grad_norm": 1.8686834683482023, + "learning_rate": 5.870154120858851e-07, + "loss": 0.48208528757095337, + "step": 5086 + }, + { + "epoch": 1.7925991189427313, + "grad_norm": 2.103622298674757, + "learning_rate": 5.850501614635318e-07, + "loss": 0.48402148485183716, + "step": 5087 + }, + { + "epoch": 1.7929515418502202, + "grad_norm": 1.9085757415865392, + "learning_rate": 5.83088106923646e-07, + "loss": 0.6808921694755554, + "step": 5088 + }, + { + "epoch": 1.7933039647577091, + "grad_norm": 1.4851842618773352, + "learning_rate": 5.811292491322795e-07, + "loss": 0.48358428478240967, + "step": 5089 + }, + { + "epoch": 1.793656387665198, + "grad_norm": 1.801328000774117, + "learning_rate": 5.791735887544081e-07, + "loss": 0.6492827534675598, + "step": 5090 + }, + { + "epoch": 1.7940088105726872, + "grad_norm": 1.542873674028149, + "learning_rate": 5.772211264539162e-07, + "loss": 0.5453791618347168, + "step": 5091 + }, + { + "epoch": 1.7943612334801762, + "grad_norm": 1.780642500081645, + "learning_rate": 5.75271862893605e-07, + "loss": 0.5901151895523071, + "step": 5092 + }, + { + "epoch": 1.7947136563876653, + "grad_norm": 2.0888993209852664, + "learning_rate": 5.73325798735187e-07, + "loss": 0.616302490234375, + "step": 5093 + }, + { + "epoch": 1.7950660792951543, + "grad_norm": 1.7666548150635142, + "learning_rate": 5.713829346392907e-07, + "loss": 0.616886556148529, + "step": 5094 + }, + { + "epoch": 1.7954185022026432, + "grad_norm": 2.1253066780397725, + "learning_rate": 5.694432712654597e-07, + "loss": 0.5552375316619873, + "step": 5095 + }, + { + "epoch": 1.7957709251101321, + "grad_norm": 1.9305053090727797, + "learning_rate": 5.675068092721491e-07, 
+ "loss": 0.5956143736839294, + "step": 5096 + }, + { + "epoch": 1.796123348017621, + "grad_norm": 2.0198097994194675, + "learning_rate": 5.655735493167247e-07, + "loss": 0.5870288610458374, + "step": 5097 + }, + { + "epoch": 1.79647577092511, + "grad_norm": 1.737470684820577, + "learning_rate": 5.636434920554701e-07, + "loss": 0.5325669646263123, + "step": 5098 + }, + { + "epoch": 1.7968281938325992, + "grad_norm": 1.9881595702868853, + "learning_rate": 5.617166381435813e-07, + "loss": 0.5931425094604492, + "step": 5099 + }, + { + "epoch": 1.797180616740088, + "grad_norm": 1.9607916445612916, + "learning_rate": 5.597929882351627e-07, + "loss": 0.5755603313446045, + "step": 5100 + }, + { + "epoch": 1.797533039647577, + "grad_norm": 2.000480246693455, + "learning_rate": 5.578725429832344e-07, + "loss": 0.5780980587005615, + "step": 5101 + }, + { + "epoch": 1.7978854625550662, + "grad_norm": 1.9982279321373282, + "learning_rate": 5.559553030397258e-07, + "loss": 0.5863890647888184, + "step": 5102 + }, + { + "epoch": 1.7982378854625551, + "grad_norm": 1.8196971349794717, + "learning_rate": 5.540412690554842e-07, + "loss": 0.5577390789985657, + "step": 5103 + }, + { + "epoch": 1.798590308370044, + "grad_norm": 1.773628551628446, + "learning_rate": 5.521304416802642e-07, + "loss": 0.5994857549667358, + "step": 5104 + }, + { + "epoch": 1.798942731277533, + "grad_norm": 1.8364843823531443, + "learning_rate": 5.502228215627281e-07, + "loss": 0.6065348982810974, + "step": 5105 + }, + { + "epoch": 1.799295154185022, + "grad_norm": 1.9447341697044171, + "learning_rate": 5.483184093504568e-07, + "loss": 0.5390498638153076, + "step": 5106 + }, + { + "epoch": 1.7996475770925109, + "grad_norm": 1.9731136151561257, + "learning_rate": 5.464172056899364e-07, + "loss": 0.5826783180236816, + "step": 5107 + }, + { + "epoch": 1.8, + "grad_norm": 1.7733740837200977, + "learning_rate": 5.445192112265718e-07, + "loss": 0.5429874658584595, + "step": 5108 + }, + { + "epoch": 
1.800352422907489, + "grad_norm": 1.8521585290179927, + "learning_rate": 5.426244266046676e-07, + "loss": 0.5591466426849365, + "step": 5109 + }, + { + "epoch": 1.8007048458149781, + "grad_norm": 1.6996794293630604, + "learning_rate": 5.407328524674449e-07, + "loss": 0.5351911187171936, + "step": 5110 + }, + { + "epoch": 1.801057268722467, + "grad_norm": 1.9525068150093072, + "learning_rate": 5.388444894570378e-07, + "loss": 0.6095720529556274, + "step": 5111 + }, + { + "epoch": 1.801409691629956, + "grad_norm": 1.9048124225268466, + "learning_rate": 5.369593382144844e-07, + "loss": 0.6278849840164185, + "step": 5112 + }, + { + "epoch": 1.801762114537445, + "grad_norm": 1.932605893192458, + "learning_rate": 5.350773993797332e-07, + "loss": 0.6787056922912598, + "step": 5113 + }, + { + "epoch": 1.8021145374449339, + "grad_norm": 1.7901749162387552, + "learning_rate": 5.331986735916461e-07, + "loss": 0.6054684519767761, + "step": 5114 + }, + { + "epoch": 1.8024669603524228, + "grad_norm": 1.9918768270140568, + "learning_rate": 5.31323161487991e-07, + "loss": 0.5039973855018616, + "step": 5115 + }, + { + "epoch": 1.8028193832599118, + "grad_norm": 2.1203502988203207, + "learning_rate": 5.294508637054474e-07, + "loss": 0.6306504011154175, + "step": 5116 + }, + { + "epoch": 1.803171806167401, + "grad_norm": 1.5433818431075417, + "learning_rate": 5.275817808796013e-07, + "loss": 0.5654761791229248, + "step": 5117 + }, + { + "epoch": 1.8035242290748899, + "grad_norm": 1.84553610812893, + "learning_rate": 5.257159136449452e-07, + "loss": 0.5801905989646912, + "step": 5118 + }, + { + "epoch": 1.803876651982379, + "grad_norm": 1.9190330109285871, + "learning_rate": 5.238532626348891e-07, + "loss": 0.6565619707107544, + "step": 5119 + }, + { + "epoch": 1.804229074889868, + "grad_norm": 2.043183915925982, + "learning_rate": 5.219938284817416e-07, + "loss": 0.5923253297805786, + "step": 5120 + }, + { + "epoch": 1.8045814977973569, + "grad_norm": 2.0522176560055647, + 
"learning_rate": 5.2013761181672e-07, + "loss": 0.6697949171066284, + "step": 5121 + }, + { + "epoch": 1.8049339207048458, + "grad_norm": 1.5694231089682613, + "learning_rate": 5.182846132699571e-07, + "loss": 0.5146230459213257, + "step": 5122 + }, + { + "epoch": 1.8052863436123348, + "grad_norm": 1.8882278421308176, + "learning_rate": 5.16434833470485e-07, + "loss": 0.5928882360458374, + "step": 5123 + }, + { + "epoch": 1.8056387665198237, + "grad_norm": 1.8209325836560148, + "learning_rate": 5.145882730462481e-07, + "loss": 0.6114771366119385, + "step": 5124 + }, + { + "epoch": 1.8059911894273126, + "grad_norm": 2.0596769025893122, + "learning_rate": 5.127449326240952e-07, + "loss": 0.6624642014503479, + "step": 5125 + }, + { + "epoch": 1.8063436123348018, + "grad_norm": 1.6177669824438379, + "learning_rate": 5.109048128297822e-07, + "loss": 0.6277980208396912, + "step": 5126 + }, + { + "epoch": 1.8066960352422907, + "grad_norm": 1.8432956331440709, + "learning_rate": 5.090679142879751e-07, + "loss": 0.6470246911048889, + "step": 5127 + }, + { + "epoch": 1.8070484581497799, + "grad_norm": 1.9361376318593135, + "learning_rate": 5.072342376222438e-07, + "loss": 0.6418337821960449, + "step": 5128 + }, + { + "epoch": 1.8074008810572688, + "grad_norm": 1.7303831881097942, + "learning_rate": 5.054037834550596e-07, + "loss": 0.6013847589492798, + "step": 5129 + }, + { + "epoch": 1.8077533039647578, + "grad_norm": 2.0870369514809086, + "learning_rate": 5.035765524078095e-07, + "loss": 0.5354605913162231, + "step": 5130 + }, + { + "epoch": 1.8081057268722467, + "grad_norm": 1.7245482885328716, + "learning_rate": 5.01752545100781e-07, + "loss": 0.6017459034919739, + "step": 5131 + }, + { + "epoch": 1.8084581497797356, + "grad_norm": 2.1853671040659335, + "learning_rate": 4.999317621531663e-07, + "loss": 0.5929696559906006, + "step": 5132 + }, + { + "epoch": 1.8088105726872246, + "grad_norm": 2.1106102623060723, + "learning_rate": 4.981142041830645e-07, + "loss": 
0.6444251537322998, + "step": 5133 + }, + { + "epoch": 1.8091629955947135, + "grad_norm": 1.9231094224982612, + "learning_rate": 4.962998718074807e-07, + "loss": 0.5854116678237915, + "step": 5134 + }, + { + "epoch": 1.8095154185022027, + "grad_norm": 1.674252446757184, + "learning_rate": 4.944887656423248e-07, + "loss": 0.5145394206047058, + "step": 5135 + }, + { + "epoch": 1.8098678414096916, + "grad_norm": 1.9221197947181823, + "learning_rate": 4.926808863024102e-07, + "loss": 0.5733104348182678, + "step": 5136 + }, + { + "epoch": 1.8102202643171807, + "grad_norm": 1.955048282910108, + "learning_rate": 4.908762344014573e-07, + "loss": 0.5925072431564331, + "step": 5137 + }, + { + "epoch": 1.8105726872246697, + "grad_norm": 1.8754640994406597, + "learning_rate": 4.890748105520859e-07, + "loss": 0.5346912145614624, + "step": 5138 + }, + { + "epoch": 1.8109251101321586, + "grad_norm": 1.636475505756285, + "learning_rate": 4.87276615365827e-07, + "loss": 0.6206755638122559, + "step": 5139 + }, + { + "epoch": 1.8112775330396476, + "grad_norm": 2.0734228349073076, + "learning_rate": 4.854816494531089e-07, + "loss": 0.5998660326004028, + "step": 5140 + }, + { + "epoch": 1.8116299559471365, + "grad_norm": 2.10222956499389, + "learning_rate": 4.836899134232687e-07, + "loss": 0.44545644521713257, + "step": 5141 + }, + { + "epoch": 1.8119823788546254, + "grad_norm": 1.904050289597462, + "learning_rate": 4.81901407884543e-07, + "loss": 0.701204776763916, + "step": 5142 + }, + { + "epoch": 1.8123348017621146, + "grad_norm": 1.8707530799436762, + "learning_rate": 4.801161334440762e-07, + "loss": 0.6103897094726562, + "step": 5143 + }, + { + "epoch": 1.8126872246696035, + "grad_norm": 1.7727850982789193, + "learning_rate": 4.783340907079126e-07, + "loss": 0.5864719152450562, + "step": 5144 + }, + { + "epoch": 1.8130396475770925, + "grad_norm": 1.436946543481978, + "learning_rate": 4.7655528028099916e-07, + "loss": 0.46949082612991333, + "step": 5145 + }, + { + "epoch": 
1.8133920704845816, + "grad_norm": 1.9729708472080463, + "learning_rate": 4.7477970276718855e-07, + "loss": 0.6371885538101196, + "step": 5146 + }, + { + "epoch": 1.8137444933920706, + "grad_norm": 2.043577546107911, + "learning_rate": 4.730073587692319e-07, + "loss": 0.6819220781326294, + "step": 5147 + }, + { + "epoch": 1.8140969162995595, + "grad_norm": 1.7501541102560871, + "learning_rate": 4.712382488887868e-07, + "loss": 0.5230735540390015, + "step": 5148 + }, + { + "epoch": 1.8144493392070484, + "grad_norm": 1.6629154647812032, + "learning_rate": 4.6947237372640954e-07, + "loss": 0.5194997787475586, + "step": 5149 + }, + { + "epoch": 1.8148017621145374, + "grad_norm": 2.6396803493511842, + "learning_rate": 4.677097338815595e-07, + "loss": 0.6025055050849915, + "step": 5150 + }, + { + "epoch": 1.8151541850220263, + "grad_norm": 1.9158428969793393, + "learning_rate": 4.6595032995260135e-07, + "loss": 0.649467945098877, + "step": 5151 + }, + { + "epoch": 1.8155066079295155, + "grad_norm": 1.8951471308172565, + "learning_rate": 4.641941625367918e-07, + "loss": 0.5216347575187683, + "step": 5152 + }, + { + "epoch": 1.8158590308370044, + "grad_norm": 2.264572307408149, + "learning_rate": 4.6244123223030177e-07, + "loss": 0.5135647058486938, + "step": 5153 + }, + { + "epoch": 1.8162114537444936, + "grad_norm": 1.8178771999892822, + "learning_rate": 4.6069153962819193e-07, + "loss": 0.5526058673858643, + "step": 5154 + }, + { + "epoch": 1.8165638766519825, + "grad_norm": 2.050533288883353, + "learning_rate": 4.589450853244315e-07, + "loss": 0.5897486209869385, + "step": 5155 + }, + { + "epoch": 1.8169162995594714, + "grad_norm": 1.8009014119109743, + "learning_rate": 4.5720186991188517e-07, + "loss": 0.5698407888412476, + "step": 5156 + }, + { + "epoch": 1.8172687224669604, + "grad_norm": 1.7954864355128493, + "learning_rate": 4.5546189398232075e-07, + "loss": 0.579573392868042, + "step": 5157 + }, + { + "epoch": 1.8176211453744493, + "grad_norm": 
1.7473651992455344, + "learning_rate": 4.5372515812640573e-07, + "loss": 0.41852182149887085, + "step": 5158 + }, + { + "epoch": 1.8179735682819382, + "grad_norm": 1.7056493552996725, + "learning_rate": 4.519916629337107e-07, + "loss": 0.6081204414367676, + "step": 5159 + }, + { + "epoch": 1.8183259911894272, + "grad_norm": 2.046109798166009, + "learning_rate": 4.502614089926982e-07, + "loss": 0.5725652575492859, + "step": 5160 + }, + { + "epoch": 1.8186784140969163, + "grad_norm": 1.7147916989755474, + "learning_rate": 4.4853439689073965e-07, + "loss": 0.5109303593635559, + "step": 5161 + }, + { + "epoch": 1.8190308370044053, + "grad_norm": 1.8721629996812361, + "learning_rate": 4.468106272141004e-07, + "loss": 0.5647833347320557, + "step": 5162 + }, + { + "epoch": 1.8193832599118944, + "grad_norm": 1.8784402680779348, + "learning_rate": 4.450901005479469e-07, + "loss": 0.6074738502502441, + "step": 5163 + }, + { + "epoch": 1.8197356828193834, + "grad_norm": 1.9135972387212516, + "learning_rate": 4.433728174763452e-07, + "loss": 0.647289514541626, + "step": 5164 + }, + { + "epoch": 1.8200881057268723, + "grad_norm": 2.08976454113542, + "learning_rate": 4.416587785822568e-07, + "loss": 0.5817590951919556, + "step": 5165 + }, + { + "epoch": 1.8204405286343612, + "grad_norm": 2.105714289057314, + "learning_rate": 4.399479844475485e-07, + "loss": 0.6483672857284546, + "step": 5166 + }, + { + "epoch": 1.8207929515418502, + "grad_norm": 1.9562649517319024, + "learning_rate": 4.382404356529801e-07, + "loss": 0.5439441204071045, + "step": 5167 + }, + { + "epoch": 1.821145374449339, + "grad_norm": 1.8467126365486348, + "learning_rate": 4.3653613277820804e-07, + "loss": 0.5835710167884827, + "step": 5168 + }, + { + "epoch": 1.821497797356828, + "grad_norm": 1.9450074521030982, + "learning_rate": 4.3483507640179503e-07, + "loss": 0.7024152874946594, + "step": 5169 + }, + { + "epoch": 1.8218502202643172, + "grad_norm": 1.880332916659811, + "learning_rate": 
4.331372671011935e-07, + "loss": 0.5223513841629028, + "step": 5170 + }, + { + "epoch": 1.8222026431718061, + "grad_norm": 2.771814545513559, + "learning_rate": 4.3144270545275814e-07, + "loss": 0.5975688099861145, + "step": 5171 + }, + { + "epoch": 1.8225550660792953, + "grad_norm": 1.5329834705964882, + "learning_rate": 4.2975139203173977e-07, + "loss": 0.5459109544754028, + "step": 5172 + }, + { + "epoch": 1.8229074889867842, + "grad_norm": 1.8202354421886453, + "learning_rate": 4.2806332741228586e-07, + "loss": 0.6155862808227539, + "step": 5173 + }, + { + "epoch": 1.8232599118942732, + "grad_norm": 2.2226946714753644, + "learning_rate": 4.263785121674435e-07, + "loss": 0.6505374908447266, + "step": 5174 + }, + { + "epoch": 1.823612334801762, + "grad_norm": 1.9153455724722082, + "learning_rate": 4.246969468691553e-07, + "loss": 0.5243734121322632, + "step": 5175 + }, + { + "epoch": 1.823964757709251, + "grad_norm": 1.8732488601912396, + "learning_rate": 4.2301863208825676e-07, + "loss": 0.6931817531585693, + "step": 5176 + }, + { + "epoch": 1.82431718061674, + "grad_norm": 1.969859922329015, + "learning_rate": 4.2134356839448665e-07, + "loss": 0.5312765836715698, + "step": 5177 + }, + { + "epoch": 1.824669603524229, + "grad_norm": 1.9404158745446412, + "learning_rate": 4.1967175635647674e-07, + "loss": 0.598992109298706, + "step": 5178 + }, + { + "epoch": 1.825022026431718, + "grad_norm": 1.7631344780586065, + "learning_rate": 4.1800319654175413e-07, + "loss": 0.5844708681106567, + "step": 5179 + }, + { + "epoch": 1.825374449339207, + "grad_norm": 1.9995354508958225, + "learning_rate": 4.1633788951674357e-07, + "loss": 0.5884612798690796, + "step": 5180 + }, + { + "epoch": 1.8257268722466962, + "grad_norm": 1.72810410086028, + "learning_rate": 4.1467583584676395e-07, + "loss": 0.6038404107093811, + "step": 5181 + }, + { + "epoch": 1.826079295154185, + "grad_norm": 2.339259211755874, + "learning_rate": 4.130170360960317e-07, + "loss": 0.6511296033859253, + 
"step": 5182 + }, + { + "epoch": 1.826431718061674, + "grad_norm": 1.925197944351106, + "learning_rate": 4.113614908276609e-07, + "loss": 0.5884404182434082, + "step": 5183 + }, + { + "epoch": 1.826784140969163, + "grad_norm": 1.731239361884253, + "learning_rate": 4.097092006036507e-07, + "loss": 0.5549901723861694, + "step": 5184 + }, + { + "epoch": 1.827136563876652, + "grad_norm": 1.994782951411243, + "learning_rate": 4.0806016598490707e-07, + "loss": 0.561951756477356, + "step": 5185 + }, + { + "epoch": 1.8274889867841408, + "grad_norm": 1.869408348764558, + "learning_rate": 4.064143875312254e-07, + "loss": 0.6412413120269775, + "step": 5186 + }, + { + "epoch": 1.82784140969163, + "grad_norm": 1.6798143654231001, + "learning_rate": 4.0477186580129447e-07, + "loss": 0.6295674443244934, + "step": 5187 + }, + { + "epoch": 1.828193832599119, + "grad_norm": 1.6293958799120483, + "learning_rate": 4.031326013527015e-07, + "loss": 0.6700723767280579, + "step": 5188 + }, + { + "epoch": 1.8285462555066079, + "grad_norm": 1.8215522719850648, + "learning_rate": 4.014965947419236e-07, + "loss": 0.5758254528045654, + "step": 5189 + }, + { + "epoch": 1.828898678414097, + "grad_norm": 1.9932829475641192, + "learning_rate": 3.9986384652433654e-07, + "loss": 0.6663509607315063, + "step": 5190 + }, + { + "epoch": 1.829251101321586, + "grad_norm": 1.9935453293677252, + "learning_rate": 3.982343572542069e-07, + "loss": 0.6459337472915649, + "step": 5191 + }, + { + "epoch": 1.829603524229075, + "grad_norm": 1.854876606446137, + "learning_rate": 3.9660812748469336e-07, + "loss": 0.6411766409873962, + "step": 5192 + }, + { + "epoch": 1.8299559471365638, + "grad_norm": 2.1651745240120976, + "learning_rate": 3.9498515776785207e-07, + "loss": 0.711888313293457, + "step": 5193 + }, + { + "epoch": 1.8303083700440528, + "grad_norm": 2.2389356684810284, + "learning_rate": 3.933654486546312e-07, + "loss": 0.63288813829422, + "step": 5194 + }, + { + "epoch": 1.8306607929515417, + "grad_norm": 
1.9048245223498055, + "learning_rate": 3.9174900069486985e-07, + "loss": 0.6330822706222534, + "step": 5195 + }, + { + "epoch": 1.8310132158590309, + "grad_norm": 2.0831179708663154, + "learning_rate": 3.901358144373035e-07, + "loss": 0.7242149114608765, + "step": 5196 + }, + { + "epoch": 1.8313656387665198, + "grad_norm": 1.8790323108631095, + "learning_rate": 3.885258904295575e-07, + "loss": 0.6741703748703003, + "step": 5197 + }, + { + "epoch": 1.831718061674009, + "grad_norm": 1.9200909143991698, + "learning_rate": 3.8691922921815226e-07, + "loss": 0.625057578086853, + "step": 5198 + }, + { + "epoch": 1.832070484581498, + "grad_norm": 2.457846968244059, + "learning_rate": 3.853158313484995e-07, + "loss": 0.673669159412384, + "step": 5199 + }, + { + "epoch": 1.8324229074889868, + "grad_norm": 1.7310768756301407, + "learning_rate": 3.837156973648992e-07, + "loss": 0.5981203317642212, + "step": 5200 + }, + { + "epoch": 1.8327753303964758, + "grad_norm": 2.2560941225086992, + "learning_rate": 3.821188278105514e-07, + "loss": 0.6577199697494507, + "step": 5201 + }, + { + "epoch": 1.8331277533039647, + "grad_norm": 1.8570769012933126, + "learning_rate": 3.805252232275414e-07, + "loss": 0.6951043605804443, + "step": 5202 + }, + { + "epoch": 1.8334801762114536, + "grad_norm": 1.874325920944958, + "learning_rate": 3.7893488415684964e-07, + "loss": 0.572435200214386, + "step": 5203 + }, + { + "epoch": 1.8338325991189426, + "grad_norm": 1.7906206085216059, + "learning_rate": 3.773478111383455e-07, + "loss": 0.5849496126174927, + "step": 5204 + }, + { + "epoch": 1.8341850220264317, + "grad_norm": 1.9908368337543014, + "learning_rate": 3.7576400471079023e-07, + "loss": 0.5380967855453491, + "step": 5205 + }, + { + "epoch": 1.8345374449339207, + "grad_norm": 1.7322293442190257, + "learning_rate": 3.7418346541183923e-07, + "loss": 0.5681222677230835, + "step": 5206 + }, + { + "epoch": 1.8348898678414098, + "grad_norm": 1.7551676131968534, + "learning_rate": 
3.7260619377803677e-07, + "loss": 0.5012099146842957, + "step": 5207 + }, + { + "epoch": 1.8352422907488988, + "grad_norm": 1.9889231090545432, + "learning_rate": 3.710321903448133e-07, + "loss": 0.6175205707550049, + "step": 5208 + }, + { + "epoch": 1.8355947136563877, + "grad_norm": 2.0658320822662137, + "learning_rate": 3.6946145564649817e-07, + "loss": 0.6190954446792603, + "step": 5209 + }, + { + "epoch": 1.8359471365638766, + "grad_norm": 2.067936609981899, + "learning_rate": 3.678939902163048e-07, + "loss": 0.6820691823959351, + "step": 5210 + }, + { + "epoch": 1.8362995594713656, + "grad_norm": 1.6116358163190896, + "learning_rate": 3.6632979458633867e-07, + "loss": 0.5309683084487915, + "step": 5211 + }, + { + "epoch": 1.8366519823788545, + "grad_norm": 1.7416007879814253, + "learning_rate": 3.6476886928759726e-07, + "loss": 0.5110820531845093, + "step": 5212 + }, + { + "epoch": 1.8370044052863435, + "grad_norm": 1.723221372899004, + "learning_rate": 3.6321121484996447e-07, + "loss": 0.6226333975791931, + "step": 5213 + }, + { + "epoch": 1.8373568281938326, + "grad_norm": 2.234178040191492, + "learning_rate": 3.6165683180221735e-07, + "loss": 0.6287777423858643, + "step": 5214 + }, + { + "epoch": 1.8377092511013216, + "grad_norm": 1.9295755553308827, + "learning_rate": 3.601057206720182e-07, + "loss": 0.7033661603927612, + "step": 5215 + }, + { + "epoch": 1.8380616740088107, + "grad_norm": 2.3805238150126473, + "learning_rate": 3.5855788198592257e-07, + "loss": 0.5841168165206909, + "step": 5216 + }, + { + "epoch": 1.8384140969162996, + "grad_norm": 1.9475866760038651, + "learning_rate": 3.570133162693734e-07, + "loss": 0.6797176599502563, + "step": 5217 + }, + { + "epoch": 1.8387665198237886, + "grad_norm": 1.8282916435885754, + "learning_rate": 3.5547202404670246e-07, + "loss": 0.4317880868911743, + "step": 5218 + }, + { + "epoch": 1.8391189427312775, + "grad_norm": 1.8334146730463823, + "learning_rate": 3.5393400584113004e-07, + "loss": 
0.4757443368434906, + "step": 5219 + }, + { + "epoch": 1.8394713656387665, + "grad_norm": 1.907804753373484, + "learning_rate": 3.5239926217476627e-07, + "loss": 0.6341856718063354, + "step": 5220 + }, + { + "epoch": 1.8398237885462554, + "grad_norm": 1.8320811149781473, + "learning_rate": 3.5086779356860777e-07, + "loss": 0.5401504039764404, + "step": 5221 + }, + { + "epoch": 1.8401762114537445, + "grad_norm": 1.9485378653698677, + "learning_rate": 3.4933960054254314e-07, + "loss": 0.507185697555542, + "step": 5222 + }, + { + "epoch": 1.8405286343612335, + "grad_norm": 1.8475072625751607, + "learning_rate": 3.478146836153418e-07, + "loss": 0.544599175453186, + "step": 5223 + }, + { + "epoch": 1.8408810572687224, + "grad_norm": 1.7516560167770228, + "learning_rate": 3.4629304330466964e-07, + "loss": 0.5231183767318726, + "step": 5224 + }, + { + "epoch": 1.8412334801762116, + "grad_norm": 1.9594972590005177, + "learning_rate": 3.447746801270746e-07, + "loss": 0.5505118370056152, + "step": 5225 + }, + { + "epoch": 1.8415859030837005, + "grad_norm": 1.8779318369867126, + "learning_rate": 3.432595945979944e-07, + "loss": 0.6056097149848938, + "step": 5226 + }, + { + "epoch": 1.8419383259911895, + "grad_norm": 2.1828814894071806, + "learning_rate": 3.4174778723175204e-07, + "loss": 0.6292518377304077, + "step": 5227 + }, + { + "epoch": 1.8422907488986784, + "grad_norm": 2.121254282924953, + "learning_rate": 3.4023925854156035e-07, + "loss": 0.6821235418319702, + "step": 5228 + }, + { + "epoch": 1.8426431718061673, + "grad_norm": 1.8646887822875091, + "learning_rate": 3.3873400903951636e-07, + "loss": 0.6663388013839722, + "step": 5229 + }, + { + "epoch": 1.8429955947136563, + "grad_norm": 1.7699721471254064, + "learning_rate": 3.3723203923660795e-07, + "loss": 0.5283368825912476, + "step": 5230 + }, + { + "epoch": 1.8433480176211454, + "grad_norm": 1.8757843861417383, + "learning_rate": 3.35733349642704e-07, + "loss": 0.6193508505821228, + "step": 5231 + }, + { + 
"epoch": 1.8437004405286344, + "grad_norm": 1.8277200643148488, + "learning_rate": 3.3423794076656635e-07, + "loss": 0.5790667533874512, + "step": 5232 + }, + { + "epoch": 1.8440528634361235, + "grad_norm": 1.8773326611638317, + "learning_rate": 3.3274581311583786e-07, + "loss": 0.5774649381637573, + "step": 5233 + }, + { + "epoch": 1.8444052863436124, + "grad_norm": 1.8907427086265292, + "learning_rate": 3.312569671970489e-07, + "loss": 0.7818938493728638, + "step": 5234 + }, + { + "epoch": 1.8447577092511014, + "grad_norm": 1.9327729742836703, + "learning_rate": 3.297714035156174e-07, + "loss": 0.7140024900436401, + "step": 5235 + }, + { + "epoch": 1.8451101321585903, + "grad_norm": 1.8813227413168874, + "learning_rate": 3.2828912257584664e-07, + "loss": 0.526549220085144, + "step": 5236 + }, + { + "epoch": 1.8454625550660793, + "grad_norm": 1.7801884231788352, + "learning_rate": 3.268101248809219e-07, + "loss": 0.5497986078262329, + "step": 5237 + }, + { + "epoch": 1.8458149779735682, + "grad_norm": 1.8669723447216968, + "learning_rate": 3.2533441093292153e-07, + "loss": 0.587260901927948, + "step": 5238 + }, + { + "epoch": 1.8461674008810571, + "grad_norm": 1.7543011465942289, + "learning_rate": 3.238619812327992e-07, + "loss": 0.6064329147338867, + "step": 5239 + }, + { + "epoch": 1.8465198237885463, + "grad_norm": 1.6866654405083865, + "learning_rate": 3.22392836280403e-07, + "loss": 0.5427783727645874, + "step": 5240 + }, + { + "epoch": 1.8468722466960352, + "grad_norm": 2.007154381007414, + "learning_rate": 3.209269765744605e-07, + "loss": 0.6315155029296875, + "step": 5241 + }, + { + "epoch": 1.8472246696035244, + "grad_norm": 1.8683798567232428, + "learning_rate": 3.194644026125848e-07, + "loss": 0.47614991664886475, + "step": 5242 + }, + { + "epoch": 1.8475770925110133, + "grad_norm": 1.7870378472192856, + "learning_rate": 3.1800511489127553e-07, + "loss": 0.4671345353126526, + "step": 5243 + }, + { + "epoch": 1.8479295154185023, + "grad_norm": 
2.1401583736619774, + "learning_rate": 3.1654911390591404e-07, + "loss": 0.5751510262489319, + "step": 5244 + }, + { + "epoch": 1.8482819383259912, + "grad_norm": 1.8052174793154305, + "learning_rate": 3.1509640015076946e-07, + "loss": 0.41024816036224365, + "step": 5245 + }, + { + "epoch": 1.8486343612334801, + "grad_norm": 1.731551636677765, + "learning_rate": 3.136469741189918e-07, + "loss": 0.5401195287704468, + "step": 5246 + }, + { + "epoch": 1.848986784140969, + "grad_norm": 1.653370854405324, + "learning_rate": 3.1220083630261413e-07, + "loss": 0.526515007019043, + "step": 5247 + }, + { + "epoch": 1.849339207048458, + "grad_norm": 1.8913718815401968, + "learning_rate": 3.1075798719255813e-07, + "loss": 0.5476140975952148, + "step": 5248 + }, + { + "epoch": 1.8496916299559472, + "grad_norm": 1.8985078398075201, + "learning_rate": 3.093184272786254e-07, + "loss": 0.5542911291122437, + "step": 5249 + }, + { + "epoch": 1.850044052863436, + "grad_norm": 1.880723497688654, + "learning_rate": 3.078821570495005e-07, + "loss": 0.5147569179534912, + "step": 5250 + }, + { + "epoch": 1.8503964757709253, + "grad_norm": 1.982026450369604, + "learning_rate": 3.0644917699275355e-07, + "loss": 0.5774611830711365, + "step": 5251 + }, + { + "epoch": 1.8507488986784142, + "grad_norm": 1.7200421440570042, + "learning_rate": 3.0501948759483646e-07, + "loss": 0.6516300439834595, + "step": 5252 + }, + { + "epoch": 1.8511013215859031, + "grad_norm": 2.0195950340864495, + "learning_rate": 3.0359308934108435e-07, + "loss": 0.7598013877868652, + "step": 5253 + }, + { + "epoch": 1.851453744493392, + "grad_norm": 2.0638022912417506, + "learning_rate": 3.0216998271571653e-07, + "loss": 0.5605336427688599, + "step": 5254 + }, + { + "epoch": 1.851806167400881, + "grad_norm": 2.028778763216705, + "learning_rate": 3.007501682018288e-07, + "loss": 0.6549514532089233, + "step": 5255 + }, + { + "epoch": 1.85215859030837, + "grad_norm": 2.059939172990393, + "learning_rate": 
2.993336462814089e-07, + "loss": 0.5390901565551758, + "step": 5256 + }, + { + "epoch": 1.8525110132158589, + "grad_norm": 1.812559235788011, + "learning_rate": 2.979204174353201e-07, + "loss": 0.5039275884628296, + "step": 5257 + }, + { + "epoch": 1.852863436123348, + "grad_norm": 1.6793203683546194, + "learning_rate": 2.9651048214330956e-07, + "loss": 0.4715292453765869, + "step": 5258 + }, + { + "epoch": 1.853215859030837, + "grad_norm": 1.5445048853459802, + "learning_rate": 2.951038408840068e-07, + "loss": 0.4593687653541565, + "step": 5259 + }, + { + "epoch": 1.8535682819383261, + "grad_norm": 2.427211613937901, + "learning_rate": 2.9370049413492084e-07, + "loss": 0.8451346158981323, + "step": 5260 + }, + { + "epoch": 1.853920704845815, + "grad_norm": 1.796887553027914, + "learning_rate": 2.923004423724474e-07, + "loss": 0.5567130446434021, + "step": 5261 + }, + { + "epoch": 1.854273127753304, + "grad_norm": 1.6019285108338794, + "learning_rate": 2.909036860718595e-07, + "loss": 0.4740293622016907, + "step": 5262 + }, + { + "epoch": 1.854625550660793, + "grad_norm": 1.566732286884799, + "learning_rate": 2.895102257073101e-07, + "loss": 0.5279378294944763, + "step": 5263 + }, + { + "epoch": 1.8549779735682819, + "grad_norm": 2.0699049521167923, + "learning_rate": 2.881200617518387e-07, + "loss": 0.5977471470832825, + "step": 5264 + }, + { + "epoch": 1.8553303964757708, + "grad_norm": 2.147594228172352, + "learning_rate": 2.8673319467736104e-07, + "loss": 0.5385996699333191, + "step": 5265 + }, + { + "epoch": 1.85568281938326, + "grad_norm": 2.011382389323699, + "learning_rate": 2.85349624954675e-07, + "loss": 0.5702279806137085, + "step": 5266 + }, + { + "epoch": 1.856035242290749, + "grad_norm": 1.875774247263156, + "learning_rate": 2.839693530534604e-07, + "loss": 0.584097146987915, + "step": 5267 + }, + { + "epoch": 1.8563876651982378, + "grad_norm": 1.9561416110933127, + "learning_rate": 2.825923794422758e-07, + "loss": 0.6205782890319824, + "step": 5268 + 
}, + { + "epoch": 1.856740088105727, + "grad_norm": 1.8766933117628495, + "learning_rate": 2.8121870458856284e-07, + "loss": 0.5626852512359619, + "step": 5269 + }, + { + "epoch": 1.857092511013216, + "grad_norm": 1.826792073608219, + "learning_rate": 2.798483289586396e-07, + "loss": 0.6052513122558594, + "step": 5270 + }, + { + "epoch": 1.8574449339207049, + "grad_norm": 2.051566447554152, + "learning_rate": 2.7848125301770504e-07, + "loss": 0.5074095726013184, + "step": 5271 + }, + { + "epoch": 1.8577973568281938, + "grad_norm": 2.3608926664844705, + "learning_rate": 2.7711747722984127e-07, + "loss": 0.8006119728088379, + "step": 5272 + }, + { + "epoch": 1.8581497797356827, + "grad_norm": 1.939365874771501, + "learning_rate": 2.7575700205800694e-07, + "loss": 0.6437188982963562, + "step": 5273 + }, + { + "epoch": 1.8585022026431717, + "grad_norm": 2.070323156152843, + "learning_rate": 2.743998279640403e-07, + "loss": 0.6610177755355835, + "step": 5274 + }, + { + "epoch": 1.8588546255506608, + "grad_norm": 2.242727394045801, + "learning_rate": 2.7304595540865953e-07, + "loss": 0.6041977405548096, + "step": 5275 + }, + { + "epoch": 1.8592070484581498, + "grad_norm": 2.296252009493085, + "learning_rate": 2.716953848514625e-07, + "loss": 0.5684002041816711, + "step": 5276 + }, + { + "epoch": 1.859559471365639, + "grad_norm": 2.108426771462305, + "learning_rate": 2.703481167509281e-07, + "loss": 0.7256498336791992, + "step": 5277 + }, + { + "epoch": 1.8599118942731279, + "grad_norm": 1.959590007863519, + "learning_rate": 2.690041515644093e-07, + "loss": 0.7264266014099121, + "step": 5278 + }, + { + "epoch": 1.8602643171806168, + "grad_norm": 2.0027244373685047, + "learning_rate": 2.6766348974813895e-07, + "loss": 0.5427879095077515, + "step": 5279 + }, + { + "epoch": 1.8606167400881057, + "grad_norm": 1.679848534564951, + "learning_rate": 2.663261317572341e-07, + "loss": 0.5970745086669922, + "step": 5280 + }, + { + "epoch": 1.8609691629955947, + "grad_norm": 
1.9989999209106484, + "learning_rate": 2.6499207804568495e-07, + "loss": 0.5796299576759338, + "step": 5281 + }, + { + "epoch": 1.8613215859030836, + "grad_norm": 1.6433355014728201, + "learning_rate": 2.6366132906635923e-07, + "loss": 0.4900246262550354, + "step": 5282 + }, + { + "epoch": 1.8616740088105725, + "grad_norm": 1.8937189873731617, + "learning_rate": 2.6233388527100777e-07, + "loss": 0.6052582263946533, + "step": 5283 + }, + { + "epoch": 1.8620264317180617, + "grad_norm": 2.1632344831004127, + "learning_rate": 2.610097471102524e-07, + "loss": 0.6908484697341919, + "step": 5284 + }, + { + "epoch": 1.8623788546255506, + "grad_norm": 1.9493448159947622, + "learning_rate": 2.596889150336024e-07, + "loss": 0.6353795528411865, + "step": 5285 + }, + { + "epoch": 1.8627312775330398, + "grad_norm": 2.019445353702499, + "learning_rate": 2.5837138948943354e-07, + "loss": 0.803575873374939, + "step": 5286 + }, + { + "epoch": 1.8630837004405287, + "grad_norm": 1.9882041113358364, + "learning_rate": 2.5705717092500694e-07, + "loss": 0.5551957488059998, + "step": 5287 + }, + { + "epoch": 1.8634361233480177, + "grad_norm": 1.9987103830633048, + "learning_rate": 2.5574625978646017e-07, + "loss": 0.6247879266738892, + "step": 5288 + }, + { + "epoch": 1.8637885462555066, + "grad_norm": 2.072117287811421, + "learning_rate": 2.544386565188062e-07, + "loss": 0.6029977798461914, + "step": 5289 + }, + { + "epoch": 1.8641409691629955, + "grad_norm": 2.101747258049668, + "learning_rate": 2.531343615659343e-07, + "loss": 0.611297070980072, + "step": 5290 + }, + { + "epoch": 1.8644933920704845, + "grad_norm": 2.1168170865355616, + "learning_rate": 2.518333753706137e-07, + "loss": 0.5290260314941406, + "step": 5291 + }, + { + "epoch": 1.8648458149779734, + "grad_norm": 1.88270236786552, + "learning_rate": 2.5053569837448664e-07, + "loss": 0.5988795757293701, + "step": 5292 + }, + { + "epoch": 1.8651982378854626, + "grad_norm": 2.1933893236783613, + "learning_rate": 
2.4924133101807636e-07, + "loss": 0.671028733253479, + "step": 5293 + }, + { + "epoch": 1.8655506607929515, + "grad_norm": 2.195163128107634, + "learning_rate": 2.4795027374077905e-07, + "loss": 0.5741167664527893, + "step": 5294 + }, + { + "epoch": 1.8659030837004407, + "grad_norm": 1.8793688638635475, + "learning_rate": 2.4666252698086867e-07, + "loss": 0.47447216510772705, + "step": 5295 + }, + { + "epoch": 1.8662555066079296, + "grad_norm": 1.813537542020307, + "learning_rate": 2.453780911754955e-07, + "loss": 0.6535651087760925, + "step": 5296 + }, + { + "epoch": 1.8666079295154185, + "grad_norm": 1.830958965071389, + "learning_rate": 2.4409696676068517e-07, + "loss": 0.5928847193717957, + "step": 5297 + }, + { + "epoch": 1.8669603524229075, + "grad_norm": 2.1016696944101363, + "learning_rate": 2.428191541713387e-07, + "loss": 0.5928774476051331, + "step": 5298 + }, + { + "epoch": 1.8673127753303964, + "grad_norm": 1.8181831294339377, + "learning_rate": 2.415446538412358e-07, + "loss": 0.5798670053482056, + "step": 5299 + }, + { + "epoch": 1.8676651982378853, + "grad_norm": 1.8162014512536164, + "learning_rate": 2.4027346620302707e-07, + "loss": 0.6222843527793884, + "step": 5300 + }, + { + "epoch": 1.8680176211453743, + "grad_norm": 1.9183032685045331, + "learning_rate": 2.39005591688245e-07, + "loss": 0.5501612424850464, + "step": 5301 + }, + { + "epoch": 1.8683700440528634, + "grad_norm": 1.7621857286720093, + "learning_rate": 2.377410307272887e-07, + "loss": 0.5266422033309937, + "step": 5302 + }, + { + "epoch": 1.8687224669603524, + "grad_norm": 1.9926692528436012, + "learning_rate": 2.3647978374944037e-07, + "loss": 0.7145729064941406, + "step": 5303 + }, + { + "epoch": 1.8690748898678415, + "grad_norm": 1.8939089473542137, + "learning_rate": 2.3522185118285411e-07, + "loss": 0.6505781412124634, + "step": 5304 + }, + { + "epoch": 1.8694273127753305, + "grad_norm": 2.0817226286854607, + "learning_rate": 2.3396723345455728e-07, + "loss": 
0.6278528571128845, + "step": 5305 + }, + { + "epoch": 1.8697797356828194, + "grad_norm": 1.790557343760165, + "learning_rate": 2.3271593099045475e-07, + "loss": 0.5650503039360046, + "step": 5306 + }, + { + "epoch": 1.8701321585903083, + "grad_norm": 1.6157546701422072, + "learning_rate": 2.314679442153256e-07, + "loss": 0.6267939209938049, + "step": 5307 + }, + { + "epoch": 1.8704845814977973, + "grad_norm": 1.874302486649101, + "learning_rate": 2.302232735528187e-07, + "loss": 0.45913875102996826, + "step": 5308 + }, + { + "epoch": 1.8708370044052862, + "grad_norm": 1.7607480001908633, + "learning_rate": 2.289819194254661e-07, + "loss": 0.6122059226036072, + "step": 5309 + }, + { + "epoch": 1.8711894273127754, + "grad_norm": 1.803806841150382, + "learning_rate": 2.2774388225466514e-07, + "loss": 0.6479405164718628, + "step": 5310 + }, + { + "epoch": 1.8715418502202643, + "grad_norm": 1.8546829656575279, + "learning_rate": 2.26509162460693e-07, + "loss": 0.5013849139213562, + "step": 5311 + }, + { + "epoch": 1.8718942731277532, + "grad_norm": 1.749663744266161, + "learning_rate": 2.2527776046269767e-07, + "loss": 0.6431373357772827, + "step": 5312 + }, + { + "epoch": 1.8722466960352424, + "grad_norm": 1.669095711801791, + "learning_rate": 2.2404967667870147e-07, + "loss": 0.6447317004203796, + "step": 5313 + }, + { + "epoch": 1.8725991189427313, + "grad_norm": 2.405218866271529, + "learning_rate": 2.2282491152560203e-07, + "loss": 0.5784682631492615, + "step": 5314 + }, + { + "epoch": 1.8729515418502203, + "grad_norm": 1.7544004376252713, + "learning_rate": 2.2160346541916677e-07, + "loss": 0.560835599899292, + "step": 5315 + }, + { + "epoch": 1.8733039647577092, + "grad_norm": 1.7162975954294335, + "learning_rate": 2.2038533877404066e-07, + "loss": 0.5930913686752319, + "step": 5316 + }, + { + "epoch": 1.8736563876651982, + "grad_norm": 1.9892540663354406, + "learning_rate": 2.1917053200374073e-07, + "loss": 0.7221095561981201, + "step": 5317 + }, + { + "epoch": 
1.874008810572687, + "grad_norm": 1.9380281400359725, + "learning_rate": 2.179590455206515e-07, + "loss": 0.6307567358016968, + "step": 5318 + }, + { + "epoch": 1.8743612334801762, + "grad_norm": 2.0190052317760814, + "learning_rate": 2.167508797360396e-07, + "loss": 0.6158597469329834, + "step": 5319 + }, + { + "epoch": 1.8747136563876652, + "grad_norm": 1.7468326387459954, + "learning_rate": 2.1554603506003802e-07, + "loss": 0.5778557062149048, + "step": 5320 + }, + { + "epoch": 1.8750660792951543, + "grad_norm": 1.497372593580549, + "learning_rate": 2.1434451190165294e-07, + "loss": 0.5213632583618164, + "step": 5321 + }, + { + "epoch": 1.8754185022026433, + "grad_norm": 1.8555907678767487, + "learning_rate": 2.131463106687659e-07, + "loss": 0.6633203029632568, + "step": 5322 + }, + { + "epoch": 1.8757709251101322, + "grad_norm": 1.9991798348617227, + "learning_rate": 2.1195143176812817e-07, + "loss": 0.6586780548095703, + "step": 5323 + }, + { + "epoch": 1.8761233480176212, + "grad_norm": 1.991978810673319, + "learning_rate": 2.1075987560536305e-07, + "loss": 0.4946047067642212, + "step": 5324 + }, + { + "epoch": 1.87647577092511, + "grad_norm": 1.6744690075916624, + "learning_rate": 2.0957164258497031e-07, + "loss": 0.5689302682876587, + "step": 5325 + }, + { + "epoch": 1.876828193832599, + "grad_norm": 1.9550201402383367, + "learning_rate": 2.0838673311031287e-07, + "loss": 0.5761843323707581, + "step": 5326 + }, + { + "epoch": 1.877180616740088, + "grad_norm": 1.6070623974889393, + "learning_rate": 2.0720514758363343e-07, + "loss": 0.5714447498321533, + "step": 5327 + }, + { + "epoch": 1.8775330396475771, + "grad_norm": 1.7537019465709125, + "learning_rate": 2.0602688640604441e-07, + "loss": 0.4566301107406616, + "step": 5328 + }, + { + "epoch": 1.877885462555066, + "grad_norm": 2.110089760102471, + "learning_rate": 2.04851949977527e-07, + "loss": 0.6326137781143188, + "step": 5329 + }, + { + "epoch": 1.8782378854625552, + "grad_norm": 1.8775980517302555, + 
"learning_rate": 2.036803386969355e-07, + "loss": 0.6342206001281738, + "step": 5330 + }, + { + "epoch": 1.8785903083700441, + "grad_norm": 1.9958405881870251, + "learning_rate": 2.0251205296199616e-07, + "loss": 0.5525872707366943, + "step": 5331 + }, + { + "epoch": 1.878942731277533, + "grad_norm": 1.6965395036886874, + "learning_rate": 2.0134709316930733e-07, + "loss": 0.4932950735092163, + "step": 5332 + }, + { + "epoch": 1.879295154185022, + "grad_norm": 1.7918605717870588, + "learning_rate": 2.001854597143349e-07, + "loss": 0.6526485681533813, + "step": 5333 + }, + { + "epoch": 1.879647577092511, + "grad_norm": 1.8862781919579625, + "learning_rate": 1.990271529914156e-07, + "loss": 0.6256940960884094, + "step": 5334 + }, + { + "epoch": 1.88, + "grad_norm": 2.361417623387243, + "learning_rate": 1.9787217339376053e-07, + "loss": 0.6406987905502319, + "step": 5335 + }, + { + "epoch": 1.8803524229074888, + "grad_norm": 1.812802653812012, + "learning_rate": 1.9672052131345043e-07, + "loss": 0.6141321659088135, + "step": 5336 + }, + { + "epoch": 1.880704845814978, + "grad_norm": 2.025004487176686, + "learning_rate": 1.955721971414326e-07, + "loss": 0.558428943157196, + "step": 5337 + }, + { + "epoch": 1.881057268722467, + "grad_norm": 1.973943138705469, + "learning_rate": 1.9442720126752968e-07, + "loss": 0.5995065569877625, + "step": 5338 + }, + { + "epoch": 1.881409691629956, + "grad_norm": 1.6822565518265986, + "learning_rate": 1.932855340804296e-07, + "loss": 0.5109822750091553, + "step": 5339 + }, + { + "epoch": 1.881762114537445, + "grad_norm": 1.941646392245956, + "learning_rate": 1.921471959676957e-07, + "loss": 0.6695220470428467, + "step": 5340 + }, + { + "epoch": 1.882114537444934, + "grad_norm": 1.8857636319654494, + "learning_rate": 1.9101218731575777e-07, + "loss": 0.6982283592224121, + "step": 5341 + }, + { + "epoch": 1.882466960352423, + "grad_norm": 1.8944501787373655, + "learning_rate": 1.8988050850991314e-07, + "loss": 0.6475410461425781, + 
"step": 5342 + }, + { + "epoch": 1.8828193832599118, + "grad_norm": 1.7449353446414906, + "learning_rate": 1.8875215993433448e-07, + "loss": 0.57706218957901, + "step": 5343 + }, + { + "epoch": 1.8831718061674008, + "grad_norm": 1.708696671712054, + "learning_rate": 1.8762714197205988e-07, + "loss": 0.5243045091629028, + "step": 5344 + }, + { + "epoch": 1.88352422907489, + "grad_norm": 1.797956034726921, + "learning_rate": 1.865054550049994e-07, + "loss": 0.6208887100219727, + "step": 5345 + }, + { + "epoch": 1.8838766519823789, + "grad_norm": 1.9048581772706628, + "learning_rate": 1.853870994139284e-07, + "loss": 0.5572443008422852, + "step": 5346 + }, + { + "epoch": 1.8842290748898678, + "grad_norm": 1.7939928987370566, + "learning_rate": 1.8427207557849436e-07, + "loss": 0.5673031806945801, + "step": 5347 + }, + { + "epoch": 1.884581497797357, + "grad_norm": 1.6894216214789064, + "learning_rate": 1.8316038387721558e-07, + "loss": 0.5085422992706299, + "step": 5348 + }, + { + "epoch": 1.8849339207048459, + "grad_norm": 1.7455381888238348, + "learning_rate": 1.8205202468747463e-07, + "loss": 0.5480824708938599, + "step": 5349 + }, + { + "epoch": 1.8852863436123348, + "grad_norm": 1.7848642016680003, + "learning_rate": 1.8094699838552387e-07, + "loss": 0.6236293911933899, + "step": 5350 + }, + { + "epoch": 1.8856387665198238, + "grad_norm": 1.7626474829765526, + "learning_rate": 1.798453053464888e-07, + "loss": 0.541741132736206, + "step": 5351 + }, + { + "epoch": 1.8859911894273127, + "grad_norm": 1.7289887528200605, + "learning_rate": 1.7874694594435692e-07, + "loss": 0.5309538245201111, + "step": 5352 + }, + { + "epoch": 1.8863436123348016, + "grad_norm": 1.944311199542912, + "learning_rate": 1.7765192055198888e-07, + "loss": 0.5886228084564209, + "step": 5353 + }, + { + "epoch": 1.8866960352422908, + "grad_norm": 1.6415851491633797, + "learning_rate": 1.7656022954111064e-07, + "loss": 0.6216265559196472, + "step": 5354 + }, + { + "epoch": 1.8870484581497797, + 
"grad_norm": 1.6922081510439257, + "learning_rate": 1.7547187328231575e-07, + "loss": 0.5393999814987183, + "step": 5355 + }, + { + "epoch": 1.8874008810572689, + "grad_norm": 1.7167987260272457, + "learning_rate": 1.74386852145072e-07, + "loss": 0.583373486995697, + "step": 5356 + }, + { + "epoch": 1.8877533039647578, + "grad_norm": 2.361225928566298, + "learning_rate": 1.73305166497707e-07, + "loss": 0.6403313875198364, + "step": 5357 + }, + { + "epoch": 1.8881057268722468, + "grad_norm": 1.771396849548527, + "learning_rate": 1.7222681670741814e-07, + "loss": 0.5780963897705078, + "step": 5358 + }, + { + "epoch": 1.8884581497797357, + "grad_norm": 1.59802053134679, + "learning_rate": 1.711518031402748e-07, + "loss": 0.6046397686004639, + "step": 5359 + }, + { + "epoch": 1.8888105726872246, + "grad_norm": 1.5504259730519754, + "learning_rate": 1.700801261612084e-07, + "loss": 0.5582219362258911, + "step": 5360 + }, + { + "epoch": 1.8891629955947136, + "grad_norm": 1.962329345083699, + "learning_rate": 1.6901178613402125e-07, + "loss": 0.4880410432815552, + "step": 5361 + }, + { + "epoch": 1.8895154185022025, + "grad_norm": 2.055990524297856, + "learning_rate": 1.6794678342138105e-07, + "loss": 0.7417550086975098, + "step": 5362 + }, + { + "epoch": 1.8898678414096917, + "grad_norm": 1.8316934396355506, + "learning_rate": 1.668851183848219e-07, + "loss": 0.4616948962211609, + "step": 5363 + }, + { + "epoch": 1.8902202643171806, + "grad_norm": 1.6177478399502592, + "learning_rate": 1.658267913847489e-07, + "loss": 0.5595716834068298, + "step": 5364 + }, + { + "epoch": 1.8905726872246698, + "grad_norm": 1.9610306002643032, + "learning_rate": 1.6477180278042793e-07, + "loss": 0.72450852394104, + "step": 5365 + }, + { + "epoch": 1.8909251101321587, + "grad_norm": 1.8036541582694667, + "learning_rate": 1.637201529299959e-07, + "loss": 0.6261592507362366, + "step": 5366 + }, + { + "epoch": 1.8912775330396476, + "grad_norm": 2.1024939179342823, + "learning_rate": 
1.6267184219045607e-07, + "loss": 0.5023064613342285, + "step": 5367 + }, + { + "epoch": 1.8916299559471366, + "grad_norm": 1.9210322300280602, + "learning_rate": 1.6162687091767714e-07, + "loss": 0.7113457918167114, + "step": 5368 + }, + { + "epoch": 1.8919823788546255, + "grad_norm": 1.9212954550271457, + "learning_rate": 1.6058523946639426e-07, + "loss": 0.5376787185668945, + "step": 5369 + }, + { + "epoch": 1.8923348017621144, + "grad_norm": 1.86817536856008, + "learning_rate": 1.5954694819020788e-07, + "loss": 0.6523979902267456, + "step": 5370 + }, + { + "epoch": 1.8926872246696034, + "grad_norm": 1.841265437549123, + "learning_rate": 1.5851199744158607e-07, + "loss": 0.6610705852508545, + "step": 5371 + }, + { + "epoch": 1.8930396475770925, + "grad_norm": 2.0967966308369053, + "learning_rate": 1.5748038757186445e-07, + "loss": 0.657126247882843, + "step": 5372 + }, + { + "epoch": 1.8933920704845815, + "grad_norm": 2.3300722251609893, + "learning_rate": 1.5645211893123846e-07, + "loss": 0.7247096300125122, + "step": 5373 + }, + { + "epoch": 1.8937444933920706, + "grad_norm": 1.5063549897958597, + "learning_rate": 1.5542719186877553e-07, + "loss": 0.5392117500305176, + "step": 5374 + }, + { + "epoch": 1.8940969162995596, + "grad_norm": 1.706529406386883, + "learning_rate": 1.5440560673240735e-07, + "loss": 0.5038361549377441, + "step": 5375 + }, + { + "epoch": 1.8944493392070485, + "grad_norm": 1.9403637299706042, + "learning_rate": 1.5338736386892982e-07, + "loss": 0.4768316447734833, + "step": 5376 + }, + { + "epoch": 1.8948017621145374, + "grad_norm": 1.7917263966392405, + "learning_rate": 1.5237246362400316e-07, + "loss": 0.5925793051719666, + "step": 5377 + }, + { + "epoch": 1.8951541850220264, + "grad_norm": 2.029166285154972, + "learning_rate": 1.5136090634215616e-07, + "loss": 0.47840988636016846, + "step": 5378 + }, + { + "epoch": 1.8955066079295153, + "grad_norm": 1.9172034216887006, + "learning_rate": 1.5035269236677974e-07, + "loss": 
0.6365169882774353, + "step": 5379 + }, + { + "epoch": 1.8958590308370042, + "grad_norm": 1.789950493711397, + "learning_rate": 1.4934782204013344e-07, + "loss": 0.6287797689437866, + "step": 5380 + }, + { + "epoch": 1.8962114537444934, + "grad_norm": 1.8420293657892082, + "learning_rate": 1.4834629570333548e-07, + "loss": 0.6859137415885925, + "step": 5381 + }, + { + "epoch": 1.8965638766519823, + "grad_norm": 1.9365437650034845, + "learning_rate": 1.4734811369637725e-07, + "loss": 0.5545040369033813, + "step": 5382 + }, + { + "epoch": 1.8969162995594715, + "grad_norm": 1.6857031681916985, + "learning_rate": 1.463532763581077e-07, + "loss": 0.6418923139572144, + "step": 5383 + }, + { + "epoch": 1.8972687224669604, + "grad_norm": 4.115242480246632, + "learning_rate": 1.4536178402624334e-07, + "loss": 0.7618488669395447, + "step": 5384 + }, + { + "epoch": 1.8976211453744494, + "grad_norm": 1.7790399709296727, + "learning_rate": 1.4437363703736718e-07, + "loss": 0.6178286671638489, + "step": 5385 + }, + { + "epoch": 1.8979735682819383, + "grad_norm": 2.33955789440919, + "learning_rate": 1.4338883572692087e-07, + "loss": 0.6800570487976074, + "step": 5386 + }, + { + "epoch": 1.8983259911894272, + "grad_norm": 1.9056441030293936, + "learning_rate": 1.4240738042921588e-07, + "loss": 0.6063584089279175, + "step": 5387 + }, + { + "epoch": 1.8986784140969162, + "grad_norm": 1.857878498727731, + "learning_rate": 1.4142927147742792e-07, + "loss": 0.5631873607635498, + "step": 5388 + }, + { + "epoch": 1.8990308370044053, + "grad_norm": 1.6999145603505723, + "learning_rate": 1.4045450920358917e-07, + "loss": 0.5346484184265137, + "step": 5389 + }, + { + "epoch": 1.8993832599118943, + "grad_norm": 1.660876208730021, + "learning_rate": 1.3948309393860605e-07, + "loss": 0.5043535232543945, + "step": 5390 + }, + { + "epoch": 1.8997356828193832, + "grad_norm": 1.9091498065078292, + "learning_rate": 1.3851502601224032e-07, + "loss": 0.6591805219650269, + "step": 5391 + }, + { + 
"epoch": 1.9000881057268724, + "grad_norm": 1.777554153966534, + "learning_rate": 1.3755030575312355e-07, + "loss": 0.6831244826316833, + "step": 5392 + }, + { + "epoch": 1.9004405286343613, + "grad_norm": 1.744983267268657, + "learning_rate": 1.3658893348874714e-07, + "loss": 0.6572617292404175, + "step": 5393 + }, + { + "epoch": 1.9007929515418502, + "grad_norm": 2.007956379457216, + "learning_rate": 1.3563090954546555e-07, + "loss": 0.5834530591964722, + "step": 5394 + }, + { + "epoch": 1.9011453744493392, + "grad_norm": 1.8405418946212868, + "learning_rate": 1.3467623424850084e-07, + "loss": 0.5810972452163696, + "step": 5395 + }, + { + "epoch": 1.9014977973568281, + "grad_norm": 1.8342670520255937, + "learning_rate": 1.3372490792193493e-07, + "loss": 0.6338596940040588, + "step": 5396 + }, + { + "epoch": 1.901850220264317, + "grad_norm": 2.4739742581402946, + "learning_rate": 1.327769308887117e-07, + "loss": 0.5274045467376709, + "step": 5397 + }, + { + "epoch": 1.9022026431718062, + "grad_norm": 2.13415646905843, + "learning_rate": 1.3183230347064147e-07, + "loss": 0.5416278839111328, + "step": 5398 + }, + { + "epoch": 1.9025550660792951, + "grad_norm": 1.8878260396672215, + "learning_rate": 1.3089102598839442e-07, + "loss": 0.4818935692310333, + "step": 5399 + }, + { + "epoch": 1.9029074889867843, + "grad_norm": 1.6383283062285148, + "learning_rate": 1.299530987615072e-07, + "loss": 0.4553770124912262, + "step": 5400 + }, + { + "epoch": 1.9032599118942732, + "grad_norm": 1.7060011862412936, + "learning_rate": 1.2901852210837507e-07, + "loss": 0.5663920640945435, + "step": 5401 + }, + { + "epoch": 1.9036123348017622, + "grad_norm": 1.975611905778012, + "learning_rate": 1.2808729634625872e-07, + "loss": 0.5654638409614563, + "step": 5402 + }, + { + "epoch": 1.903964757709251, + "grad_norm": 2.0012288604540136, + "learning_rate": 1.271594217912797e-07, + "loss": 0.8061939477920532, + "step": 5403 + }, + { + "epoch": 1.90431718061674, + "grad_norm": 
2.149695499003911, + "learning_rate": 1.2623489875842276e-07, + "loss": 0.5832188129425049, + "step": 5404 + }, + { + "epoch": 1.904669603524229, + "grad_norm": 1.8966385092802618, + "learning_rate": 1.2531372756153458e-07, + "loss": 0.6112633943557739, + "step": 5405 + }, + { + "epoch": 1.905022026431718, + "grad_norm": 2.3113031929819106, + "learning_rate": 1.2439590851332394e-07, + "loss": 0.7083494663238525, + "step": 5406 + }, + { + "epoch": 1.905374449339207, + "grad_norm": 1.9110441437452201, + "learning_rate": 1.2348144192536272e-07, + "loss": 0.5319055318832397, + "step": 5407 + }, + { + "epoch": 1.905726872246696, + "grad_norm": 1.9724655581165889, + "learning_rate": 1.2257032810808256e-07, + "loss": 0.6199945211410522, + "step": 5408 + }, + { + "epoch": 1.9060792951541852, + "grad_norm": 2.3233890606574503, + "learning_rate": 1.2166256737077942e-07, + "loss": 0.6596004962921143, + "step": 5409 + }, + { + "epoch": 1.906431718061674, + "grad_norm": 1.9040617554840082, + "learning_rate": 1.20758160021609e-07, + "loss": 0.553988516330719, + "step": 5410 + }, + { + "epoch": 1.906784140969163, + "grad_norm": 2.329855084255152, + "learning_rate": 1.1985710636759128e-07, + "loss": 0.6295895576477051, + "step": 5411 + }, + { + "epoch": 1.907136563876652, + "grad_norm": 2.035449496855298, + "learning_rate": 1.1895940671460271e-07, + "loss": 0.6555598378181458, + "step": 5412 + }, + { + "epoch": 1.907488986784141, + "grad_norm": 1.8252966820746244, + "learning_rate": 1.1806506136738616e-07, + "loss": 0.48203831911087036, + "step": 5413 + }, + { + "epoch": 1.9078414096916299, + "grad_norm": 2.0052153848511045, + "learning_rate": 1.1717407062954434e-07, + "loss": 0.6632858514785767, + "step": 5414 + }, + { + "epoch": 1.9081938325991188, + "grad_norm": 1.913108464706502, + "learning_rate": 1.1628643480354085e-07, + "loss": 0.6058870553970337, + "step": 5415 + }, + { + "epoch": 1.908546255506608, + "grad_norm": 1.6689328390033278, + "learning_rate": 
1.1540215419070022e-07, + "loss": 0.5106638073921204, + "step": 5416 + }, + { + "epoch": 1.9088986784140969, + "grad_norm": 1.965112171139023, + "learning_rate": 1.1452122909120788e-07, + "loss": 0.6641250848770142, + "step": 5417 + }, + { + "epoch": 1.909251101321586, + "grad_norm": 1.7797017689691026, + "learning_rate": 1.1364365980411019e-07, + "loss": 0.4823518395423889, + "step": 5418 + }, + { + "epoch": 1.909603524229075, + "grad_norm": 1.7374946519813605, + "learning_rate": 1.127694466273166e-07, + "loss": 0.5770869255065918, + "step": 5419 + }, + { + "epoch": 1.909955947136564, + "grad_norm": 1.8439547121423094, + "learning_rate": 1.1189858985759306e-07, + "loss": 0.5120491981506348, + "step": 5420 + }, + { + "epoch": 1.9103083700440529, + "grad_norm": 1.998054444662161, + "learning_rate": 1.1103108979056865e-07, + "loss": 0.6742277145385742, + "step": 5421 + }, + { + "epoch": 1.9106607929515418, + "grad_norm": 1.7361045655014782, + "learning_rate": 1.1016694672073336e-07, + "loss": 0.6053510904312134, + "step": 5422 + }, + { + "epoch": 1.9110132158590307, + "grad_norm": 2.276872906150792, + "learning_rate": 1.0930616094143698e-07, + "loss": 0.5598228573799133, + "step": 5423 + }, + { + "epoch": 1.9113656387665197, + "grad_norm": 1.7689371613585823, + "learning_rate": 1.0844873274488799e-07, + "loss": 0.599521279335022, + "step": 5424 + }, + { + "epoch": 1.9117180616740088, + "grad_norm": 2.270274631303626, + "learning_rate": 1.075946624221591e-07, + "loss": 0.5986596345901489, + "step": 5425 + }, + { + "epoch": 1.9120704845814978, + "grad_norm": 2.0819173495219054, + "learning_rate": 1.067439502631773e-07, + "loss": 0.5657980442047119, + "step": 5426 + }, + { + "epoch": 1.912422907488987, + "grad_norm": 2.498725021517388, + "learning_rate": 1.0589659655673712e-07, + "loss": 0.5561040639877319, + "step": 5427 + }, + { + "epoch": 1.9127753303964758, + "grad_norm": 1.6241033411576455, + "learning_rate": 1.0505260159048513e-07, + "loss": 0.5088320970535278, + 
"step": 5428 + }, + { + "epoch": 1.9131277533039648, + "grad_norm": 2.1207031706665407, + "learning_rate": 1.0421196565093217e-07, + "loss": 0.5679075717926025, + "step": 5429 + }, + { + "epoch": 1.9134801762114537, + "grad_norm": 1.8775486377310404, + "learning_rate": 1.0337468902344994e-07, + "loss": 0.6701461672782898, + "step": 5430 + }, + { + "epoch": 1.9138325991189427, + "grad_norm": 1.7839638341554918, + "learning_rate": 1.0254077199226553e-07, + "loss": 0.6172112822532654, + "step": 5431 + }, + { + "epoch": 1.9141850220264316, + "grad_norm": 1.904067212081221, + "learning_rate": 1.0171021484046806e-07, + "loss": 0.5926263332366943, + "step": 5432 + }, + { + "epoch": 1.9145374449339208, + "grad_norm": 1.7190787727179386, + "learning_rate": 1.0088301785000754e-07, + "loss": 0.6142431497573853, + "step": 5433 + }, + { + "epoch": 1.9148898678414097, + "grad_norm": 1.7095738070807496, + "learning_rate": 1.0005918130168934e-07, + "loss": 0.5367780923843384, + "step": 5434 + }, + { + "epoch": 1.9152422907488986, + "grad_norm": 1.8769142431022592, + "learning_rate": 9.923870547518311e-08, + "loss": 0.5241641998291016, + "step": 5435 + }, + { + "epoch": 1.9155947136563878, + "grad_norm": 1.7765958549274539, + "learning_rate": 9.842159064901157e-08, + "loss": 0.5906308889389038, + "step": 5436 + }, + { + "epoch": 1.9159471365638767, + "grad_norm": 2.1275572555046613, + "learning_rate": 9.760783710056176e-08, + "loss": 0.5411181449890137, + "step": 5437 + }, + { + "epoch": 1.9162995594713657, + "grad_norm": 1.9001328464490854, + "learning_rate": 9.679744510607825e-08, + "loss": 0.6313618421554565, + "step": 5438 + }, + { + "epoch": 1.9166519823788546, + "grad_norm": 2.0658646856716336, + "learning_rate": 9.599041494066208e-08, + "loss": 0.6330033540725708, + "step": 5439 + }, + { + "epoch": 1.9170044052863435, + "grad_norm": 1.9617429681187768, + "learning_rate": 9.518674687827634e-08, + "loss": 0.5859507322311401, + "step": 5440 + }, + { + "epoch": 
1.9173568281938325, + "grad_norm": 1.9233196169731877, + "learning_rate": 9.438644119174057e-08, + "loss": 0.571119487285614, + "step": 5441 + }, + { + "epoch": 1.9177092511013216, + "grad_norm": 1.683294616332208, + "learning_rate": 9.3589498152733e-08, + "loss": 0.6114518046379089, + "step": 5442 + }, + { + "epoch": 1.9180616740088106, + "grad_norm": 2.0948221060814407, + "learning_rate": 9.279591803179277e-08, + "loss": 0.5762027502059937, + "step": 5443 + }, + { + "epoch": 1.9184140969162997, + "grad_norm": 1.973540736612678, + "learning_rate": 9.200570109831441e-08, + "loss": 0.6081440448760986, + "step": 5444 + }, + { + "epoch": 1.9187665198237887, + "grad_norm": 1.9242540837021294, + "learning_rate": 9.121884762055222e-08, + "loss": 0.5682440996170044, + "step": 5445 + }, + { + "epoch": 1.9191189427312776, + "grad_norm": 1.642224199268087, + "learning_rate": 9.043535786561919e-08, + "loss": 0.5290100574493408, + "step": 5446 + }, + { + "epoch": 1.9194713656387665, + "grad_norm": 1.8013641871034827, + "learning_rate": 8.965523209948367e-08, + "loss": 0.5743255019187927, + "step": 5447 + }, + { + "epoch": 1.9198237885462555, + "grad_norm": 1.6357977481393366, + "learning_rate": 8.887847058697718e-08, + "loss": 0.5955618023872375, + "step": 5448 + }, + { + "epoch": 1.9201762114537444, + "grad_norm": 1.9706217525454803, + "learning_rate": 8.810507359178322e-08, + "loss": 0.4732915759086609, + "step": 5449 + }, + { + "epoch": 1.9205286343612333, + "grad_norm": 3.2730228664607797, + "learning_rate": 8.733504137644621e-08, + "loss": 0.6712108850479126, + "step": 5450 + }, + { + "epoch": 1.9208810572687225, + "grad_norm": 1.997966446518774, + "learning_rate": 8.656837420237152e-08, + "loss": 0.5169811248779297, + "step": 5451 + }, + { + "epoch": 1.9212334801762114, + "grad_norm": 1.9146732631772796, + "learning_rate": 8.580507232981428e-08, + "loss": 0.6117082238197327, + "step": 5452 + }, + { + "epoch": 1.9215859030837006, + "grad_norm": 1.7690878518096709, + 
"learning_rate": 8.504513601789388e-08, + "loss": 0.7020283937454224, + "step": 5453 + }, + { + "epoch": 1.9219383259911895, + "grad_norm": 1.806111695783304, + "learning_rate": 8.42885655245862e-08, + "loss": 0.5489979386329651, + "step": 5454 + }, + { + "epoch": 1.9222907488986785, + "grad_norm": 1.8218906131330599, + "learning_rate": 8.353536110672133e-08, + "loss": 0.5361644625663757, + "step": 5455 + }, + { + "epoch": 1.9226431718061674, + "grad_norm": 1.8728336665856926, + "learning_rate": 8.278552301998921e-08, + "loss": 0.6470010280609131, + "step": 5456 + }, + { + "epoch": 1.9229955947136563, + "grad_norm": 1.5338046694887773, + "learning_rate": 8.203905151893731e-08, + "loss": 0.4642202854156494, + "step": 5457 + }, + { + "epoch": 1.9233480176211453, + "grad_norm": 2.1878989180883357, + "learning_rate": 8.129594685696852e-08, + "loss": 0.6817516088485718, + "step": 5458 + }, + { + "epoch": 1.9237004405286342, + "grad_norm": 1.7544221338170298, + "learning_rate": 8.055620928634433e-08, + "loss": 0.5748617649078369, + "step": 5459 + }, + { + "epoch": 1.9240528634361234, + "grad_norm": 1.9928156109239001, + "learning_rate": 7.981983905818281e-08, + "loss": 0.6730939149856567, + "step": 5460 + }, + { + "epoch": 1.9244052863436123, + "grad_norm": 1.665760800669473, + "learning_rate": 7.90868364224584e-08, + "loss": 0.46469685435295105, + "step": 5461 + }, + { + "epoch": 1.9247577092511015, + "grad_norm": 2.0844638903136907, + "learning_rate": 7.835720162800209e-08, + "loss": 0.5633926391601562, + "step": 5462 + }, + { + "epoch": 1.9251101321585904, + "grad_norm": 2.034693536740542, + "learning_rate": 7.76309349225035e-08, + "loss": 0.5813394784927368, + "step": 5463 + }, + { + "epoch": 1.9254625550660793, + "grad_norm": 1.4118750743542163, + "learning_rate": 7.690803655250656e-08, + "loss": 0.39959418773651123, + "step": 5464 + }, + { + "epoch": 1.9258149779735683, + "grad_norm": 1.7685280750016403, + "learning_rate": 7.618850676341383e-08, + "loss": 
0.6136372089385986, + "step": 5465 + }, + { + "epoch": 1.9261674008810572, + "grad_norm": 1.7393751984149959, + "learning_rate": 7.547234579948104e-08, + "loss": 0.6664354801177979, + "step": 5466 + }, + { + "epoch": 1.9265198237885461, + "grad_norm": 1.8827898065352628, + "learning_rate": 7.475955390382483e-08, + "loss": 0.6009566783905029, + "step": 5467 + }, + { + "epoch": 1.9268722466960353, + "grad_norm": 1.7872694267120686, + "learning_rate": 7.405013131841499e-08, + "loss": 0.7307299375534058, + "step": 5468 + }, + { + "epoch": 1.9272246696035242, + "grad_norm": 1.8234703336391604, + "learning_rate": 7.334407828407885e-08, + "loss": 0.5459531545639038, + "step": 5469 + }, + { + "epoch": 1.9275770925110132, + "grad_norm": 2.1252744976115583, + "learning_rate": 7.264139504049916e-08, + "loss": 0.6230820417404175, + "step": 5470 + }, + { + "epoch": 1.9279295154185023, + "grad_norm": 1.6781926619362313, + "learning_rate": 7.194208182621509e-08, + "loss": 0.5282379984855652, + "step": 5471 + }, + { + "epoch": 1.9282819383259913, + "grad_norm": 2.1980396503246604, + "learning_rate": 7.12461388786212e-08, + "loss": 0.626023530960083, + "step": 5472 + }, + { + "epoch": 1.9286343612334802, + "grad_norm": 2.1608211937841197, + "learning_rate": 7.055356643396849e-08, + "loss": 0.6897492408752441, + "step": 5473 + }, + { + "epoch": 1.9289867841409691, + "grad_norm": 1.7214187213722456, + "learning_rate": 6.986436472736447e-08, + "loss": 0.583849310874939, + "step": 5474 + }, + { + "epoch": 1.929339207048458, + "grad_norm": 1.7492909983006562, + "learning_rate": 6.917853399277197e-08, + "loss": 0.6056735515594482, + "step": 5475 + }, + { + "epoch": 1.929691629955947, + "grad_norm": 1.8166317563571888, + "learning_rate": 6.849607446300699e-08, + "loss": 0.52838134765625, + "step": 5476 + }, + { + "epoch": 1.9300440528634362, + "grad_norm": 2.0425025849187954, + "learning_rate": 6.781698636974532e-08, + "loss": 0.6466653943061829, + "step": 5477 + }, + { + "epoch": 
1.930396475770925, + "grad_norm": 1.9593462888477349, + "learning_rate": 6.714126994351589e-08, + "loss": 0.6570286750793457, + "step": 5478 + }, + { + "epoch": 1.9307488986784143, + "grad_norm": 2.4867358577799576, + "learning_rate": 6.646892541370409e-08, + "loss": 0.7303042411804199, + "step": 5479 + }, + { + "epoch": 1.9311013215859032, + "grad_norm": 1.7938376915708092, + "learning_rate": 6.579995300854846e-08, + "loss": 0.5556488037109375, + "step": 5480 + }, + { + "epoch": 1.9314537444933921, + "grad_norm": 1.9624740523274589, + "learning_rate": 6.513435295514404e-08, + "loss": 0.6673456430435181, + "step": 5481 + }, + { + "epoch": 1.931806167400881, + "grad_norm": 1.9681067241776358, + "learning_rate": 6.447212547944448e-08, + "loss": 0.5605199337005615, + "step": 5482 + }, + { + "epoch": 1.93215859030837, + "grad_norm": 2.1935053480556785, + "learning_rate": 6.381327080625111e-08, + "loss": 0.5455278158187866, + "step": 5483 + }, + { + "epoch": 1.932511013215859, + "grad_norm": 1.8919678372461928, + "learning_rate": 6.315778915922722e-08, + "loss": 0.5371166467666626, + "step": 5484 + }, + { + "epoch": 1.9328634361233479, + "grad_norm": 1.9114985069981878, + "learning_rate": 6.250568076088814e-08, + "loss": 0.5873486399650574, + "step": 5485 + }, + { + "epoch": 1.933215859030837, + "grad_norm": 1.706006640351556, + "learning_rate": 6.18569458326046e-08, + "loss": 0.4187420606613159, + "step": 5486 + }, + { + "epoch": 1.933568281938326, + "grad_norm": 1.900919435061996, + "learning_rate": 6.121158459460042e-08, + "loss": 0.6006373167037964, + "step": 5487 + }, + { + "epoch": 1.9339207048458151, + "grad_norm": 1.819026585986156, + "learning_rate": 6.056959726595702e-08, + "loss": 0.6022043228149414, + "step": 5488 + }, + { + "epoch": 1.934273127753304, + "grad_norm": 2.037720704211898, + "learning_rate": 5.993098406460895e-08, + "loss": 0.6324778199195862, + "step": 5489 + }, + { + "epoch": 1.934625550660793, + "grad_norm": 2.0263189254585026, + 
"learning_rate": 5.929574520734505e-08, + "loss": 0.545529305934906, + "step": 5490 + }, + { + "epoch": 1.934977973568282, + "grad_norm": 1.9957592171950855, + "learning_rate": 5.8663880909809454e-08, + "loss": 0.623627781867981, + "step": 5491 + }, + { + "epoch": 1.9353303964757709, + "grad_norm": 1.9773130682504432, + "learning_rate": 5.80353913865006e-08, + "loss": 0.529983639717102, + "step": 5492 + }, + { + "epoch": 1.9356828193832598, + "grad_norm": 1.8301905692374867, + "learning_rate": 5.7410276850770055e-08, + "loss": 0.638504147529602, + "step": 5493 + }, + { + "epoch": 1.9360352422907487, + "grad_norm": 1.7706026455559263, + "learning_rate": 5.678853751482694e-08, + "loss": 0.6822696924209595, + "step": 5494 + }, + { + "epoch": 1.936387665198238, + "grad_norm": 1.6924491917110376, + "learning_rate": 5.6170173589730204e-08, + "loss": 0.5454750061035156, + "step": 5495 + }, + { + "epoch": 1.9367400881057268, + "grad_norm": 2.1428203564618915, + "learning_rate": 5.555518528539638e-08, + "loss": 0.5301260948181152, + "step": 5496 + }, + { + "epoch": 1.937092511013216, + "grad_norm": 1.965552985899495, + "learning_rate": 5.4943572810594035e-08, + "loss": 0.697251558303833, + "step": 5497 + }, + { + "epoch": 1.937444933920705, + "grad_norm": 1.8589631146352448, + "learning_rate": 5.433533637294819e-08, + "loss": 0.5171586871147156, + "step": 5498 + }, + { + "epoch": 1.9377973568281939, + "grad_norm": 1.974708525019113, + "learning_rate": 5.373047617893479e-08, + "loss": 0.6006083488464355, + "step": 5499 + }, + { + "epoch": 1.9381497797356828, + "grad_norm": 1.8914658578007237, + "learning_rate": 5.312899243388403e-08, + "loss": 0.6083849668502808, + "step": 5500 + } + ], + "logging_steps": 1, + "max_steps": 5676, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + 
"should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1700693086617600.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-5500/training_args.bin b/checkpoint-5500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3d16cae9f8126645d9b722fd466525457b2f8a90 --- /dev/null +++ b/checkpoint-5500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db5e3f64fea9062d775ce1214f2b31fbf79ffdfb10af7998752ce02faa3d3dd5 +size 6968 diff --git a/checkpoint-5500/zero_to_fp32.py b/checkpoint-5500/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..5995d6e6f04e43b989587aa9022a3aef0c66d694 --- /dev/null +++ b/checkpoint-5500/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info("Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info("Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/checkpoint-5600/README.md b/checkpoint-5600/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4005c4d8e7a819833408da4794e4e74d2ced6553 --- /dev/null +++ b/checkpoint-5600/README.md @@ -0,0 +1,208 @@ +--- +base_model: Qwen/Qwen2.5-VL-7B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. 
More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-5600/adapter_config.json b/checkpoint-5600/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8224bce2b962b82e55c954c000a28629995b1870 --- /dev/null +++ b/checkpoint-5600/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + 
"layers.8.mlp.gate_proj", + "layers.20.mlp.up_proj", + "layers.25.mlp.down_proj", + "layers.21.mlp.down_proj", + "layers.22.mlp.gate_proj", + "layers.23.mlp.gate_proj", + "layers.11.mlp.gate_proj", + "layers.13.mlp.down_proj", + "layers.8.mlp.down_proj", + "layers.18.mlp.gate_proj", + "layers.7.mlp.down_proj", + "layers.1.mlp.down_proj", + "layers.20.mlp.gate_proj", + "layers.16.mlp.down_proj", + "layers.23.mlp.down_proj", + "layers.12.mlp.gate_proj", + "layers.12.mlp.down_proj", + "layers.1.mlp.up_proj", + "layers.2.mlp.gate_proj", + "layers.15.mlp.down_proj", + "layers.25.mlp.up_proj", + "layers.27.mlp.down_proj", + "layers.19.mlp.down_proj", + "layers.14.mlp.gate_proj", + "layers.27.mlp.gate_proj", + "layers.18.mlp.up_proj", + "layers.2.mlp.up_proj", + "layers.0.mlp.gate_proj", + "q_proj", + "layers.10.mlp.down_proj", + "layers.11.mlp.up_proj", + "layers.15.mlp.up_proj", + "layers.7.mlp.up_proj", + "layers.8.mlp.up_proj", + "layers.3.mlp.up_proj", + "layers.10.mlp.up_proj", + "layers.23.mlp.up_proj", + "layers.19.mlp.gate_proj", + "layers.18.mlp.down_proj", + "layers.0.mlp.up_proj", + "layers.12.mlp.up_proj", + "layers.13.mlp.up_proj", + "o_proj", + "layers.5.mlp.down_proj", + "layers.14.mlp.down_proj", + "layers.2.mlp.down_proj", + "layers.6.mlp.down_proj", + "layers.6.mlp.gate_proj", + "layers.25.mlp.gate_proj", + "layers.3.mlp.down_proj", + "k_proj", + "layers.17.mlp.gate_proj", + "layers.24.mlp.gate_proj", + "layers.1.mlp.gate_proj", + "layers.4.mlp.down_proj", + "layers.9.mlp.down_proj", + "layers.0.mlp.down_proj", + "layers.9.mlp.gate_proj", + "layers.5.mlp.gate_proj", + "layers.24.mlp.up_proj", + "layers.20.mlp.down_proj", + "layers.16.mlp.gate_proj", + "layers.26.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.22.mlp.down_proj", + "layers.16.mlp.up_proj", + "v_proj", + "layers.27.mlp.up_proj", + "layers.13.mlp.gate_proj", + "layers.21.mlp.up_proj", + "layers.4.mlp.gate_proj", + "layers.9.mlp.up_proj", + "layers.3.mlp.gate_proj", + 
"layers.7.mlp.gate_proj", + "layers.4.mlp.up_proj", + "layers.24.mlp.down_proj", + "layers.26.mlp.gate_proj", + "layers.22.mlp.up_proj", + "layers.11.mlp.down_proj", + "layers.10.mlp.gate_proj", + "layers.5.mlp.up_proj", + "layers.21.mlp.gate_proj", + "layers.6.mlp.up_proj", + "layers.19.mlp.up_proj", + "layers.14.mlp.up_proj", + "layers.26.mlp.down_proj", + "layers.17.mlp.up_proj", + "layers.15.mlp.gate_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-5600/adapter_model.safetensors b/checkpoint-5600/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..637d781db8f10d626af09fcd0e4d8ea069d26490 --- /dev/null +++ b/checkpoint-5600/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74257e848b750a49698bdf41eb6c843a5b8aebff3d20ff7a08082a98f0275bc8 +size 323020440 diff --git a/checkpoint-5600/chat_template.jinja b/checkpoint-5600/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/checkpoint-5600/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% 
set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-5600/global_step5600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-5600/global_step5600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..74305e41956169d39a86d648454260dbc42851e5 --- /dev/null +++ b/checkpoint-5600/global_step5600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24f79b77fb3e1f3cdd9e3663e7bae80c69554861acbbc9262c12249e947fb08c +size 1937772272 diff --git a/checkpoint-5600/global_step5600/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-5600/global_step5600/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c2ef1d7ef2489d7fe34610c45776c262e64e8264 --- /dev/null +++ b/checkpoint-5600/global_step5600/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:675e0e63308ccc692519955ed0c9f0f4ac7d45849d952e2628f4040f90eae053 +size 460630 diff --git a/checkpoint-5600/latest b/checkpoint-5600/latest new file mode 100644 index 0000000000000000000000000000000000000000..78b8fe3c22bdf229583622ff3d38fb1865c2fdc0 --- /dev/null +++ b/checkpoint-5600/latest @@ -0,0 +1 @@ +global_step5600 \ No newline at end of file diff --git a/checkpoint-5600/processor_config.json b/checkpoint-5600/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e717d9bf475c411369034636e82e48cf79108a8 --- /dev/null +++ b/checkpoint-5600/processor_config.json @@ -0,0 +1,63 @@ +{ + "image_processor": { + "data_format": 
"channels_first", + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "merge_size": 2, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 + }, + "processor_class": "Qwen2_5_VLProcessor", + "video_processor": { + "data_format": "channels_first", + "default_to_square": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": false, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "max_frames": 768, + "merge_size": 2, + "min_frames": 4, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2, + "video_processor_type": "Qwen2VLVideoProcessor" + } +} diff --git a/checkpoint-5600/rng_state.pth b/checkpoint-5600/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7814adab650500f5954ee9f4503d797f0801caaf --- /dev/null +++ b/checkpoint-5600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edb95d607e9ed3b3532def29a25c505bd834fd7a1ae8bfc3adf494b6202062a0 +size 14244 diff --git a/checkpoint-5600/scheduler.pt b/checkpoint-5600/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a7fa76ac79394023e6a4a839c1691703600834d1 --- /dev/null +++ b/checkpoint-5600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36b8cbda23b894149f146dbadc4aa74984388359a02a31795495fbe29077a3e5 +size 
1000 diff --git a/checkpoint-5600/tokenizer.json b/checkpoint-5600/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/checkpoint-5600/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/checkpoint-5600/tokenizer_config.json b/checkpoint-5600/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/checkpoint-5600/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/checkpoint-5600/trainer_state.json b/checkpoint-5600/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6e7dfea476eb236920d0c256b920acf0d6f9d6ac --- /dev/null +++ b/checkpoint-5600/trainer_state.json @@ -0,0 +1,39234 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9733920704845815, + "eval_steps": 500, + "global_step": 5600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0003524229074889868, + "grad_norm": 
1.6512674233185107, + "learning_rate": 0.0, + "loss": 1.493973731994629, + "step": 1 + }, + { + "epoch": 0.0007048458149779736, + "grad_norm": 1.4463228571593894, + "learning_rate": 7.042253521126761e-08, + "loss": 1.3692013025283813, + "step": 2 + }, + { + "epoch": 0.0010572687224669603, + "grad_norm": 1.4036766254408197, + "learning_rate": 1.4084507042253522e-07, + "loss": 1.3996260166168213, + "step": 3 + }, + { + "epoch": 0.0014096916299559472, + "grad_norm": 1.29446596506829, + "learning_rate": 2.1126760563380284e-07, + "loss": 1.3011515140533447, + "step": 4 + }, + { + "epoch": 0.001762114537444934, + "grad_norm": 1.5130555881795185, + "learning_rate": 2.8169014084507043e-07, + "loss": 1.3736083507537842, + "step": 5 + }, + { + "epoch": 0.0021145374449339205, + "grad_norm": 1.247517750517551, + "learning_rate": 3.521126760563381e-07, + "loss": 1.051241159439087, + "step": 6 + }, + { + "epoch": 0.0024669603524229075, + "grad_norm": 1.611437944890658, + "learning_rate": 4.225352112676057e-07, + "loss": 1.2594621181488037, + "step": 7 + }, + { + "epoch": 0.0028193832599118945, + "grad_norm": 1.4604380967241444, + "learning_rate": 4.929577464788733e-07, + "loss": 1.0498416423797607, + "step": 8 + }, + { + "epoch": 0.003171806167400881, + "grad_norm": 1.367174801368101, + "learning_rate": 5.633802816901409e-07, + "loss": 1.3313459157943726, + "step": 9 + }, + { + "epoch": 0.003524229074889868, + "grad_norm": 1.4378623823320218, + "learning_rate": 6.338028169014085e-07, + "loss": 1.2484922409057617, + "step": 10 + }, + { + "epoch": 0.0038766519823788545, + "grad_norm": 1.197911167360161, + "learning_rate": 7.042253521126762e-07, + "loss": 1.097194790840149, + "step": 11 + }, + { + "epoch": 0.004229074889867841, + "grad_norm": 1.3767897701080816, + "learning_rate": 7.746478873239437e-07, + "loss": 1.3065136671066284, + "step": 12 + }, + { + "epoch": 0.0045814977973568285, + "grad_norm": 1.2501177622273731, + "learning_rate": 8.450704225352114e-07, + "loss": 
1.1574026346206665, + "step": 13 + }, + { + "epoch": 0.004933920704845815, + "grad_norm": 1.3002699887597202, + "learning_rate": 9.154929577464789e-07, + "loss": 1.1509445905685425, + "step": 14 + }, + { + "epoch": 0.0052863436123348016, + "grad_norm": 1.3458236321153771, + "learning_rate": 9.859154929577465e-07, + "loss": 1.069403886795044, + "step": 15 + }, + { + "epoch": 0.005638766519823789, + "grad_norm": 1.52712721337833, + "learning_rate": 1.0563380281690142e-06, + "loss": 1.1731287240982056, + "step": 16 + }, + { + "epoch": 0.0059911894273127755, + "grad_norm": 1.5628075837505453, + "learning_rate": 1.1267605633802817e-06, + "loss": 0.9314254522323608, + "step": 17 + }, + { + "epoch": 0.006343612334801762, + "grad_norm": 1.3686084350519343, + "learning_rate": 1.1971830985915492e-06, + "loss": 1.2915008068084717, + "step": 18 + }, + { + "epoch": 0.006696035242290749, + "grad_norm": 1.2653916141417434, + "learning_rate": 1.267605633802817e-06, + "loss": 1.1088309288024902, + "step": 19 + }, + { + "epoch": 0.007048458149779736, + "grad_norm": 1.362753082153478, + "learning_rate": 1.3380281690140844e-06, + "loss": 1.21511709690094, + "step": 20 + }, + { + "epoch": 0.0074008810572687225, + "grad_norm": 1.3054604275805306, + "learning_rate": 1.4084507042253523e-06, + "loss": 1.241409420967102, + "step": 21 + }, + { + "epoch": 0.007753303964757709, + "grad_norm": 1.3646723208790772, + "learning_rate": 1.4788732394366198e-06, + "loss": 1.2170014381408691, + "step": 22 + }, + { + "epoch": 0.008105726872246696, + "grad_norm": 1.424586503093174, + "learning_rate": 1.5492957746478873e-06, + "loss": 1.1405870914459229, + "step": 23 + }, + { + "epoch": 0.008458149779735682, + "grad_norm": 1.429368633092772, + "learning_rate": 1.6197183098591552e-06, + "loss": 1.122542381286621, + "step": 24 + }, + { + "epoch": 0.00881057268722467, + "grad_norm": 1.2201478884239083, + "learning_rate": 1.6901408450704227e-06, + "loss": 1.1686937808990479, + "step": 25 + }, + { + "epoch": 
0.009162995594713657, + "grad_norm": 1.4065678272985154, + "learning_rate": 1.7605633802816902e-06, + "loss": 1.215955376625061, + "step": 26 + }, + { + "epoch": 0.009515418502202643, + "grad_norm": 1.3879787249393913, + "learning_rate": 1.8309859154929579e-06, + "loss": 1.075179100036621, + "step": 27 + }, + { + "epoch": 0.00986784140969163, + "grad_norm": 1.2313632017619234, + "learning_rate": 1.9014084507042254e-06, + "loss": 1.198237419128418, + "step": 28 + }, + { + "epoch": 0.010220264317180617, + "grad_norm": 1.6833211669458825, + "learning_rate": 1.971830985915493e-06, + "loss": 1.2356700897216797, + "step": 29 + }, + { + "epoch": 0.010572687224669603, + "grad_norm": 1.3637967517131555, + "learning_rate": 2.0422535211267608e-06, + "loss": 1.2373592853546143, + "step": 30 + }, + { + "epoch": 0.01092511013215859, + "grad_norm": 1.377232613936239, + "learning_rate": 2.1126760563380285e-06, + "loss": 1.1857718229293823, + "step": 31 + }, + { + "epoch": 0.011277533039647578, + "grad_norm": 1.3566319214936433, + "learning_rate": 2.1830985915492958e-06, + "loss": 1.1844017505645752, + "step": 32 + }, + { + "epoch": 0.011629955947136564, + "grad_norm": 1.2486508447822717, + "learning_rate": 2.2535211267605635e-06, + "loss": 1.275226354598999, + "step": 33 + }, + { + "epoch": 0.011982378854625551, + "grad_norm": 1.3044888735575617, + "learning_rate": 2.323943661971831e-06, + "loss": 1.169473648071289, + "step": 34 + }, + { + "epoch": 0.012334801762114538, + "grad_norm": 1.2608655384056326, + "learning_rate": 2.3943661971830984e-06, + "loss": 1.2182841300964355, + "step": 35 + }, + { + "epoch": 0.012687224669603524, + "grad_norm": 1.3780698009940295, + "learning_rate": 2.4647887323943666e-06, + "loss": 1.2110469341278076, + "step": 36 + }, + { + "epoch": 0.01303964757709251, + "grad_norm": 1.3829042894220551, + "learning_rate": 2.535211267605634e-06, + "loss": 1.2886571884155273, + "step": 37 + }, + { + "epoch": 0.013392070484581497, + "grad_norm": 
1.2954566526081723, + "learning_rate": 2.6056338028169015e-06, + "loss": 1.0740901231765747, + "step": 38 + }, + { + "epoch": 0.013744493392070485, + "grad_norm": 1.2079072281757672, + "learning_rate": 2.676056338028169e-06, + "loss": 1.0119279623031616, + "step": 39 + }, + { + "epoch": 0.014096916299559472, + "grad_norm": 1.1460333237155051, + "learning_rate": 2.746478873239437e-06, + "loss": 1.0752044916152954, + "step": 40 + }, + { + "epoch": 0.014449339207048459, + "grad_norm": 1.3690776364650978, + "learning_rate": 2.8169014084507046e-06, + "loss": 1.345343828201294, + "step": 41 + }, + { + "epoch": 0.014801762114537445, + "grad_norm": 1.0813865739605455, + "learning_rate": 2.887323943661972e-06, + "loss": 1.102332353591919, + "step": 42 + }, + { + "epoch": 0.015154185022026432, + "grad_norm": 1.1643083589428873, + "learning_rate": 2.9577464788732396e-06, + "loss": 1.006919264793396, + "step": 43 + }, + { + "epoch": 0.015506607929515418, + "grad_norm": 1.1582412568670832, + "learning_rate": 3.0281690140845073e-06, + "loss": 1.104026436805725, + "step": 44 + }, + { + "epoch": 0.015859030837004406, + "grad_norm": 1.3060563783851553, + "learning_rate": 3.0985915492957746e-06, + "loss": 1.299152135848999, + "step": 45 + }, + { + "epoch": 0.01621145374449339, + "grad_norm": 1.4304085919726754, + "learning_rate": 3.1690140845070427e-06, + "loss": 1.1075072288513184, + "step": 46 + }, + { + "epoch": 0.01656387665198238, + "grad_norm": 0.9865545367526579, + "learning_rate": 3.2394366197183104e-06, + "loss": 1.0296107530593872, + "step": 47 + }, + { + "epoch": 0.016916299559471364, + "grad_norm": 1.1960961939132708, + "learning_rate": 3.3098591549295777e-06, + "loss": 1.1097803115844727, + "step": 48 + }, + { + "epoch": 0.017268722466960353, + "grad_norm": 1.0974682037636356, + "learning_rate": 3.3802816901408454e-06, + "loss": 0.945678174495697, + "step": 49 + }, + { + "epoch": 0.01762114537444934, + "grad_norm": 0.9924343523024514, + "learning_rate": 
3.4507042253521127e-06, + "loss": 1.075556993484497, + "step": 50 + }, + { + "epoch": 0.017973568281938326, + "grad_norm": 1.0849849170905757, + "learning_rate": 3.5211267605633804e-06, + "loss": 1.0790367126464844, + "step": 51 + }, + { + "epoch": 0.018325991189427314, + "grad_norm": 1.220415189867698, + "learning_rate": 3.5915492957746485e-06, + "loss": 1.2567799091339111, + "step": 52 + }, + { + "epoch": 0.0186784140969163, + "grad_norm": 1.1058732491316554, + "learning_rate": 3.6619718309859158e-06, + "loss": 1.1437780857086182, + "step": 53 + }, + { + "epoch": 0.019030837004405287, + "grad_norm": 1.0871981925234313, + "learning_rate": 3.7323943661971835e-06, + "loss": 1.0962307453155518, + "step": 54 + }, + { + "epoch": 0.019383259911894272, + "grad_norm": 0.9603250960542756, + "learning_rate": 3.8028169014084508e-06, + "loss": 1.0149122476577759, + "step": 55 + }, + { + "epoch": 0.01973568281938326, + "grad_norm": 0.9630324155849409, + "learning_rate": 3.873239436619718e-06, + "loss": 0.9029096364974976, + "step": 56 + }, + { + "epoch": 0.02008810572687225, + "grad_norm": 1.1449327271146603, + "learning_rate": 3.943661971830986e-06, + "loss": 1.1290819644927979, + "step": 57 + }, + { + "epoch": 0.020440528634361233, + "grad_norm": 1.1046082203063978, + "learning_rate": 4.014084507042254e-06, + "loss": 1.0965365171432495, + "step": 58 + }, + { + "epoch": 0.02079295154185022, + "grad_norm": 1.2553158733514387, + "learning_rate": 4.0845070422535216e-06, + "loss": 1.2854020595550537, + "step": 59 + }, + { + "epoch": 0.021145374449339206, + "grad_norm": 1.0484971235480365, + "learning_rate": 4.154929577464789e-06, + "loss": 1.0303996801376343, + "step": 60 + }, + { + "epoch": 0.021497797356828195, + "grad_norm": 0.9670460326314384, + "learning_rate": 4.225352112676057e-06, + "loss": 1.0811198949813843, + "step": 61 + }, + { + "epoch": 0.02185022026431718, + "grad_norm": 0.992548164971829, + "learning_rate": 4.295774647887324e-06, + "loss": 1.1373648643493652, + 
"step": 62 + }, + { + "epoch": 0.022202643171806168, + "grad_norm": 1.009220008285868, + "learning_rate": 4.3661971830985915e-06, + "loss": 0.8717563152313232, + "step": 63 + }, + { + "epoch": 0.022555066079295156, + "grad_norm": 0.9171432664885892, + "learning_rate": 4.43661971830986e-06, + "loss": 0.9939290881156921, + "step": 64 + }, + { + "epoch": 0.02290748898678414, + "grad_norm": 1.139248361968882, + "learning_rate": 4.507042253521127e-06, + "loss": 1.1776926517486572, + "step": 65 + }, + { + "epoch": 0.02325991189427313, + "grad_norm": 0.8971048282009709, + "learning_rate": 4.577464788732395e-06, + "loss": 0.9149726629257202, + "step": 66 + }, + { + "epoch": 0.023612334801762114, + "grad_norm": 0.9597323965843616, + "learning_rate": 4.647887323943662e-06, + "loss": 0.996609091758728, + "step": 67 + }, + { + "epoch": 0.023964757709251102, + "grad_norm": 1.0074979173506051, + "learning_rate": 4.71830985915493e-06, + "loss": 1.102593183517456, + "step": 68 + }, + { + "epoch": 0.024317180616740087, + "grad_norm": 0.8938780612317906, + "learning_rate": 4.788732394366197e-06, + "loss": 1.0912048816680908, + "step": 69 + }, + { + "epoch": 0.024669603524229075, + "grad_norm": 0.936561005612989, + "learning_rate": 4.859154929577465e-06, + "loss": 1.1192498207092285, + "step": 70 + }, + { + "epoch": 0.025022026431718063, + "grad_norm": 1.059387656590118, + "learning_rate": 4.929577464788733e-06, + "loss": 1.0358459949493408, + "step": 71 + }, + { + "epoch": 0.025374449339207048, + "grad_norm": 0.9588756664450253, + "learning_rate": 5e-06, + "loss": 1.076169490814209, + "step": 72 + }, + { + "epoch": 0.025726872246696036, + "grad_norm": 1.355361750045824, + "learning_rate": 5.070422535211268e-06, + "loss": 0.9906084537506104, + "step": 73 + }, + { + "epoch": 0.02607929515418502, + "grad_norm": 1.187443908189842, + "learning_rate": 5.140845070422536e-06, + "loss": 0.8163654804229736, + "step": 74 + }, + { + "epoch": 0.02643171806167401, + "grad_norm": 
0.9764403954844053, + "learning_rate": 5.211267605633803e-06, + "loss": 1.140099048614502, + "step": 75 + }, + { + "epoch": 0.026784140969162994, + "grad_norm": 0.863156257130764, + "learning_rate": 5.28169014084507e-06, + "loss": 0.7654916048049927, + "step": 76 + }, + { + "epoch": 0.027136563876651983, + "grad_norm": 1.0935626721226286, + "learning_rate": 5.352112676056338e-06, + "loss": 0.9476499557495117, + "step": 77 + }, + { + "epoch": 0.02748898678414097, + "grad_norm": 1.0875377862843238, + "learning_rate": 5.422535211267607e-06, + "loss": 1.120811939239502, + "step": 78 + }, + { + "epoch": 0.027841409691629956, + "grad_norm": 0.9578356099138406, + "learning_rate": 5.492957746478874e-06, + "loss": 0.9745736122131348, + "step": 79 + }, + { + "epoch": 0.028193832599118944, + "grad_norm": 1.074452545035149, + "learning_rate": 5.563380281690142e-06, + "loss": 1.086181879043579, + "step": 80 + }, + { + "epoch": 0.02854625550660793, + "grad_norm": 0.9510910839018534, + "learning_rate": 5.633802816901409e-06, + "loss": 0.9904681444168091, + "step": 81 + }, + { + "epoch": 0.028898678414096917, + "grad_norm": 1.0958191892945044, + "learning_rate": 5.7042253521126766e-06, + "loss": 1.0311436653137207, + "step": 82 + }, + { + "epoch": 0.029251101321585902, + "grad_norm": 1.066849780091366, + "learning_rate": 5.774647887323944e-06, + "loss": 0.996998131275177, + "step": 83 + }, + { + "epoch": 0.02960352422907489, + "grad_norm": 1.1397250117300832, + "learning_rate": 5.845070422535212e-06, + "loss": 1.1526594161987305, + "step": 84 + }, + { + "epoch": 0.029955947136563875, + "grad_norm": 1.145115830862378, + "learning_rate": 5.915492957746479e-06, + "loss": 1.0914695262908936, + "step": 85 + }, + { + "epoch": 0.030308370044052863, + "grad_norm": 1.1551265503738541, + "learning_rate": 5.9859154929577465e-06, + "loss": 0.9558745622634888, + "step": 86 + }, + { + "epoch": 0.03066079295154185, + "grad_norm": 1.0734015706063305, + "learning_rate": 6.056338028169015e-06, + 
"loss": 0.9668983221054077, + "step": 87 + }, + { + "epoch": 0.031013215859030836, + "grad_norm": 1.2231010171085557, + "learning_rate": 6.126760563380282e-06, + "loss": 1.0132758617401123, + "step": 88 + }, + { + "epoch": 0.03136563876651982, + "grad_norm": 0.9911917466596063, + "learning_rate": 6.197183098591549e-06, + "loss": 1.0816935300827026, + "step": 89 + }, + { + "epoch": 0.03171806167400881, + "grad_norm": 1.1311803004782939, + "learning_rate": 6.267605633802818e-06, + "loss": 1.03245210647583, + "step": 90 + }, + { + "epoch": 0.0320704845814978, + "grad_norm": 1.0820347157611818, + "learning_rate": 6.3380281690140855e-06, + "loss": 0.9812602400779724, + "step": 91 + }, + { + "epoch": 0.03242290748898678, + "grad_norm": 1.0039329578342324, + "learning_rate": 6.408450704225353e-06, + "loss": 0.9303219318389893, + "step": 92 + }, + { + "epoch": 0.032775330396475774, + "grad_norm": 1.0308477549900932, + "learning_rate": 6.478873239436621e-06, + "loss": 1.175403356552124, + "step": 93 + }, + { + "epoch": 0.03312775330396476, + "grad_norm": 1.0822064194108554, + "learning_rate": 6.549295774647888e-06, + "loss": 1.1863958835601807, + "step": 94 + }, + { + "epoch": 0.033480176211453744, + "grad_norm": 1.0560683839166303, + "learning_rate": 6.619718309859155e-06, + "loss": 0.8630557060241699, + "step": 95 + }, + { + "epoch": 0.03383259911894273, + "grad_norm": 1.1683225259655636, + "learning_rate": 6.690140845070423e-06, + "loss": 1.0499619245529175, + "step": 96 + }, + { + "epoch": 0.03418502202643172, + "grad_norm": 0.9766019012274652, + "learning_rate": 6.760563380281691e-06, + "loss": 0.9443086981773376, + "step": 97 + }, + { + "epoch": 0.034537444933920705, + "grad_norm": 1.1779098792527396, + "learning_rate": 6.830985915492958e-06, + "loss": 1.0011450052261353, + "step": 98 + }, + { + "epoch": 0.03488986784140969, + "grad_norm": 1.0941166094999715, + "learning_rate": 6.901408450704225e-06, + "loss": 1.0239083766937256, + "step": 99 + }, + { + "epoch": 
0.03524229074889868, + "grad_norm": 1.1605003575433563, + "learning_rate": 6.9718309859154935e-06, + "loss": 1.1335347890853882, + "step": 100 + }, + { + "epoch": 0.035594713656387666, + "grad_norm": 1.1410420869639502, + "learning_rate": 7.042253521126761e-06, + "loss": 0.9650854468345642, + "step": 101 + }, + { + "epoch": 0.03594713656387665, + "grad_norm": 1.0366491839089684, + "learning_rate": 7.112676056338029e-06, + "loss": 0.9284406900405884, + "step": 102 + }, + { + "epoch": 0.036299559471365636, + "grad_norm": 1.0368314441443032, + "learning_rate": 7.183098591549297e-06, + "loss": 0.989676296710968, + "step": 103 + }, + { + "epoch": 0.03665198237885463, + "grad_norm": 1.0475480945800932, + "learning_rate": 7.253521126760564e-06, + "loss": 0.9149842262268066, + "step": 104 + }, + { + "epoch": 0.03700440528634361, + "grad_norm": 1.0115568298427282, + "learning_rate": 7.3239436619718316e-06, + "loss": 0.9793657064437866, + "step": 105 + }, + { + "epoch": 0.0373568281938326, + "grad_norm": 1.0923401558071288, + "learning_rate": 7.3943661971831e-06, + "loss": 0.9508543014526367, + "step": 106 + }, + { + "epoch": 0.03770925110132159, + "grad_norm": 1.123373083563155, + "learning_rate": 7.464788732394367e-06, + "loss": 1.0623283386230469, + "step": 107 + }, + { + "epoch": 0.038061674008810574, + "grad_norm": 1.0472469474411819, + "learning_rate": 7.535211267605634e-06, + "loss": 1.0039314031600952, + "step": 108 + }, + { + "epoch": 0.03841409691629956, + "grad_norm": 1.1301029490219276, + "learning_rate": 7.6056338028169015e-06, + "loss": 0.9315502643585205, + "step": 109 + }, + { + "epoch": 0.038766519823788544, + "grad_norm": 0.9958020122553335, + "learning_rate": 7.67605633802817e-06, + "loss": 0.936677098274231, + "step": 110 + }, + { + "epoch": 0.039118942731277535, + "grad_norm": 1.0684105284421879, + "learning_rate": 7.746478873239436e-06, + "loss": 0.9373410940170288, + "step": 111 + }, + { + "epoch": 0.03947136563876652, + "grad_norm": 1.155598878121798, 
+ "learning_rate": 7.816901408450704e-06, + "loss": 1.01617431640625, + "step": 112 + }, + { + "epoch": 0.039823788546255505, + "grad_norm": 1.1889006080727076, + "learning_rate": 7.887323943661972e-06, + "loss": 1.1713547706604004, + "step": 113 + }, + { + "epoch": 0.0401762114537445, + "grad_norm": 1.1256223667919436, + "learning_rate": 7.95774647887324e-06, + "loss": 0.8982350826263428, + "step": 114 + }, + { + "epoch": 0.04052863436123348, + "grad_norm": 1.0914199985412718, + "learning_rate": 8.028169014084509e-06, + "loss": 0.8123869895935059, + "step": 115 + }, + { + "epoch": 0.040881057268722466, + "grad_norm": 1.1505365914239516, + "learning_rate": 8.098591549295775e-06, + "loss": 1.0762536525726318, + "step": 116 + }, + { + "epoch": 0.04123348017621145, + "grad_norm": 1.0367170014557934, + "learning_rate": 8.169014084507043e-06, + "loss": 1.004841923713684, + "step": 117 + }, + { + "epoch": 0.04158590308370044, + "grad_norm": 1.0966724197265187, + "learning_rate": 8.239436619718311e-06, + "loss": 0.9237936735153198, + "step": 118 + }, + { + "epoch": 0.04193832599118943, + "grad_norm": 1.0785540239343763, + "learning_rate": 8.309859154929578e-06, + "loss": 0.9038913249969482, + "step": 119 + }, + { + "epoch": 0.04229074889867841, + "grad_norm": 1.1784096317090726, + "learning_rate": 8.380281690140846e-06, + "loss": 0.9488446712493896, + "step": 120 + }, + { + "epoch": 0.042643171806167404, + "grad_norm": 1.1559534491366574, + "learning_rate": 8.450704225352114e-06, + "loss": 1.0862706899642944, + "step": 121 + }, + { + "epoch": 0.04299559471365639, + "grad_norm": 1.5143452874154766, + "learning_rate": 8.52112676056338e-06, + "loss": 0.8882313966751099, + "step": 122 + }, + { + "epoch": 0.043348017621145374, + "grad_norm": 1.1412568707979918, + "learning_rate": 8.591549295774648e-06, + "loss": 0.9125900268554688, + "step": 123 + }, + { + "epoch": 0.04370044052863436, + "grad_norm": 1.403727281403332, + "learning_rate": 8.661971830985915e-06, + "loss": 
0.944568395614624, + "step": 124 + }, + { + "epoch": 0.04405286343612335, + "grad_norm": 1.2993905510610635, + "learning_rate": 8.732394366197183e-06, + "loss": 0.9303089380264282, + "step": 125 + }, + { + "epoch": 0.044405286343612335, + "grad_norm": 1.1184314169128153, + "learning_rate": 8.802816901408451e-06, + "loss": 1.0983362197875977, + "step": 126 + }, + { + "epoch": 0.04475770925110132, + "grad_norm": 1.40811546312751, + "learning_rate": 8.87323943661972e-06, + "loss": 1.002477765083313, + "step": 127 + }, + { + "epoch": 0.04511013215859031, + "grad_norm": 1.1638063617076078, + "learning_rate": 8.943661971830987e-06, + "loss": 0.9994120001792908, + "step": 128 + }, + { + "epoch": 0.045462555066079297, + "grad_norm": 1.2118035451866538, + "learning_rate": 9.014084507042254e-06, + "loss": 1.0785832405090332, + "step": 129 + }, + { + "epoch": 0.04581497797356828, + "grad_norm": 1.0820277493757582, + "learning_rate": 9.084507042253522e-06, + "loss": 0.779441237449646, + "step": 130 + }, + { + "epoch": 0.046167400881057266, + "grad_norm": 1.1766256779195974, + "learning_rate": 9.15492957746479e-06, + "loss": 1.0052348375320435, + "step": 131 + }, + { + "epoch": 0.04651982378854626, + "grad_norm": 1.0771619013639089, + "learning_rate": 9.225352112676057e-06, + "loss": 1.0327996015548706, + "step": 132 + }, + { + "epoch": 0.04687224669603524, + "grad_norm": 1.501276619683034, + "learning_rate": 9.295774647887325e-06, + "loss": 1.0643246173858643, + "step": 133 + }, + { + "epoch": 0.04722466960352423, + "grad_norm": 1.1427145785080848, + "learning_rate": 9.366197183098593e-06, + "loss": 0.8449216485023499, + "step": 134 + }, + { + "epoch": 0.04757709251101322, + "grad_norm": 1.2684019730338143, + "learning_rate": 9.43661971830986e-06, + "loss": 0.8867055177688599, + "step": 135 + }, + { + "epoch": 0.047929515418502204, + "grad_norm": 1.4156875615017863, + "learning_rate": 9.507042253521127e-06, + "loss": 1.048499584197998, + "step": 136 + }, + { + "epoch": 
0.04828193832599119, + "grad_norm": 1.2120768691141688, + "learning_rate": 9.577464788732394e-06, + "loss": 1.0548617839813232, + "step": 137 + }, + { + "epoch": 0.048634361233480174, + "grad_norm": 1.0679337780928526, + "learning_rate": 9.647887323943664e-06, + "loss": 0.8882845044136047, + "step": 138 + }, + { + "epoch": 0.048986784140969165, + "grad_norm": 1.622342973826323, + "learning_rate": 9.71830985915493e-06, + "loss": 1.032647967338562, + "step": 139 + }, + { + "epoch": 0.04933920704845815, + "grad_norm": 1.058782348686911, + "learning_rate": 9.788732394366198e-06, + "loss": 1.039523959159851, + "step": 140 + }, + { + "epoch": 0.049691629955947135, + "grad_norm": 1.1234982994751406, + "learning_rate": 9.859154929577466e-06, + "loss": 0.8451036214828491, + "step": 141 + }, + { + "epoch": 0.05004405286343613, + "grad_norm": 1.1376284406077708, + "learning_rate": 9.929577464788733e-06, + "loss": 0.8285897970199585, + "step": 142 + }, + { + "epoch": 0.05039647577092511, + "grad_norm": 1.2057497016168632, + "learning_rate": 1e-05, + "loss": 0.9998278021812439, + "step": 143 + }, + { + "epoch": 0.050748898678414096, + "grad_norm": 1.4528082359287422, + "learning_rate": 1.0070422535211269e-05, + "loss": 0.9782301187515259, + "step": 144 + }, + { + "epoch": 0.05110132158590308, + "grad_norm": 1.1663459671948497, + "learning_rate": 1.0140845070422535e-05, + "loss": 1.0557070970535278, + "step": 145 + }, + { + "epoch": 0.05145374449339207, + "grad_norm": 1.159146071512081, + "learning_rate": 1.0211267605633803e-05, + "loss": 0.9516133069992065, + "step": 146 + }, + { + "epoch": 0.05180616740088106, + "grad_norm": 1.1694686158556986, + "learning_rate": 1.0281690140845072e-05, + "loss": 0.8965041637420654, + "step": 147 + }, + { + "epoch": 0.05215859030837004, + "grad_norm": 1.2713520268346183, + "learning_rate": 1.0352112676056338e-05, + "loss": 0.8627057075500488, + "step": 148 + }, + { + "epoch": 0.052511013215859034, + "grad_norm": 1.0456048049111641, + 
"learning_rate": 1.0422535211267606e-05, + "loss": 0.7627567648887634, + "step": 149 + }, + { + "epoch": 0.05286343612334802, + "grad_norm": 1.2332422487154633, + "learning_rate": 1.0492957746478873e-05, + "loss": 0.8522504568099976, + "step": 150 + }, + { + "epoch": 0.053215859030837004, + "grad_norm": 1.1106957565365498, + "learning_rate": 1.056338028169014e-05, + "loss": 0.7164312601089478, + "step": 151 + }, + { + "epoch": 0.05356828193832599, + "grad_norm": 1.0487512934158103, + "learning_rate": 1.0633802816901409e-05, + "loss": 0.9141941070556641, + "step": 152 + }, + { + "epoch": 0.05392070484581498, + "grad_norm": 1.5228596875919753, + "learning_rate": 1.0704225352112675e-05, + "loss": 0.9145504832267761, + "step": 153 + }, + { + "epoch": 0.054273127753303965, + "grad_norm": 1.19745569358961, + "learning_rate": 1.0774647887323943e-05, + "loss": 0.9851646423339844, + "step": 154 + }, + { + "epoch": 0.05462555066079295, + "grad_norm": 1.1547769204431162, + "learning_rate": 1.0845070422535213e-05, + "loss": 0.9319474697113037, + "step": 155 + }, + { + "epoch": 0.05497797356828194, + "grad_norm": 1.242055483054837, + "learning_rate": 1.0915492957746481e-05, + "loss": 0.995783269405365, + "step": 156 + }, + { + "epoch": 0.05533039647577093, + "grad_norm": 1.270129466753014, + "learning_rate": 1.0985915492957748e-05, + "loss": 0.8636226654052734, + "step": 157 + }, + { + "epoch": 0.05568281938325991, + "grad_norm": 1.26388911778751, + "learning_rate": 1.1056338028169016e-05, + "loss": 0.8860869407653809, + "step": 158 + }, + { + "epoch": 0.056035242290748896, + "grad_norm": 1.24911279543244, + "learning_rate": 1.1126760563380284e-05, + "loss": 0.9256196618080139, + "step": 159 + }, + { + "epoch": 0.05638766519823789, + "grad_norm": 1.1684473229538663, + "learning_rate": 1.119718309859155e-05, + "loss": 0.8217915296554565, + "step": 160 + }, + { + "epoch": 0.05674008810572687, + "grad_norm": 1.2938227991615623, + "learning_rate": 1.1267605633802819e-05, + "loss": 
0.9808465838432312, + "step": 161 + }, + { + "epoch": 0.05709251101321586, + "grad_norm": 1.2234654171305366, + "learning_rate": 1.1338028169014087e-05, + "loss": 0.7733014822006226, + "step": 162 + }, + { + "epoch": 0.05744493392070485, + "grad_norm": 1.1428802626649461, + "learning_rate": 1.1408450704225353e-05, + "loss": 0.8581304550170898, + "step": 163 + }, + { + "epoch": 0.057797356828193834, + "grad_norm": 1.3252890457476052, + "learning_rate": 1.1478873239436621e-05, + "loss": 0.9242054224014282, + "step": 164 + }, + { + "epoch": 0.05814977973568282, + "grad_norm": 1.3695567443378234, + "learning_rate": 1.1549295774647888e-05, + "loss": 1.0302021503448486, + "step": 165 + }, + { + "epoch": 0.058502202643171804, + "grad_norm": 1.2950143159958714, + "learning_rate": 1.1619718309859156e-05, + "loss": 0.8954275846481323, + "step": 166 + }, + { + "epoch": 0.058854625550660795, + "grad_norm": 1.1779404187828553, + "learning_rate": 1.1690140845070424e-05, + "loss": 0.891846776008606, + "step": 167 + }, + { + "epoch": 0.05920704845814978, + "grad_norm": 1.1837706775348158, + "learning_rate": 1.176056338028169e-05, + "loss": 0.887005627155304, + "step": 168 + }, + { + "epoch": 0.059559471365638765, + "grad_norm": 1.289448297537656, + "learning_rate": 1.1830985915492958e-05, + "loss": 0.9020301103591919, + "step": 169 + }, + { + "epoch": 0.05991189427312775, + "grad_norm": 1.2185831955131692, + "learning_rate": 1.1901408450704227e-05, + "loss": 0.7925454378128052, + "step": 170 + }, + { + "epoch": 0.06026431718061674, + "grad_norm": 1.31750363404193, + "learning_rate": 1.1971830985915493e-05, + "loss": 0.8058332800865173, + "step": 171 + }, + { + "epoch": 0.060616740088105726, + "grad_norm": 1.2435062872951204, + "learning_rate": 1.2042253521126761e-05, + "loss": 0.892992377281189, + "step": 172 + }, + { + "epoch": 0.06096916299559471, + "grad_norm": 1.0835922361658872, + "learning_rate": 1.211267605633803e-05, + "loss": 0.8482734560966492, + "step": 173 + }, + { + 
"epoch": 0.0613215859030837, + "grad_norm": 1.2806384537102478, + "learning_rate": 1.2183098591549296e-05, + "loss": 0.8652878999710083, + "step": 174 + }, + { + "epoch": 0.06167400881057269, + "grad_norm": 1.183930720799068, + "learning_rate": 1.2253521126760564e-05, + "loss": 0.8590051531791687, + "step": 175 + }, + { + "epoch": 0.06202643171806167, + "grad_norm": 1.1264180921527844, + "learning_rate": 1.232394366197183e-05, + "loss": 0.7106916904449463, + "step": 176 + }, + { + "epoch": 0.06237885462555066, + "grad_norm": 1.5304901042334342, + "learning_rate": 1.2394366197183098e-05, + "loss": 0.9298936128616333, + "step": 177 + }, + { + "epoch": 0.06273127753303964, + "grad_norm": 1.3380597134261425, + "learning_rate": 1.2464788732394367e-05, + "loss": 1.027758240699768, + "step": 178 + }, + { + "epoch": 0.06308370044052863, + "grad_norm": 1.4071851827143296, + "learning_rate": 1.2535211267605636e-05, + "loss": 0.9576354026794434, + "step": 179 + }, + { + "epoch": 0.06343612334801763, + "grad_norm": 1.476054189108656, + "learning_rate": 1.2605633802816903e-05, + "loss": 0.6881245374679565, + "step": 180 + }, + { + "epoch": 0.0637885462555066, + "grad_norm": 1.736658600923819, + "learning_rate": 1.2676056338028171e-05, + "loss": 0.9629781246185303, + "step": 181 + }, + { + "epoch": 0.0641409691629956, + "grad_norm": 1.3990061114909895, + "learning_rate": 1.2746478873239439e-05, + "loss": 0.849892258644104, + "step": 182 + }, + { + "epoch": 0.06449339207048459, + "grad_norm": 1.2776276047787312, + "learning_rate": 1.2816901408450705e-05, + "loss": 0.9294229745864868, + "step": 183 + }, + { + "epoch": 0.06484581497797356, + "grad_norm": 1.346185395248099, + "learning_rate": 1.2887323943661974e-05, + "loss": 0.9534600973129272, + "step": 184 + }, + { + "epoch": 0.06519823788546256, + "grad_norm": 1.2547825941083024, + "learning_rate": 1.2957746478873242e-05, + "loss": 0.7937755584716797, + "step": 185 + }, + { + "epoch": 0.06555066079295155, + "grad_norm": 
1.215372024356157, + "learning_rate": 1.3028169014084508e-05, + "loss": 0.9188590049743652, + "step": 186 + }, + { + "epoch": 0.06590308370044053, + "grad_norm": 1.3372931395210206, + "learning_rate": 1.3098591549295776e-05, + "loss": 0.8775123357772827, + "step": 187 + }, + { + "epoch": 0.06625550660792952, + "grad_norm": 1.2703292803517752, + "learning_rate": 1.3169014084507044e-05, + "loss": 0.8562190532684326, + "step": 188 + }, + { + "epoch": 0.0666079295154185, + "grad_norm": 1.1593142823065046, + "learning_rate": 1.323943661971831e-05, + "loss": 0.9427295327186584, + "step": 189 + }, + { + "epoch": 0.06696035242290749, + "grad_norm": 1.1080518257913534, + "learning_rate": 1.3309859154929579e-05, + "loss": 0.6142286062240601, + "step": 190 + }, + { + "epoch": 0.06731277533039648, + "grad_norm": 1.416041365414943, + "learning_rate": 1.3380281690140845e-05, + "loss": 0.7480863332748413, + "step": 191 + }, + { + "epoch": 0.06766519823788546, + "grad_norm": 1.6287312517465182, + "learning_rate": 1.3450704225352114e-05, + "loss": 0.898857593536377, + "step": 192 + }, + { + "epoch": 0.06801762114537445, + "grad_norm": 1.4737642135415263, + "learning_rate": 1.3521126760563382e-05, + "loss": 0.8584127426147461, + "step": 193 + }, + { + "epoch": 0.06837004405286344, + "grad_norm": 1.2178631494207084, + "learning_rate": 1.3591549295774648e-05, + "loss": 0.9400655031204224, + "step": 194 + }, + { + "epoch": 0.06872246696035242, + "grad_norm": 1.2698602238237462, + "learning_rate": 1.3661971830985916e-05, + "loss": 0.7750787734985352, + "step": 195 + }, + { + "epoch": 0.06907488986784141, + "grad_norm": 1.2474557266398312, + "learning_rate": 1.3732394366197184e-05, + "loss": 0.8530284762382507, + "step": 196 + }, + { + "epoch": 0.0694273127753304, + "grad_norm": 1.3191630227557989, + "learning_rate": 1.380281690140845e-05, + "loss": 0.9019994735717773, + "step": 197 + }, + { + "epoch": 0.06977973568281938, + "grad_norm": 1.1994310415476668, + "learning_rate": 
1.3873239436619719e-05, + "loss": 0.7749642133712769, + "step": 198 + }, + { + "epoch": 0.07013215859030837, + "grad_norm": 1.3060142025317714, + "learning_rate": 1.3943661971830987e-05, + "loss": 0.956200122833252, + "step": 199 + }, + { + "epoch": 0.07048458149779736, + "grad_norm": 1.3510407726181874, + "learning_rate": 1.4014084507042253e-05, + "loss": 0.8544470071792603, + "step": 200 + }, + { + "epoch": 0.07083700440528634, + "grad_norm": 1.447521091304659, + "learning_rate": 1.4084507042253522e-05, + "loss": 0.8776387572288513, + "step": 201 + }, + { + "epoch": 0.07118942731277533, + "grad_norm": 1.5340123254246993, + "learning_rate": 1.4154929577464788e-05, + "loss": 0.9949591755867004, + "step": 202 + }, + { + "epoch": 0.07154185022026431, + "grad_norm": 1.306920931788941, + "learning_rate": 1.4225352112676058e-05, + "loss": 0.9616764783859253, + "step": 203 + }, + { + "epoch": 0.0718942731277533, + "grad_norm": 1.3490978686730206, + "learning_rate": 1.4295774647887326e-05, + "loss": 0.9247175455093384, + "step": 204 + }, + { + "epoch": 0.0722466960352423, + "grad_norm": 1.4241509312853966, + "learning_rate": 1.4366197183098594e-05, + "loss": 0.7946479320526123, + "step": 205 + }, + { + "epoch": 0.07259911894273127, + "grad_norm": 1.3949991357763207, + "learning_rate": 1.443661971830986e-05, + "loss": 0.7929860353469849, + "step": 206 + }, + { + "epoch": 0.07295154185022026, + "grad_norm": 1.3725430537583514, + "learning_rate": 1.4507042253521129e-05, + "loss": 0.9215391874313354, + "step": 207 + }, + { + "epoch": 0.07330396475770926, + "grad_norm": 1.4247014676365253, + "learning_rate": 1.4577464788732397e-05, + "loss": 0.8767607808113098, + "step": 208 + }, + { + "epoch": 0.07365638766519823, + "grad_norm": 1.3691339839746066, + "learning_rate": 1.4647887323943663e-05, + "loss": 0.8586276769638062, + "step": 209 + }, + { + "epoch": 0.07400881057268723, + "grad_norm": 1.3252388254138234, + "learning_rate": 1.4718309859154931e-05, + "loss": 
0.8680851459503174, + "step": 210 + }, + { + "epoch": 0.07436123348017622, + "grad_norm": 1.2834178375463614, + "learning_rate": 1.47887323943662e-05, + "loss": 0.8887720108032227, + "step": 211 + }, + { + "epoch": 0.0747136563876652, + "grad_norm": 1.4918681608584679, + "learning_rate": 1.4859154929577466e-05, + "loss": 0.8887100219726562, + "step": 212 + }, + { + "epoch": 0.07506607929515419, + "grad_norm": 1.247870788657092, + "learning_rate": 1.4929577464788734e-05, + "loss": 0.9257807731628418, + "step": 213 + }, + { + "epoch": 0.07541850220264318, + "grad_norm": 1.2922967878533598, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.8107355833053589, + "step": 214 + }, + { + "epoch": 0.07577092511013216, + "grad_norm": 1.390091314994072, + "learning_rate": 1.5070422535211269e-05, + "loss": 0.8765913844108582, + "step": 215 + }, + { + "epoch": 0.07612334801762115, + "grad_norm": 1.3936279931065536, + "learning_rate": 1.5140845070422537e-05, + "loss": 0.8973524570465088, + "step": 216 + }, + { + "epoch": 0.07647577092511013, + "grad_norm": 1.310665112588589, + "learning_rate": 1.5211267605633803e-05, + "loss": 0.9194613695144653, + "step": 217 + }, + { + "epoch": 0.07682819383259912, + "grad_norm": 1.4152279415932816, + "learning_rate": 1.528169014084507e-05, + "loss": 0.8832643032073975, + "step": 218 + }, + { + "epoch": 0.07718061674008811, + "grad_norm": 1.465705079678902, + "learning_rate": 1.535211267605634e-05, + "loss": 0.9575356245040894, + "step": 219 + }, + { + "epoch": 0.07753303964757709, + "grad_norm": 1.2268114727867823, + "learning_rate": 1.5422535211267607e-05, + "loss": 0.8302342891693115, + "step": 220 + }, + { + "epoch": 0.07788546255506608, + "grad_norm": 1.2978917843344704, + "learning_rate": 1.5492957746478872e-05, + "loss": 0.7999966144561768, + "step": 221 + }, + { + "epoch": 0.07823788546255507, + "grad_norm": 1.271952593735668, + "learning_rate": 1.556338028169014e-05, + "loss": 0.8201859593391418, + "step": 222 + }, + { + "epoch": 
0.07859030837004405, + "grad_norm": 1.635464665304201, + "learning_rate": 1.563380281690141e-05, + "loss": 0.872761607170105, + "step": 223 + }, + { + "epoch": 0.07894273127753304, + "grad_norm": 1.7544850567681591, + "learning_rate": 1.5704225352112677e-05, + "loss": 0.8695409297943115, + "step": 224 + }, + { + "epoch": 0.07929515418502203, + "grad_norm": 1.2478131333285527, + "learning_rate": 1.5774647887323945e-05, + "loss": 0.8532050848007202, + "step": 225 + }, + { + "epoch": 0.07964757709251101, + "grad_norm": 1.5276196879895285, + "learning_rate": 1.5845070422535213e-05, + "loss": 0.7875121235847473, + "step": 226 + }, + { + "epoch": 0.08, + "grad_norm": 1.5837485275916963, + "learning_rate": 1.591549295774648e-05, + "loss": 0.7131509780883789, + "step": 227 + }, + { + "epoch": 0.080352422907489, + "grad_norm": 1.4681482709870555, + "learning_rate": 1.598591549295775e-05, + "loss": 0.9758431911468506, + "step": 228 + }, + { + "epoch": 0.08070484581497797, + "grad_norm": 1.4451165548552447, + "learning_rate": 1.6056338028169017e-05, + "loss": 0.7894232273101807, + "step": 229 + }, + { + "epoch": 0.08105726872246696, + "grad_norm": 1.2417235745587356, + "learning_rate": 1.6126760563380285e-05, + "loss": 0.9933483600616455, + "step": 230 + }, + { + "epoch": 0.08140969162995594, + "grad_norm": 1.4745298800972837, + "learning_rate": 1.619718309859155e-05, + "loss": 0.8424056768417358, + "step": 231 + }, + { + "epoch": 0.08176211453744493, + "grad_norm": 1.4626597398090972, + "learning_rate": 1.6267605633802818e-05, + "loss": 0.7957695126533508, + "step": 232 + }, + { + "epoch": 0.08211453744493392, + "grad_norm": 1.243843455131114, + "learning_rate": 1.6338028169014086e-05, + "loss": 0.8491722345352173, + "step": 233 + }, + { + "epoch": 0.0824669603524229, + "grad_norm": 1.407640698868158, + "learning_rate": 1.6408450704225354e-05, + "loss": 0.7010964751243591, + "step": 234 + }, + { + "epoch": 0.0828193832599119, + "grad_norm": 1.4584433632361322, + 
"learning_rate": 1.6478873239436623e-05, + "loss": 0.8713864088058472, + "step": 235 + }, + { + "epoch": 0.08317180616740089, + "grad_norm": 1.261328425360657, + "learning_rate": 1.6549295774647887e-05, + "loss": 0.6724761128425598, + "step": 236 + }, + { + "epoch": 0.08352422907488986, + "grad_norm": 1.219837126653021, + "learning_rate": 1.6619718309859155e-05, + "loss": 0.8612109422683716, + "step": 237 + }, + { + "epoch": 0.08387665198237886, + "grad_norm": 1.4745868727167897, + "learning_rate": 1.6690140845070424e-05, + "loss": 0.5697110891342163, + "step": 238 + }, + { + "epoch": 0.08422907488986785, + "grad_norm": 1.2506294676144012, + "learning_rate": 1.676056338028169e-05, + "loss": 0.7877228260040283, + "step": 239 + }, + { + "epoch": 0.08458149779735682, + "grad_norm": 1.1492235860181979, + "learning_rate": 1.683098591549296e-05, + "loss": 0.8751014471054077, + "step": 240 + }, + { + "epoch": 0.08493392070484582, + "grad_norm": 1.527957574033417, + "learning_rate": 1.6901408450704228e-05, + "loss": 0.8731381893157959, + "step": 241 + }, + { + "epoch": 0.08528634361233481, + "grad_norm": 1.291362512763109, + "learning_rate": 1.6971830985915493e-05, + "loss": 0.831383228302002, + "step": 242 + }, + { + "epoch": 0.08563876651982379, + "grad_norm": 1.2699070733171296, + "learning_rate": 1.704225352112676e-05, + "loss": 0.792934238910675, + "step": 243 + }, + { + "epoch": 0.08599118942731278, + "grad_norm": 1.1592748972292606, + "learning_rate": 1.711267605633803e-05, + "loss": 0.6723657846450806, + "step": 244 + }, + { + "epoch": 0.08634361233480176, + "grad_norm": 1.4796981905185658, + "learning_rate": 1.7183098591549297e-05, + "loss": 0.8377546072006226, + "step": 245 + }, + { + "epoch": 0.08669603524229075, + "grad_norm": 1.2727987522874769, + "learning_rate": 1.7253521126760565e-05, + "loss": 0.8073972463607788, + "step": 246 + }, + { + "epoch": 0.08704845814977974, + "grad_norm": 1.6240304260373406, + "learning_rate": 1.732394366197183e-05, + "loss": 
0.8913615942001343, + "step": 247 + }, + { + "epoch": 0.08740088105726872, + "grad_norm": 1.4436852067854697, + "learning_rate": 1.7394366197183098e-05, + "loss": 0.9133341312408447, + "step": 248 + }, + { + "epoch": 0.08775330396475771, + "grad_norm": 1.6098073633875791, + "learning_rate": 1.7464788732394366e-05, + "loss": 0.7593938112258911, + "step": 249 + }, + { + "epoch": 0.0881057268722467, + "grad_norm": 1.456505700957212, + "learning_rate": 1.7535211267605638e-05, + "loss": 0.8049266934394836, + "step": 250 + }, + { + "epoch": 0.08845814977973568, + "grad_norm": 1.44397678174898, + "learning_rate": 1.7605633802816902e-05, + "loss": 0.9065679311752319, + "step": 251 + }, + { + "epoch": 0.08881057268722467, + "grad_norm": 1.5285644429403964, + "learning_rate": 1.767605633802817e-05, + "loss": 0.9309085011482239, + "step": 252 + }, + { + "epoch": 0.08916299559471366, + "grad_norm": 1.3367293223358285, + "learning_rate": 1.774647887323944e-05, + "loss": 0.7846949100494385, + "step": 253 + }, + { + "epoch": 0.08951541850220264, + "grad_norm": 1.4721492627949804, + "learning_rate": 1.7816901408450707e-05, + "loss": 0.9153063297271729, + "step": 254 + }, + { + "epoch": 0.08986784140969163, + "grad_norm": 1.2843813691966974, + "learning_rate": 1.7887323943661975e-05, + "loss": 0.7743638157844543, + "step": 255 + }, + { + "epoch": 0.09022026431718062, + "grad_norm": 1.6034162783223496, + "learning_rate": 1.7957746478873243e-05, + "loss": 0.887751579284668, + "step": 256 + }, + { + "epoch": 0.0905726872246696, + "grad_norm": 1.2387435479452011, + "learning_rate": 1.8028169014084508e-05, + "loss": 0.8072899580001831, + "step": 257 + }, + { + "epoch": 0.09092511013215859, + "grad_norm": 1.3642448388425203, + "learning_rate": 1.8098591549295776e-05, + "loss": 0.8275943994522095, + "step": 258 + }, + { + "epoch": 0.09127753303964757, + "grad_norm": 1.3287842865535133, + "learning_rate": 1.8169014084507044e-05, + "loss": 0.8300620913505554, + "step": 259 + }, + { + 
"epoch": 0.09162995594713656, + "grad_norm": 1.26616505669333, + "learning_rate": 1.8239436619718312e-05, + "loss": 0.6886857748031616, + "step": 260 + }, + { + "epoch": 0.09198237885462555, + "grad_norm": 2.689833624979495, + "learning_rate": 1.830985915492958e-05, + "loss": 0.8190158605575562, + "step": 261 + }, + { + "epoch": 0.09233480176211453, + "grad_norm": 1.3392491700180422, + "learning_rate": 1.8380281690140845e-05, + "loss": 0.8500730991363525, + "step": 262 + }, + { + "epoch": 0.09268722466960352, + "grad_norm": 1.499663410513064, + "learning_rate": 1.8450704225352113e-05, + "loss": 0.8340811729431152, + "step": 263 + }, + { + "epoch": 0.09303964757709252, + "grad_norm": 1.3031308803407857, + "learning_rate": 1.852112676056338e-05, + "loss": 0.8055675029754639, + "step": 264 + }, + { + "epoch": 0.0933920704845815, + "grad_norm": 1.410218243221954, + "learning_rate": 1.859154929577465e-05, + "loss": 0.7956680059432983, + "step": 265 + }, + { + "epoch": 0.09374449339207049, + "grad_norm": 1.4181751660111779, + "learning_rate": 1.8661971830985917e-05, + "loss": 0.8232501745223999, + "step": 266 + }, + { + "epoch": 0.09409691629955948, + "grad_norm": 1.472224530959967, + "learning_rate": 1.8732394366197186e-05, + "loss": 0.8808565139770508, + "step": 267 + }, + { + "epoch": 0.09444933920704845, + "grad_norm": 1.5113548411958122, + "learning_rate": 1.880281690140845e-05, + "loss": 0.885380744934082, + "step": 268 + }, + { + "epoch": 0.09480176211453745, + "grad_norm": 1.5009611452094687, + "learning_rate": 1.887323943661972e-05, + "loss": 0.8408790826797485, + "step": 269 + }, + { + "epoch": 0.09515418502202644, + "grad_norm": 1.395810517840328, + "learning_rate": 1.8943661971830987e-05, + "loss": 0.7089993953704834, + "step": 270 + }, + { + "epoch": 0.09550660792951542, + "grad_norm": 1.280231938177333, + "learning_rate": 1.9014084507042255e-05, + "loss": 0.7941038608551025, + "step": 271 + }, + { + "epoch": 0.09585903083700441, + "grad_norm": 
1.5210768015450882, + "learning_rate": 1.9084507042253523e-05, + "loss": 0.8269138932228088, + "step": 272 + }, + { + "epoch": 0.09621145374449339, + "grad_norm": 1.5053903060638305, + "learning_rate": 1.9154929577464788e-05, + "loss": 0.8206192255020142, + "step": 273 + }, + { + "epoch": 0.09656387665198238, + "grad_norm": 1.49737615599854, + "learning_rate": 1.922535211267606e-05, + "loss": 0.9146496653556824, + "step": 274 + }, + { + "epoch": 0.09691629955947137, + "grad_norm": 1.1755726979972605, + "learning_rate": 1.9295774647887327e-05, + "loss": 0.6738560199737549, + "step": 275 + }, + { + "epoch": 0.09726872246696035, + "grad_norm": 1.3169911381980228, + "learning_rate": 1.9366197183098595e-05, + "loss": 0.934916615486145, + "step": 276 + }, + { + "epoch": 0.09762114537444934, + "grad_norm": 1.357245739203775, + "learning_rate": 1.943661971830986e-05, + "loss": 0.8952134847640991, + "step": 277 + }, + { + "epoch": 0.09797356828193833, + "grad_norm": 1.3423178147772294, + "learning_rate": 1.9507042253521128e-05, + "loss": 0.9346420764923096, + "step": 278 + }, + { + "epoch": 0.09832599118942731, + "grad_norm": 1.5698833191970427, + "learning_rate": 1.9577464788732396e-05, + "loss": 0.8781993985176086, + "step": 279 + }, + { + "epoch": 0.0986784140969163, + "grad_norm": 1.4703395142125208, + "learning_rate": 1.9647887323943664e-05, + "loss": 0.8283448219299316, + "step": 280 + }, + { + "epoch": 0.09903083700440529, + "grad_norm": 1.2650765439550704, + "learning_rate": 1.9718309859154933e-05, + "loss": 0.8010722398757935, + "step": 281 + }, + { + "epoch": 0.09938325991189427, + "grad_norm": 1.3576050403922397, + "learning_rate": 1.97887323943662e-05, + "loss": 0.8697119951248169, + "step": 282 + }, + { + "epoch": 0.09973568281938326, + "grad_norm": 1.098837792765385, + "learning_rate": 1.9859154929577465e-05, + "loss": 0.6448882818222046, + "step": 283 + }, + { + "epoch": 0.10008810572687225, + "grad_norm": 1.5101908618325302, + "learning_rate": 
1.9929577464788734e-05, + "loss": 0.7782007455825806, + "step": 284 + }, + { + "epoch": 0.10044052863436123, + "grad_norm": 1.455658231417001, + "learning_rate": 2e-05, + "loss": 0.8131508827209473, + "step": 285 + }, + { + "epoch": 0.10079295154185022, + "grad_norm": 1.4413777660177336, + "learning_rate": 1.999999830265561e-05, + "loss": 0.8592134714126587, + "step": 286 + }, + { + "epoch": 0.1011453744493392, + "grad_norm": 1.5671417589518397, + "learning_rate": 1.9999993210623002e-05, + "loss": 0.9374675750732422, + "step": 287 + }, + { + "epoch": 0.10149779735682819, + "grad_norm": 1.5499152824954487, + "learning_rate": 1.9999984723903913e-05, + "loss": 0.8416328430175781, + "step": 288 + }, + { + "epoch": 0.10185022026431718, + "grad_norm": 1.267360297703748, + "learning_rate": 1.9999972842501218e-05, + "loss": 0.7587184906005859, + "step": 289 + }, + { + "epoch": 0.10220264317180616, + "grad_norm": 1.4783535336356979, + "learning_rate": 1.9999957566418956e-05, + "loss": 1.010494351387024, + "step": 290 + }, + { + "epoch": 0.10255506607929515, + "grad_norm": 1.3092025632301814, + "learning_rate": 1.999993889566231e-05, + "loss": 0.7942835092544556, + "step": 291 + }, + { + "epoch": 0.10290748898678415, + "grad_norm": 1.4620379458028798, + "learning_rate": 1.999991683023762e-05, + "loss": 0.9069477915763855, + "step": 292 + }, + { + "epoch": 0.10325991189427312, + "grad_norm": 1.781963673155629, + "learning_rate": 1.9999891370152375e-05, + "loss": 0.8776397705078125, + "step": 293 + }, + { + "epoch": 0.10361233480176212, + "grad_norm": 1.3409879305652028, + "learning_rate": 1.9999862515415216e-05, + "loss": 0.8560416102409363, + "step": 294 + }, + { + "epoch": 0.10396475770925111, + "grad_norm": 1.601676543787724, + "learning_rate": 1.9999830266035942e-05, + "loss": 0.9177321195602417, + "step": 295 + }, + { + "epoch": 0.10431718061674009, + "grad_norm": 1.621521883940329, + "learning_rate": 1.99997946220255e-05, + "loss": 0.8830884695053101, + "step": 296 + }, 
+ { + "epoch": 0.10466960352422908, + "grad_norm": 1.5076951372471592, + "learning_rate": 1.9999755583395987e-05, + "loss": 0.913659930229187, + "step": 297 + }, + { + "epoch": 0.10502202643171807, + "grad_norm": 1.48724181087663, + "learning_rate": 1.999971315016066e-05, + "loss": 0.773309588432312, + "step": 298 + }, + { + "epoch": 0.10537444933920705, + "grad_norm": 1.4640758198016095, + "learning_rate": 1.9999667322333916e-05, + "loss": 0.8432563543319702, + "step": 299 + }, + { + "epoch": 0.10572687224669604, + "grad_norm": 1.5419897004531282, + "learning_rate": 1.999961809993132e-05, + "loss": 0.9632397890090942, + "step": 300 + }, + { + "epoch": 0.10607929515418502, + "grad_norm": 1.4657018761848883, + "learning_rate": 1.999956548296958e-05, + "loss": 0.8205600380897522, + "step": 301 + }, + { + "epoch": 0.10643171806167401, + "grad_norm": 1.2908123355748096, + "learning_rate": 1.9999509471466557e-05, + "loss": 0.8789785504341125, + "step": 302 + }, + { + "epoch": 0.106784140969163, + "grad_norm": 1.4062841050093677, + "learning_rate": 1.999945006544126e-05, + "loss": 0.8445791006088257, + "step": 303 + }, + { + "epoch": 0.10713656387665198, + "grad_norm": 1.3201850616961108, + "learning_rate": 1.9999387264913865e-05, + "loss": 0.8025245666503906, + "step": 304 + }, + { + "epoch": 0.10748898678414097, + "grad_norm": 1.3596018005437036, + "learning_rate": 1.9999321069905688e-05, + "loss": 0.9271318912506104, + "step": 305 + }, + { + "epoch": 0.10784140969162996, + "grad_norm": 1.167387591378785, + "learning_rate": 1.999925148043919e-05, + "loss": 0.809894859790802, + "step": 306 + }, + { + "epoch": 0.10819383259911894, + "grad_norm": 1.4267923203712158, + "learning_rate": 1.999917849653801e-05, + "loss": 0.8940669298171997, + "step": 307 + }, + { + "epoch": 0.10854625550660793, + "grad_norm": 1.466148592973388, + "learning_rate": 1.9999102118226912e-05, + "loss": 0.9301233887672424, + "step": 308 + }, + { + "epoch": 0.10889867841409692, + "grad_norm": 
1.271175959298383, + "learning_rate": 1.9999022345531834e-05, + "loss": 0.6429216861724854, + "step": 309 + }, + { + "epoch": 0.1092511013215859, + "grad_norm": 1.3392816449794738, + "learning_rate": 1.999893917847985e-05, + "loss": 0.7199009656906128, + "step": 310 + }, + { + "epoch": 0.10960352422907489, + "grad_norm": 1.2732787140894477, + "learning_rate": 1.999885261709919e-05, + "loss": 0.8312395811080933, + "step": 311 + }, + { + "epoch": 0.10995594713656388, + "grad_norm": 1.4809957988420102, + "learning_rate": 1.999876266141924e-05, + "loss": 0.8187745213508606, + "step": 312 + }, + { + "epoch": 0.11030837004405286, + "grad_norm": 1.2638906346778362, + "learning_rate": 1.9998669311470546e-05, + "loss": 0.8632344603538513, + "step": 313 + }, + { + "epoch": 0.11066079295154185, + "grad_norm": 1.5651718256034985, + "learning_rate": 1.9998572567284787e-05, + "loss": 0.8789447546005249, + "step": 314 + }, + { + "epoch": 0.11101321585903083, + "grad_norm": 1.4657438576086577, + "learning_rate": 1.999847242889481e-05, + "loss": 0.7647864818572998, + "step": 315 + }, + { + "epoch": 0.11136563876651982, + "grad_norm": 1.2962284510646964, + "learning_rate": 1.9998368896334606e-05, + "loss": 0.872633695602417, + "step": 316 + }, + { + "epoch": 0.11171806167400881, + "grad_norm": 1.4704185501053861, + "learning_rate": 1.9998261969639324e-05, + "loss": 0.8249840140342712, + "step": 317 + }, + { + "epoch": 0.11207048458149779, + "grad_norm": 1.6298830469717174, + "learning_rate": 1.999815164884526e-05, + "loss": 0.7558056116104126, + "step": 318 + }, + { + "epoch": 0.11242290748898678, + "grad_norm": 1.3075257157183537, + "learning_rate": 1.9998037933989866e-05, + "loss": 0.7447441220283508, + "step": 319 + }, + { + "epoch": 0.11277533039647578, + "grad_norm": 1.4956646267919036, + "learning_rate": 1.9997920825111743e-05, + "loss": 0.8260442018508911, + "step": 320 + }, + { + "epoch": 0.11312775330396475, + "grad_norm": 1.2866274072297625, + "learning_rate": 
1.999780032225065e-05, + "loss": 0.7916134595870972, + "step": 321 + }, + { + "epoch": 0.11348017621145375, + "grad_norm": 1.3548711592442237, + "learning_rate": 1.9997676425447486e-05, + "loss": 0.7460259199142456, + "step": 322 + }, + { + "epoch": 0.11383259911894274, + "grad_norm": 1.4664419676620792, + "learning_rate": 1.9997549134744318e-05, + "loss": 0.9739946126937866, + "step": 323 + }, + { + "epoch": 0.11418502202643172, + "grad_norm": 1.3133090693965692, + "learning_rate": 1.9997418450184352e-05, + "loss": 0.7242900133132935, + "step": 324 + }, + { + "epoch": 0.1145374449339207, + "grad_norm": 1.7023646414032152, + "learning_rate": 1.9997284371811955e-05, + "loss": 0.7645323276519775, + "step": 325 + }, + { + "epoch": 0.1148898678414097, + "grad_norm": 1.3437215758424148, + "learning_rate": 1.9997146899672638e-05, + "loss": 0.7377017736434937, + "step": 326 + }, + { + "epoch": 0.11524229074889868, + "grad_norm": 1.3608732999796416, + "learning_rate": 1.9997006033813076e-05, + "loss": 0.7117934226989746, + "step": 327 + }, + { + "epoch": 0.11559471365638767, + "grad_norm": 1.485158034808982, + "learning_rate": 1.999686177428108e-05, + "loss": 0.8517680168151855, + "step": 328 + }, + { + "epoch": 0.11594713656387665, + "grad_norm": 1.3118416735480631, + "learning_rate": 1.9996714121125626e-05, + "loss": 0.7099400758743286, + "step": 329 + }, + { + "epoch": 0.11629955947136564, + "grad_norm": 1.3949559553781739, + "learning_rate": 1.9996563074396838e-05, + "loss": 0.8581711053848267, + "step": 330 + }, + { + "epoch": 0.11665198237885463, + "grad_norm": 1.322464822656225, + "learning_rate": 1.9996408634145994e-05, + "loss": 0.7841953635215759, + "step": 331 + }, + { + "epoch": 0.11700440528634361, + "grad_norm": 1.2580468593989962, + "learning_rate": 1.9996250800425515e-05, + "loss": 0.7376754879951477, + "step": 332 + }, + { + "epoch": 0.1173568281938326, + "grad_norm": 1.3538742269891202, + "learning_rate": 1.9996089573288985e-05, + "loss": 
0.8934558033943176, + "step": 333 + }, + { + "epoch": 0.11770925110132159, + "grad_norm": 1.4597310886631008, + "learning_rate": 1.999592495279113e-05, + "loss": 0.7870250940322876, + "step": 334 + }, + { + "epoch": 0.11806167400881057, + "grad_norm": 1.5788273084375275, + "learning_rate": 1.9995756938987846e-05, + "loss": 0.7026203274726868, + "step": 335 + }, + { + "epoch": 0.11841409691629956, + "grad_norm": 2.206437289778364, + "learning_rate": 1.999558553193616e-05, + "loss": 1.0066381692886353, + "step": 336 + }, + { + "epoch": 0.11876651982378855, + "grad_norm": 1.349262918557434, + "learning_rate": 1.9995410731694255e-05, + "loss": 0.7860246896743774, + "step": 337 + }, + { + "epoch": 0.11911894273127753, + "grad_norm": 1.4261295710834618, + "learning_rate": 1.999523253832148e-05, + "loss": 0.8142588138580322, + "step": 338 + }, + { + "epoch": 0.11947136563876652, + "grad_norm": 1.403543131076251, + "learning_rate": 1.9995050951878317e-05, + "loss": 0.9737639427185059, + "step": 339 + }, + { + "epoch": 0.1198237885462555, + "grad_norm": 1.2538473699838193, + "learning_rate": 1.999486597242642e-05, + "loss": 0.6165765523910522, + "step": 340 + }, + { + "epoch": 0.12017621145374449, + "grad_norm": 1.4403971646421685, + "learning_rate": 1.999467760002857e-05, + "loss": 0.8553996086120605, + "step": 341 + }, + { + "epoch": 0.12052863436123348, + "grad_norm": 1.579218034733104, + "learning_rate": 1.9994485834748725e-05, + "loss": 0.9291022419929504, + "step": 342 + }, + { + "epoch": 0.12088105726872246, + "grad_norm": 1.3583147087232978, + "learning_rate": 1.9994290676651977e-05, + "loss": 0.8309136629104614, + "step": 343 + }, + { + "epoch": 0.12123348017621145, + "grad_norm": 1.2343518052190974, + "learning_rate": 1.999409212580458e-05, + "loss": 0.6963932514190674, + "step": 344 + }, + { + "epoch": 0.12158590308370044, + "grad_norm": 1.126432291251887, + "learning_rate": 1.9993890182273932e-05, + "loss": 0.8220632076263428, + "step": 345 + }, + { + "epoch": 
0.12193832599118942, + "grad_norm": 1.5283410369228738, + "learning_rate": 1.9993684846128588e-05, + "loss": 0.8407794237136841, + "step": 346 + }, + { + "epoch": 0.12229074889867841, + "grad_norm": 1.479739244816861, + "learning_rate": 1.9993476117438257e-05, + "loss": 0.795718789100647, + "step": 347 + }, + { + "epoch": 0.1226431718061674, + "grad_norm": 1.3466106447402244, + "learning_rate": 1.9993263996273792e-05, + "loss": 0.7482223510742188, + "step": 348 + }, + { + "epoch": 0.12299559471365638, + "grad_norm": 1.4606743428798505, + "learning_rate": 1.99930484827072e-05, + "loss": 0.814468264579773, + "step": 349 + }, + { + "epoch": 0.12334801762114538, + "grad_norm": 1.5345713664893856, + "learning_rate": 1.9992829576811648e-05, + "loss": 0.8105748891830444, + "step": 350 + }, + { + "epoch": 0.12370044052863437, + "grad_norm": 1.6869192314100032, + "learning_rate": 1.9992607278661437e-05, + "loss": 0.8756073713302612, + "step": 351 + }, + { + "epoch": 0.12405286343612335, + "grad_norm": 1.228330868948225, + "learning_rate": 1.9992381588332043e-05, + "loss": 0.8643946647644043, + "step": 352 + }, + { + "epoch": 0.12440528634361234, + "grad_norm": 1.1468400313164093, + "learning_rate": 1.9992152505900067e-05, + "loss": 0.7691172361373901, + "step": 353 + }, + { + "epoch": 0.12475770925110131, + "grad_norm": 1.3198644948783926, + "learning_rate": 1.9991920031443288e-05, + "loss": 0.716686487197876, + "step": 354 + }, + { + "epoch": 0.12511013215859032, + "grad_norm": 1.39334404424432, + "learning_rate": 1.9991684165040616e-05, + "loss": 0.697482705116272, + "step": 355 + }, + { + "epoch": 0.12546255506607928, + "grad_norm": 1.5087579956634654, + "learning_rate": 1.999144490677212e-05, + "loss": 0.8039460182189941, + "step": 356 + }, + { + "epoch": 0.12581497797356828, + "grad_norm": 1.3206582875495743, + "learning_rate": 1.9991202256719032e-05, + "loss": 0.872138261795044, + "step": 357 + }, + { + "epoch": 0.12616740088105727, + "grad_norm": 1.330801420963485, + 
"learning_rate": 1.999095621496371e-05, + "loss": 0.8659502267837524, + "step": 358 + }, + { + "epoch": 0.12651982378854626, + "grad_norm": 1.2062023445068855, + "learning_rate": 1.9990706781589682e-05, + "loss": 0.7585660219192505, + "step": 359 + }, + { + "epoch": 0.12687224669603525, + "grad_norm": 1.349814688916852, + "learning_rate": 1.9990453956681626e-05, + "loss": 0.86381995677948, + "step": 360 + }, + { + "epoch": 0.12722466960352422, + "grad_norm": 1.3080210647965176, + "learning_rate": 1.9990197740325365e-05, + "loss": 0.7623461484909058, + "step": 361 + }, + { + "epoch": 0.1275770925110132, + "grad_norm": 1.4247026163468757, + "learning_rate": 1.9989938132607877e-05, + "loss": 0.8262917995452881, + "step": 362 + }, + { + "epoch": 0.1279295154185022, + "grad_norm": 1.3245955099655373, + "learning_rate": 1.9989675133617294e-05, + "loss": 0.7879630327224731, + "step": 363 + }, + { + "epoch": 0.1282819383259912, + "grad_norm": 1.5925116832241206, + "learning_rate": 1.9989408743442892e-05, + "loss": 0.8282565474510193, + "step": 364 + }, + { + "epoch": 0.12863436123348018, + "grad_norm": 1.151308483630064, + "learning_rate": 1.9989138962175105e-05, + "loss": 0.8358104228973389, + "step": 365 + }, + { + "epoch": 0.12898678414096917, + "grad_norm": 1.4831450607430074, + "learning_rate": 1.9988865789905513e-05, + "loss": 0.9111027121543884, + "step": 366 + }, + { + "epoch": 0.12933920704845814, + "grad_norm": 1.4181532995073547, + "learning_rate": 1.9988589226726847e-05, + "loss": 0.766915500164032, + "step": 367 + }, + { + "epoch": 0.12969162995594713, + "grad_norm": 1.3923253104774793, + "learning_rate": 1.9988309272733e-05, + "loss": 0.818048357963562, + "step": 368 + }, + { + "epoch": 0.13004405286343612, + "grad_norm": 1.2625645815303237, + "learning_rate": 1.9988025928019e-05, + "loss": 0.8188307285308838, + "step": 369 + }, + { + "epoch": 0.1303964757709251, + "grad_norm": 1.4656557007271924, + "learning_rate": 1.998773919268104e-05, + "loss": 
0.88718181848526, + "step": 370 + }, + { + "epoch": 0.1307488986784141, + "grad_norm": 1.3104922660776017, + "learning_rate": 1.998744906681645e-05, + "loss": 0.9173898696899414, + "step": 371 + }, + { + "epoch": 0.1311013215859031, + "grad_norm": 1.4305544884130297, + "learning_rate": 1.9987155550523725e-05, + "loss": 0.8025110960006714, + "step": 372 + }, + { + "epoch": 0.13145374449339206, + "grad_norm": 1.2328392002659898, + "learning_rate": 1.9986858643902502e-05, + "loss": 0.8931341767311096, + "step": 373 + }, + { + "epoch": 0.13180616740088105, + "grad_norm": 1.258415234092876, + "learning_rate": 1.9986558347053574e-05, + "loss": 0.8813796043395996, + "step": 374 + }, + { + "epoch": 0.13215859030837004, + "grad_norm": 1.3254702068923054, + "learning_rate": 1.9986254660078877e-05, + "loss": 0.8021976947784424, + "step": 375 + }, + { + "epoch": 0.13251101321585904, + "grad_norm": 1.3001638136254743, + "learning_rate": 1.9985947583081506e-05, + "loss": 0.8083860874176025, + "step": 376 + }, + { + "epoch": 0.13286343612334803, + "grad_norm": 1.2519881014381842, + "learning_rate": 1.9985637116165705e-05, + "loss": 0.7639983296394348, + "step": 377 + }, + { + "epoch": 0.133215859030837, + "grad_norm": 1.3308962501940544, + "learning_rate": 1.9985323259436874e-05, + "loss": 0.7775800228118896, + "step": 378 + }, + { + "epoch": 0.13356828193832598, + "grad_norm": 1.3822704707659155, + "learning_rate": 1.9985006013001545e-05, + "loss": 0.8892228603363037, + "step": 379 + }, + { + "epoch": 0.13392070484581498, + "grad_norm": 1.4007373611969895, + "learning_rate": 1.998468537696742e-05, + "loss": 0.9158765077590942, + "step": 380 + }, + { + "epoch": 0.13427312775330397, + "grad_norm": 1.2142103786325267, + "learning_rate": 1.9984361351443343e-05, + "loss": 0.7523722648620605, + "step": 381 + }, + { + "epoch": 0.13462555066079296, + "grad_norm": 1.5406874167870075, + "learning_rate": 1.998403393653932e-05, + "loss": 0.8052740693092346, + "step": 382 + }, + { + "epoch": 
0.13497797356828195, + "grad_norm": 4.839014305582762, + "learning_rate": 1.9983703132366484e-05, + "loss": 0.8271476626396179, + "step": 383 + }, + { + "epoch": 0.13533039647577091, + "grad_norm": 1.3724243356768093, + "learning_rate": 1.998336893903714e-05, + "loss": 0.8904454112052917, + "step": 384 + }, + { + "epoch": 0.1356828193832599, + "grad_norm": 1.5086695454887955, + "learning_rate": 1.9983031356664733e-05, + "loss": 0.8705847263336182, + "step": 385 + }, + { + "epoch": 0.1360352422907489, + "grad_norm": 1.3562221939291232, + "learning_rate": 1.9982690385363867e-05, + "loss": 0.8269569873809814, + "step": 386 + }, + { + "epoch": 0.1363876651982379, + "grad_norm": 1.6156870918588995, + "learning_rate": 1.998234602525029e-05, + "loss": 0.9796818494796753, + "step": 387 + }, + { + "epoch": 0.13674008810572688, + "grad_norm": 1.5268638185003427, + "learning_rate": 1.9981998276440892e-05, + "loss": 0.8276596665382385, + "step": 388 + }, + { + "epoch": 0.13709251101321585, + "grad_norm": 1.1979978409172833, + "learning_rate": 1.9981647139053737e-05, + "loss": 0.8739231824874878, + "step": 389 + }, + { + "epoch": 0.13744493392070484, + "grad_norm": 1.517970302113154, + "learning_rate": 1.9981292613208018e-05, + "loss": 0.677521824836731, + "step": 390 + }, + { + "epoch": 0.13779735682819383, + "grad_norm": 1.483399153515808, + "learning_rate": 1.9980934699024084e-05, + "loss": 0.744938313961029, + "step": 391 + }, + { + "epoch": 0.13814977973568282, + "grad_norm": 1.423178346498717, + "learning_rate": 1.998057339662344e-05, + "loss": 0.8367065787315369, + "step": 392 + }, + { + "epoch": 0.1385022026431718, + "grad_norm": 1.6714277386990386, + "learning_rate": 1.9980208706128733e-05, + "loss": 0.775547981262207, + "step": 393 + }, + { + "epoch": 0.1388546255506608, + "grad_norm": 1.244274379470138, + "learning_rate": 1.9979840627663764e-05, + "loss": 0.8287982940673828, + "step": 394 + }, + { + "epoch": 0.13920704845814977, + "grad_norm": 1.429588244120958, + 
"learning_rate": 1.997946916135349e-05, + "loss": 0.7582247257232666, + "step": 395 + }, + { + "epoch": 0.13955947136563876, + "grad_norm": 1.309709423857836, + "learning_rate": 1.997909430732401e-05, + "loss": 0.968267560005188, + "step": 396 + }, + { + "epoch": 0.13991189427312775, + "grad_norm": 1.4247483192434738, + "learning_rate": 1.9978716065702566e-05, + "loss": 0.8850257396697998, + "step": 397 + }, + { + "epoch": 0.14026431718061674, + "grad_norm": 1.1261344584223945, + "learning_rate": 1.9978334436617574e-05, + "loss": 0.7206246852874756, + "step": 398 + }, + { + "epoch": 0.14061674008810573, + "grad_norm": 1.2702546976441136, + "learning_rate": 1.9977949420198576e-05, + "loss": 0.7833065986633301, + "step": 399 + }, + { + "epoch": 0.14096916299559473, + "grad_norm": 1.2940706461552187, + "learning_rate": 1.9977561016576275e-05, + "loss": 0.7199673652648926, + "step": 400 + }, + { + "epoch": 0.1413215859030837, + "grad_norm": 1.3300807823897647, + "learning_rate": 1.9977169225882522e-05, + "loss": 0.7544811367988586, + "step": 401 + }, + { + "epoch": 0.14167400881057268, + "grad_norm": 1.3500860064281444, + "learning_rate": 1.9976774048250317e-05, + "loss": 0.7528219819068909, + "step": 402 + }, + { + "epoch": 0.14202643171806167, + "grad_norm": 1.230028309495833, + "learning_rate": 1.9976375483813814e-05, + "loss": 0.8025565147399902, + "step": 403 + }, + { + "epoch": 0.14237885462555067, + "grad_norm": 1.271700071603726, + "learning_rate": 1.997597353270831e-05, + "loss": 0.6553962230682373, + "step": 404 + }, + { + "epoch": 0.14273127753303966, + "grad_norm": 1.195900427449374, + "learning_rate": 1.9975568195070253e-05, + "loss": 0.7070015072822571, + "step": 405 + }, + { + "epoch": 0.14308370044052862, + "grad_norm": 1.238996854756085, + "learning_rate": 1.9975159471037247e-05, + "loss": 0.7454725503921509, + "step": 406 + }, + { + "epoch": 0.1434361233480176, + "grad_norm": 1.5517260528670263, + "learning_rate": 1.9974747360748038e-05, + "loss": 
0.7074518799781799, + "step": 407 + }, + { + "epoch": 0.1437885462555066, + "grad_norm": 1.4240478656973132, + "learning_rate": 1.9974331864342527e-05, + "loss": 0.6870182752609253, + "step": 408 + }, + { + "epoch": 0.1441409691629956, + "grad_norm": 1.5514938206230895, + "learning_rate": 1.9973912981961763e-05, + "loss": 0.826898455619812, + "step": 409 + }, + { + "epoch": 0.1444933920704846, + "grad_norm": 1.483679538302774, + "learning_rate": 1.997349071374794e-05, + "loss": 0.7244436740875244, + "step": 410 + }, + { + "epoch": 0.14484581497797358, + "grad_norm": 1.2681717185328807, + "learning_rate": 1.9973065059844404e-05, + "loss": 0.6885448694229126, + "step": 411 + }, + { + "epoch": 0.14519823788546254, + "grad_norm": 1.3797417122455713, + "learning_rate": 1.9972636020395653e-05, + "loss": 0.8477644920349121, + "step": 412 + }, + { + "epoch": 0.14555066079295154, + "grad_norm": 1.5051840849568912, + "learning_rate": 1.9972203595547334e-05, + "loss": 0.9432111382484436, + "step": 413 + }, + { + "epoch": 0.14590308370044053, + "grad_norm": 1.351618505603555, + "learning_rate": 1.9971767785446243e-05, + "loss": 1.0101501941680908, + "step": 414 + }, + { + "epoch": 0.14625550660792952, + "grad_norm": 1.421926997117087, + "learning_rate": 1.997132859024032e-05, + "loss": 0.8174984455108643, + "step": 415 + }, + { + "epoch": 0.1466079295154185, + "grad_norm": 1.1573592385577054, + "learning_rate": 1.997088601007866e-05, + "loss": 0.6857198476791382, + "step": 416 + }, + { + "epoch": 0.14696035242290748, + "grad_norm": 1.1795540078822444, + "learning_rate": 1.9970440045111505e-05, + "loss": 0.7742792367935181, + "step": 417 + }, + { + "epoch": 0.14731277533039647, + "grad_norm": 1.783143700583216, + "learning_rate": 1.996999069549025e-05, + "loss": 0.7489269971847534, + "step": 418 + }, + { + "epoch": 0.14766519823788546, + "grad_norm": 1.4327273961807123, + "learning_rate": 1.9969537961367423e-05, + "loss": 0.7362021207809448, + "step": 419 + }, + { + "epoch": 
0.14801762114537445, + "grad_norm": 1.3763810595433905, + "learning_rate": 1.996908184289673e-05, + "loss": 0.7596213221549988, + "step": 420 + }, + { + "epoch": 0.14837004405286344, + "grad_norm": 1.3357573192960268, + "learning_rate": 1.9968622340232993e-05, + "loss": 0.7739163637161255, + "step": 421 + }, + { + "epoch": 0.14872246696035243, + "grad_norm": 1.2890109075687697, + "learning_rate": 1.9968159453532215e-05, + "loss": 0.9059790372848511, + "step": 422 + }, + { + "epoch": 0.1490748898678414, + "grad_norm": 1.4830814966077062, + "learning_rate": 1.9967693182951516e-05, + "loss": 0.7298871278762817, + "step": 423 + }, + { + "epoch": 0.1494273127753304, + "grad_norm": 1.3303231094936145, + "learning_rate": 1.9967223528649194e-05, + "loss": 0.7218194007873535, + "step": 424 + }, + { + "epoch": 0.14977973568281938, + "grad_norm": 1.3738677080017252, + "learning_rate": 1.996675049078467e-05, + "loss": 0.8031259179115295, + "step": 425 + }, + { + "epoch": 0.15013215859030837, + "grad_norm": 1.402915539690338, + "learning_rate": 1.9966274069518533e-05, + "loss": 0.8583194613456726, + "step": 426 + }, + { + "epoch": 0.15048458149779737, + "grad_norm": 1.5081794718854693, + "learning_rate": 1.9965794265012514e-05, + "loss": 0.7829155921936035, + "step": 427 + }, + { + "epoch": 0.15083700440528636, + "grad_norm": 1.3040065928659967, + "learning_rate": 1.9965311077429484e-05, + "loss": 0.709203839302063, + "step": 428 + }, + { + "epoch": 0.15118942731277532, + "grad_norm": 1.324153309243564, + "learning_rate": 1.996482450693348e-05, + "loss": 0.7515710592269897, + "step": 429 + }, + { + "epoch": 0.1515418502202643, + "grad_norm": 1.5966034920450463, + "learning_rate": 1.9964334553689674e-05, + "loss": 0.8552615642547607, + "step": 430 + }, + { + "epoch": 0.1518942731277533, + "grad_norm": 1.3833039246024212, + "learning_rate": 1.9963841217864385e-05, + "loss": 0.7946224808692932, + "step": 431 + }, + { + "epoch": 0.1522466960352423, + "grad_norm": 1.351342046961, + 
"learning_rate": 1.9963344499625087e-05, + "loss": 0.7117756605148315, + "step": 432 + }, + { + "epoch": 0.1525991189427313, + "grad_norm": 1.5677032677150589, + "learning_rate": 1.9962844399140405e-05, + "loss": 0.8892849683761597, + "step": 433 + }, + { + "epoch": 0.15295154185022025, + "grad_norm": 1.6682742006947457, + "learning_rate": 1.9962340916580105e-05, + "loss": 0.9037783145904541, + "step": 434 + }, + { + "epoch": 0.15330396475770924, + "grad_norm": 1.3178590359087465, + "learning_rate": 1.9961834052115104e-05, + "loss": 0.7419179677963257, + "step": 435 + }, + { + "epoch": 0.15365638766519824, + "grad_norm": 1.500659178246394, + "learning_rate": 1.9961323805917464e-05, + "loss": 0.847285270690918, + "step": 436 + }, + { + "epoch": 0.15400881057268723, + "grad_norm": 1.520891708486689, + "learning_rate": 1.99608101781604e-05, + "loss": 0.793263852596283, + "step": 437 + }, + { + "epoch": 0.15436123348017622, + "grad_norm": 1.2927327484478677, + "learning_rate": 1.9960293169018276e-05, + "loss": 0.6600923538208008, + "step": 438 + }, + { + "epoch": 0.1547136563876652, + "grad_norm": 1.178823428760428, + "learning_rate": 1.9959772778666592e-05, + "loss": 0.7642164826393127, + "step": 439 + }, + { + "epoch": 0.15506607929515417, + "grad_norm": 1.4230767051116806, + "learning_rate": 1.995924900728201e-05, + "loss": 0.897221565246582, + "step": 440 + }, + { + "epoch": 0.15541850220264317, + "grad_norm": 1.3912415328195475, + "learning_rate": 1.9958721855042338e-05, + "loss": 0.830953061580658, + "step": 441 + }, + { + "epoch": 0.15577092511013216, + "grad_norm": 1.3683790024985447, + "learning_rate": 1.995819132212652e-05, + "loss": 0.7514863014221191, + "step": 442 + }, + { + "epoch": 0.15612334801762115, + "grad_norm": 1.3179910502987273, + "learning_rate": 1.995765740871466e-05, + "loss": 0.7039257287979126, + "step": 443 + }, + { + "epoch": 0.15647577092511014, + "grad_norm": 1.5017230130600239, + "learning_rate": 1.9957120114988e-05, + "loss": 
0.810503363609314, + "step": 444 + }, + { + "epoch": 0.1568281938325991, + "grad_norm": 1.4050071397488821, + "learning_rate": 1.9956579441128942e-05, + "loss": 0.616968035697937, + "step": 445 + }, + { + "epoch": 0.1571806167400881, + "grad_norm": 1.3149075420166694, + "learning_rate": 1.9956035387321024e-05, + "loss": 0.7008740901947021, + "step": 446 + }, + { + "epoch": 0.1575330396475771, + "grad_norm": 1.4992101173925434, + "learning_rate": 1.995548795374893e-05, + "loss": 0.847025454044342, + "step": 447 + }, + { + "epoch": 0.15788546255506608, + "grad_norm": 1.3763555067673139, + "learning_rate": 1.9954937140598506e-05, + "loss": 0.7788053750991821, + "step": 448 + }, + { + "epoch": 0.15823788546255507, + "grad_norm": 1.301728118921247, + "learning_rate": 1.9954382948056735e-05, + "loss": 0.7592896819114685, + "step": 449 + }, + { + "epoch": 0.15859030837004406, + "grad_norm": 1.6001158206313053, + "learning_rate": 1.995382537631174e-05, + "loss": 0.9458491802215576, + "step": 450 + }, + { + "epoch": 0.15894273127753303, + "grad_norm": 1.3218132869761372, + "learning_rate": 1.9953264425552804e-05, + "loss": 0.8069632053375244, + "step": 451 + }, + { + "epoch": 0.15929515418502202, + "grad_norm": 1.316918406992957, + "learning_rate": 1.9952700095970357e-05, + "loss": 0.7876379489898682, + "step": 452 + }, + { + "epoch": 0.159647577092511, + "grad_norm": 1.5440089355741875, + "learning_rate": 1.9952132387755965e-05, + "loss": 0.796333909034729, + "step": 453 + }, + { + "epoch": 0.16, + "grad_norm": 1.243828269503452, + "learning_rate": 1.9951561301102348e-05, + "loss": 0.7171634435653687, + "step": 454 + }, + { + "epoch": 0.160352422907489, + "grad_norm": 1.429835470120866, + "learning_rate": 1.9950986836203374e-05, + "loss": 0.8312792778015137, + "step": 455 + }, + { + "epoch": 0.160704845814978, + "grad_norm": 1.4333167021702193, + "learning_rate": 1.995040899325406e-05, + "loss": 0.7496857643127441, + "step": 456 + }, + { + "epoch": 0.16105726872246695, + 
"grad_norm": 1.2513531381670333, + "learning_rate": 1.9949827772450555e-05, + "loss": 0.89504075050354, + "step": 457 + }, + { + "epoch": 0.16140969162995594, + "grad_norm": 1.5536951579594835, + "learning_rate": 1.9949243173990172e-05, + "loss": 0.7580761313438416, + "step": 458 + }, + { + "epoch": 0.16176211453744493, + "grad_norm": 1.6782383396512721, + "learning_rate": 1.9948655198071365e-05, + "loss": 0.7826676368713379, + "step": 459 + }, + { + "epoch": 0.16211453744493393, + "grad_norm": 1.5979456835427475, + "learning_rate": 1.9948063844893733e-05, + "loss": 0.7591372728347778, + "step": 460 + }, + { + "epoch": 0.16246696035242292, + "grad_norm": 1.394749193132719, + "learning_rate": 1.994746911465802e-05, + "loss": 0.7366905808448792, + "step": 461 + }, + { + "epoch": 0.16281938325991188, + "grad_norm": 1.2449236570155473, + "learning_rate": 1.9946871007566116e-05, + "loss": 0.7152266502380371, + "step": 462 + }, + { + "epoch": 0.16317180616740087, + "grad_norm": 1.475247855733958, + "learning_rate": 1.994626952382107e-05, + "loss": 0.8411930799484253, + "step": 463 + }, + { + "epoch": 0.16352422907488987, + "grad_norm": 1.1709525471997975, + "learning_rate": 1.9945664663627054e-05, + "loss": 0.6689857244491577, + "step": 464 + }, + { + "epoch": 0.16387665198237886, + "grad_norm": 1.3007920668059838, + "learning_rate": 1.9945056427189408e-05, + "loss": 0.6474499106407166, + "step": 465 + }, + { + "epoch": 0.16422907488986785, + "grad_norm": 1.397646475804827, + "learning_rate": 1.9944444814714604e-05, + "loss": 0.7861372232437134, + "step": 466 + }, + { + "epoch": 0.16458149779735684, + "grad_norm": 1.4072541980161448, + "learning_rate": 1.9943829826410273e-05, + "loss": 0.8301665186882019, + "step": 467 + }, + { + "epoch": 0.1649339207048458, + "grad_norm": 1.1473159016242473, + "learning_rate": 1.9943211462485176e-05, + "loss": 0.661811888217926, + "step": 468 + }, + { + "epoch": 0.1652863436123348, + "grad_norm": 1.4009911983471504, + "learning_rate": 
1.9942589723149233e-05, + "loss": 0.7768537402153015, + "step": 469 + }, + { + "epoch": 0.1656387665198238, + "grad_norm": 1.209922489625636, + "learning_rate": 1.9941964608613503e-05, + "loss": 0.6139112710952759, + "step": 470 + }, + { + "epoch": 0.16599118942731278, + "grad_norm": 1.3814257371396368, + "learning_rate": 1.9941336119090193e-05, + "loss": 0.8284693956375122, + "step": 471 + }, + { + "epoch": 0.16634361233480177, + "grad_norm": 1.2594577624707568, + "learning_rate": 1.9940704254792655e-05, + "loss": 0.7281739711761475, + "step": 472 + }, + { + "epoch": 0.16669603524229074, + "grad_norm": 1.4773463672265492, + "learning_rate": 1.994006901593539e-05, + "loss": 0.687767744064331, + "step": 473 + }, + { + "epoch": 0.16704845814977973, + "grad_norm": 1.3067539084660165, + "learning_rate": 1.9939430402734046e-05, + "loss": 0.7553595304489136, + "step": 474 + }, + { + "epoch": 0.16740088105726872, + "grad_norm": 1.5537103296420662, + "learning_rate": 1.99387884154054e-05, + "loss": 0.9263294339179993, + "step": 475 + }, + { + "epoch": 0.1677533039647577, + "grad_norm": 1.5514792381885942, + "learning_rate": 1.9938143054167397e-05, + "loss": 0.7014337182044983, + "step": 476 + }, + { + "epoch": 0.1681057268722467, + "grad_norm": 1.1598559513797833, + "learning_rate": 1.9937494319239112e-05, + "loss": 0.6454538106918335, + "step": 477 + }, + { + "epoch": 0.1684581497797357, + "grad_norm": 1.3402764899565285, + "learning_rate": 1.9936842210840775e-05, + "loss": 0.7792352437973022, + "step": 478 + }, + { + "epoch": 0.16881057268722466, + "grad_norm": 1.481603380133959, + "learning_rate": 1.9936186729193753e-05, + "loss": 0.8773127794265747, + "step": 479 + }, + { + "epoch": 0.16916299559471365, + "grad_norm": 1.3472965431143242, + "learning_rate": 1.993552787452056e-05, + "loss": 0.892439603805542, + "step": 480 + }, + { + "epoch": 0.16951541850220264, + "grad_norm": 1.5839752051025837, + "learning_rate": 1.993486564704486e-05, + "loss": 0.89835524559021, + 
"step": 481 + }, + { + "epoch": 0.16986784140969163, + "grad_norm": 1.4593777249036533, + "learning_rate": 1.9934200046991453e-05, + "loss": 0.8013701438903809, + "step": 482 + }, + { + "epoch": 0.17022026431718063, + "grad_norm": 1.5168797838116639, + "learning_rate": 1.9933531074586296e-05, + "loss": 0.8086763620376587, + "step": 483 + }, + { + "epoch": 0.17057268722466962, + "grad_norm": 1.4399310447978144, + "learning_rate": 1.9932858730056486e-05, + "loss": 0.7736518383026123, + "step": 484 + }, + { + "epoch": 0.17092511013215858, + "grad_norm": 1.2982542574143365, + "learning_rate": 1.9932183013630257e-05, + "loss": 0.6247539520263672, + "step": 485 + }, + { + "epoch": 0.17127753303964757, + "grad_norm": 1.519445958865324, + "learning_rate": 1.9931503925536996e-05, + "loss": 0.7172006368637085, + "step": 486 + }, + { + "epoch": 0.17162995594713656, + "grad_norm": 1.3043787656359138, + "learning_rate": 1.993082146600723e-05, + "loss": 0.7854465246200562, + "step": 487 + }, + { + "epoch": 0.17198237885462556, + "grad_norm": 1.2038371426907561, + "learning_rate": 1.9930135635272637e-05, + "loss": 0.7018419504165649, + "step": 488 + }, + { + "epoch": 0.17233480176211455, + "grad_norm": 1.2578522146284077, + "learning_rate": 1.9929446433566033e-05, + "loss": 0.783660352230072, + "step": 489 + }, + { + "epoch": 0.1726872246696035, + "grad_norm": 1.4288043068768257, + "learning_rate": 1.992875386112138e-05, + "loss": 1.0166207551956177, + "step": 490 + }, + { + "epoch": 0.1730396475770925, + "grad_norm": 1.5208280960226344, + "learning_rate": 1.9928057918173786e-05, + "loss": 0.7692895531654358, + "step": 491 + }, + { + "epoch": 0.1733920704845815, + "grad_norm": 1.3733404774184526, + "learning_rate": 1.9927358604959503e-05, + "loss": 0.8005259037017822, + "step": 492 + }, + { + "epoch": 0.1737444933920705, + "grad_norm": 1.3189354109245792, + "learning_rate": 1.9926655921715924e-05, + "loss": 0.6780292987823486, + "step": 493 + }, + { + "epoch": 
0.17409691629955948, + "grad_norm": 1.2272422506889333, + "learning_rate": 1.9925949868681587e-05, + "loss": 0.6501175165176392, + "step": 494 + }, + { + "epoch": 0.17444933920704847, + "grad_norm": 1.3095934443108421, + "learning_rate": 1.9925240446096176e-05, + "loss": 0.781839907169342, + "step": 495 + }, + { + "epoch": 0.17480176211453743, + "grad_norm": 1.4508599784840917, + "learning_rate": 1.992452765420052e-05, + "loss": 0.7617994546890259, + "step": 496 + }, + { + "epoch": 0.17515418502202643, + "grad_norm": 1.2324738440312524, + "learning_rate": 1.992381149323659e-05, + "loss": 0.8019097447395325, + "step": 497 + }, + { + "epoch": 0.17550660792951542, + "grad_norm": 1.3071824216187324, + "learning_rate": 1.9923091963447496e-05, + "loss": 0.7526847124099731, + "step": 498 + }, + { + "epoch": 0.1758590308370044, + "grad_norm": 1.340463358272731, + "learning_rate": 1.9922369065077497e-05, + "loss": 0.7101150751113892, + "step": 499 + }, + { + "epoch": 0.1762114537444934, + "grad_norm": 1.396850141714641, + "learning_rate": 1.9921642798372e-05, + "loss": 0.8519806861877441, + "step": 500 + }, + { + "epoch": 0.17656387665198237, + "grad_norm": 1.5427241760761283, + "learning_rate": 1.9920913163577542e-05, + "loss": 0.774759829044342, + "step": 501 + }, + { + "epoch": 0.17691629955947136, + "grad_norm": 1.4501760642130928, + "learning_rate": 1.992018016094182e-05, + "loss": 0.8597595691680908, + "step": 502 + }, + { + "epoch": 0.17726872246696035, + "grad_norm": 1.6336800938277667, + "learning_rate": 1.9919443790713658e-05, + "loss": 0.7023826241493225, + "step": 503 + }, + { + "epoch": 0.17762114537444934, + "grad_norm": 1.8758125980343456, + "learning_rate": 1.991870405314303e-05, + "loss": 0.8290892839431763, + "step": 504 + }, + { + "epoch": 0.17797356828193833, + "grad_norm": 1.368620384992611, + "learning_rate": 1.9917960948481062e-05, + "loss": 0.9240517020225525, + "step": 505 + }, + { + "epoch": 0.17832599118942732, + "grad_norm": 1.4203507781601712, + 
"learning_rate": 1.9917214476980012e-05, + "loss": 0.8247153759002686, + "step": 506 + }, + { + "epoch": 0.1786784140969163, + "grad_norm": 1.5364946844029868, + "learning_rate": 1.991646463889328e-05, + "loss": 0.9101368188858032, + "step": 507 + }, + { + "epoch": 0.17903083700440528, + "grad_norm": 1.3883082747026767, + "learning_rate": 1.9915711434475416e-05, + "loss": 0.7688114643096924, + "step": 508 + }, + { + "epoch": 0.17938325991189427, + "grad_norm": 1.41173691792053, + "learning_rate": 1.9914954863982106e-05, + "loss": 0.820112943649292, + "step": 509 + }, + { + "epoch": 0.17973568281938326, + "grad_norm": 1.2372115494246672, + "learning_rate": 1.9914194927670186e-05, + "loss": 0.6393542289733887, + "step": 510 + }, + { + "epoch": 0.18008810572687226, + "grad_norm": 1.5514274082803117, + "learning_rate": 1.991343162579763e-05, + "loss": 0.9463154673576355, + "step": 511 + }, + { + "epoch": 0.18044052863436125, + "grad_norm": 1.2818287593652882, + "learning_rate": 1.9912664958623556e-05, + "loss": 0.9498215913772583, + "step": 512 + }, + { + "epoch": 0.1807929515418502, + "grad_norm": 1.3538150363158374, + "learning_rate": 1.991189492640822e-05, + "loss": 0.7659052014350891, + "step": 513 + }, + { + "epoch": 0.1811453744493392, + "grad_norm": 1.3014303918670855, + "learning_rate": 1.9911121529413028e-05, + "loss": 0.9946317672729492, + "step": 514 + }, + { + "epoch": 0.1814977973568282, + "grad_norm": 1.2888096801517381, + "learning_rate": 1.991034476790052e-05, + "loss": 0.762086033821106, + "step": 515 + }, + { + "epoch": 0.18185022026431719, + "grad_norm": 1.2685969775930512, + "learning_rate": 1.990956464213438e-05, + "loss": 0.7507720589637756, + "step": 516 + }, + { + "epoch": 0.18220264317180618, + "grad_norm": 1.2567492686992259, + "learning_rate": 1.990878115237945e-05, + "loss": 0.7859716415405273, + "step": 517 + }, + { + "epoch": 0.18255506607929514, + "grad_norm": 1.3199744761398897, + "learning_rate": 1.9907994298901688e-05, + "loss": 
0.8585234880447388, + "step": 518 + }, + { + "epoch": 0.18290748898678413, + "grad_norm": 1.2014345702103446, + "learning_rate": 1.990720408196821e-05, + "loss": 0.8569823503494263, + "step": 519 + }, + { + "epoch": 0.18325991189427313, + "grad_norm": 1.4066812868889107, + "learning_rate": 1.990641050184727e-05, + "loss": 0.8297367095947266, + "step": 520 + }, + { + "epoch": 0.18361233480176212, + "grad_norm": 1.4158335601181062, + "learning_rate": 1.9905613558808262e-05, + "loss": 0.7918041348457336, + "step": 521 + }, + { + "epoch": 0.1839647577092511, + "grad_norm": 1.3066639133280875, + "learning_rate": 1.9904813253121727e-05, + "loss": 0.8322931528091431, + "step": 522 + }, + { + "epoch": 0.1843171806167401, + "grad_norm": 1.600997340162295, + "learning_rate": 1.990400958505934e-05, + "loss": 0.6822292804718018, + "step": 523 + }, + { + "epoch": 0.18466960352422906, + "grad_norm": 1.344951810567012, + "learning_rate": 1.9903202554893925e-05, + "loss": 0.8989835977554321, + "step": 524 + }, + { + "epoch": 0.18502202643171806, + "grad_norm": 1.4644963211452282, + "learning_rate": 1.990239216289944e-05, + "loss": 0.671294093132019, + "step": 525 + }, + { + "epoch": 0.18537444933920705, + "grad_norm": 1.3104672306859468, + "learning_rate": 1.990157840935099e-05, + "loss": 0.9045379161834717, + "step": 526 + }, + { + "epoch": 0.18572687224669604, + "grad_norm": 1.2000125993399395, + "learning_rate": 1.990076129452482e-05, + "loss": 0.7117471694946289, + "step": 527 + }, + { + "epoch": 0.18607929515418503, + "grad_norm": 1.406356072194557, + "learning_rate": 1.9899940818698315e-05, + "loss": 0.890752911567688, + "step": 528 + }, + { + "epoch": 0.186431718061674, + "grad_norm": 1.3199977159633904, + "learning_rate": 1.9899116982149994e-05, + "loss": 0.7209222316741943, + "step": 529 + }, + { + "epoch": 0.186784140969163, + "grad_norm": 1.4346812218183875, + "learning_rate": 1.9898289785159534e-05, + "loss": 0.6912863254547119, + "step": 530 + }, + { + "epoch": 
0.18713656387665198, + "grad_norm": 1.4271479463954384, + "learning_rate": 1.9897459228007736e-05, + "loss": 0.7060319185256958, + "step": 531 + }, + { + "epoch": 0.18748898678414097, + "grad_norm": 1.2685161281492263, + "learning_rate": 1.9896625310976553e-05, + "loss": 0.6975364685058594, + "step": 532 + }, + { + "epoch": 0.18784140969162996, + "grad_norm": 1.513572022269192, + "learning_rate": 1.989578803434907e-05, + "loss": 0.8576006293296814, + "step": 533 + }, + { + "epoch": 0.18819383259911895, + "grad_norm": 1.3324082350150075, + "learning_rate": 1.9894947398409516e-05, + "loss": 0.7182095646858215, + "step": 534 + }, + { + "epoch": 0.18854625550660792, + "grad_norm": 1.4808328503550712, + "learning_rate": 1.9894103403443265e-05, + "loss": 0.7546031475067139, + "step": 535 + }, + { + "epoch": 0.1888986784140969, + "grad_norm": 1.4412494852286755, + "learning_rate": 1.9893256049736824e-05, + "loss": 0.7083312273025513, + "step": 536 + }, + { + "epoch": 0.1892511013215859, + "grad_norm": 1.178231291011438, + "learning_rate": 1.9892405337577846e-05, + "loss": 0.5614915490150452, + "step": 537 + }, + { + "epoch": 0.1896035242290749, + "grad_norm": 1.440292679191453, + "learning_rate": 1.9891551267255114e-05, + "loss": 0.7647485733032227, + "step": 538 + }, + { + "epoch": 0.18995594713656389, + "grad_norm": 1.2459192275692494, + "learning_rate": 1.9890693839058566e-05, + "loss": 0.776042103767395, + "step": 539 + }, + { + "epoch": 0.19030837004405288, + "grad_norm": 1.4553672353845373, + "learning_rate": 1.9889833053279268e-05, + "loss": 0.7694810628890991, + "step": 540 + }, + { + "epoch": 0.19066079295154184, + "grad_norm": 1.3521577159056863, + "learning_rate": 1.9888968910209433e-05, + "loss": 0.6935995817184448, + "step": 541 + }, + { + "epoch": 0.19101321585903083, + "grad_norm": 1.7084028073476007, + "learning_rate": 1.988810141014241e-05, + "loss": 0.7538039088249207, + "step": 542 + }, + { + "epoch": 0.19136563876651982, + "grad_norm": 
1.305324041919721, + "learning_rate": 1.9887230553372686e-05, + "loss": 0.8149158954620361, + "step": 543 + }, + { + "epoch": 0.19171806167400882, + "grad_norm": 1.6378712299065388, + "learning_rate": 1.988635634019589e-05, + "loss": 0.7776780128479004, + "step": 544 + }, + { + "epoch": 0.1920704845814978, + "grad_norm": 1.48919568324374, + "learning_rate": 1.9885478770908793e-05, + "loss": 0.8527307510375977, + "step": 545 + }, + { + "epoch": 0.19242290748898677, + "grad_norm": 1.1804269388923583, + "learning_rate": 1.98845978458093e-05, + "loss": 0.7239484190940857, + "step": 546 + }, + { + "epoch": 0.19277533039647576, + "grad_norm": 1.4096556213691402, + "learning_rate": 1.9883713565196462e-05, + "loss": 0.6937836408615112, + "step": 547 + }, + { + "epoch": 0.19312775330396476, + "grad_norm": 1.3705456771921078, + "learning_rate": 1.9882825929370456e-05, + "loss": 0.8567923903465271, + "step": 548 + }, + { + "epoch": 0.19348017621145375, + "grad_norm": 1.2851787163283013, + "learning_rate": 1.9881934938632615e-05, + "loss": 0.7948861122131348, + "step": 549 + }, + { + "epoch": 0.19383259911894274, + "grad_norm": 1.2883171824741761, + "learning_rate": 1.9881040593285398e-05, + "loss": 0.6808983087539673, + "step": 550 + }, + { + "epoch": 0.19418502202643173, + "grad_norm": 1.349988333670182, + "learning_rate": 1.9880142893632412e-05, + "loss": 0.9089908599853516, + "step": 551 + }, + { + "epoch": 0.1945374449339207, + "grad_norm": 1.373721278775904, + "learning_rate": 1.9879241839978393e-05, + "loss": 0.7947918176651001, + "step": 552 + }, + { + "epoch": 0.1948898678414097, + "grad_norm": 1.1679807856929723, + "learning_rate": 1.9878337432629224e-05, + "loss": 0.880418598651886, + "step": 553 + }, + { + "epoch": 0.19524229074889868, + "grad_norm": 1.4208435552970164, + "learning_rate": 1.9877429671891917e-05, + "loss": 0.8845832347869873, + "step": 554 + }, + { + "epoch": 0.19559471365638767, + "grad_norm": 1.4905396338040395, + "learning_rate": 
1.9876518558074638e-05, + "loss": 0.7635341286659241, + "step": 555 + }, + { + "epoch": 0.19594713656387666, + "grad_norm": 1.3799865187677636, + "learning_rate": 1.9875604091486678e-05, + "loss": 0.9301069974899292, + "step": 556 + }, + { + "epoch": 0.19629955947136563, + "grad_norm": 1.255573262915276, + "learning_rate": 1.9874686272438467e-05, + "loss": 0.8788589239120483, + "step": 557 + }, + { + "epoch": 0.19665198237885462, + "grad_norm": 1.2418539833380446, + "learning_rate": 1.987376510124158e-05, + "loss": 0.7452565431594849, + "step": 558 + }, + { + "epoch": 0.1970044052863436, + "grad_norm": 1.5278788646328887, + "learning_rate": 1.9872840578208722e-05, + "loss": 0.819628119468689, + "step": 559 + }, + { + "epoch": 0.1973568281938326, + "grad_norm": 1.4844496784402743, + "learning_rate": 1.9871912703653744e-05, + "loss": 0.7807571291923523, + "step": 560 + }, + { + "epoch": 0.1977092511013216, + "grad_norm": 1.2611555469759475, + "learning_rate": 1.9870981477891626e-05, + "loss": 0.7091392278671265, + "step": 561 + }, + { + "epoch": 0.19806167400881058, + "grad_norm": 1.2433638561435678, + "learning_rate": 1.9870046901238496e-05, + "loss": 0.8174105882644653, + "step": 562 + }, + { + "epoch": 0.19841409691629955, + "grad_norm": 1.2352337461151273, + "learning_rate": 1.9869108974011607e-05, + "loss": 0.696865439414978, + "step": 563 + }, + { + "epoch": 0.19876651982378854, + "grad_norm": 1.4794543945089762, + "learning_rate": 1.986816769652936e-05, + "loss": 0.914303183555603, + "step": 564 + }, + { + "epoch": 0.19911894273127753, + "grad_norm": 1.2869835497381619, + "learning_rate": 1.986722306911129e-05, + "loss": 0.8397856950759888, + "step": 565 + }, + { + "epoch": 0.19947136563876652, + "grad_norm": 1.216082157504287, + "learning_rate": 1.9866275092078066e-05, + "loss": 0.7206380367279053, + "step": 566 + }, + { + "epoch": 0.19982378854625552, + "grad_norm": 1.3259146958291776, + "learning_rate": 1.98653237657515e-05, + "loss": 0.7017316818237305, + 
"step": 567 + }, + { + "epoch": 0.2001762114537445, + "grad_norm": 1.6365100661152858, + "learning_rate": 1.9864369090454538e-05, + "loss": 0.8797772526741028, + "step": 568 + }, + { + "epoch": 0.20052863436123347, + "grad_norm": 1.3948984288943356, + "learning_rate": 1.9863411066511257e-05, + "loss": 0.6643391847610474, + "step": 569 + }, + { + "epoch": 0.20088105726872246, + "grad_norm": 1.4631143705399865, + "learning_rate": 1.9862449694246878e-05, + "loss": 0.8662393093109131, + "step": 570 + }, + { + "epoch": 0.20123348017621145, + "grad_norm": 1.4103722629610054, + "learning_rate": 1.9861484973987762e-05, + "loss": 0.7766140699386597, + "step": 571 + }, + { + "epoch": 0.20158590308370045, + "grad_norm": 1.4422501075340284, + "learning_rate": 1.9860516906061397e-05, + "loss": 0.8582239151000977, + "step": 572 + }, + { + "epoch": 0.20193832599118944, + "grad_norm": 1.2359229208879663, + "learning_rate": 1.9859545490796414e-05, + "loss": 0.5838385820388794, + "step": 573 + }, + { + "epoch": 0.2022907488986784, + "grad_norm": 1.4256083108556754, + "learning_rate": 1.9858570728522573e-05, + "loss": 0.6715164184570312, + "step": 574 + }, + { + "epoch": 0.2026431718061674, + "grad_norm": 1.604413564730453, + "learning_rate": 1.9857592619570783e-05, + "loss": 0.7665218114852905, + "step": 575 + }, + { + "epoch": 0.20299559471365639, + "grad_norm": 1.3992633216102752, + "learning_rate": 1.985661116427308e-05, + "loss": 0.8060458898544312, + "step": 576 + }, + { + "epoch": 0.20334801762114538, + "grad_norm": 1.3647027340900928, + "learning_rate": 1.985562636296264e-05, + "loss": 0.8354060649871826, + "step": 577 + }, + { + "epoch": 0.20370044052863437, + "grad_norm": 1.61178503454425, + "learning_rate": 1.985463821597376e-05, + "loss": 0.8814351558685303, + "step": 578 + }, + { + "epoch": 0.20405286343612336, + "grad_norm": 1.3581614903846795, + "learning_rate": 1.9853646723641895e-05, + "loss": 0.9068918228149414, + "step": 579 + }, + { + "epoch": 0.20440528634361232, 
+ "grad_norm": 1.4217775001953692, + "learning_rate": 1.9852651886303624e-05, + "loss": 0.7671997547149658, + "step": 580 + }, + { + "epoch": 0.20475770925110132, + "grad_norm": 1.2987191699893856, + "learning_rate": 1.9851653704296664e-05, + "loss": 0.7906886339187622, + "step": 581 + }, + { + "epoch": 0.2051101321585903, + "grad_norm": 1.4550942850887114, + "learning_rate": 1.985065217795987e-05, + "loss": 0.8424232006072998, + "step": 582 + }, + { + "epoch": 0.2054625550660793, + "grad_norm": 1.2767538498679667, + "learning_rate": 1.984964730763322e-05, + "loss": 0.8335819244384766, + "step": 583 + }, + { + "epoch": 0.2058149779735683, + "grad_norm": 1.2913652769028938, + "learning_rate": 1.9848639093657844e-05, + "loss": 0.8340694308280945, + "step": 584 + }, + { + "epoch": 0.20616740088105726, + "grad_norm": 1.3161255240413319, + "learning_rate": 1.9847627536376e-05, + "loss": 0.9228274822235107, + "step": 585 + }, + { + "epoch": 0.20651982378854625, + "grad_norm": 1.548405161064148, + "learning_rate": 1.984661263613107e-05, + "loss": 0.7843449115753174, + "step": 586 + }, + { + "epoch": 0.20687224669603524, + "grad_norm": 1.3039537503613003, + "learning_rate": 1.9845594393267594e-05, + "loss": 0.7411990165710449, + "step": 587 + }, + { + "epoch": 0.20722466960352423, + "grad_norm": 1.3644443695047568, + "learning_rate": 1.9844572808131228e-05, + "loss": 0.7520540356636047, + "step": 588 + }, + { + "epoch": 0.20757709251101322, + "grad_norm": 1.2894133104841217, + "learning_rate": 1.9843547881068763e-05, + "loss": 0.795365571975708, + "step": 589 + }, + { + "epoch": 0.20792951541850221, + "grad_norm": 1.280356655308606, + "learning_rate": 1.984251961242814e-05, + "loss": 0.8415528535842896, + "step": 590 + }, + { + "epoch": 0.20828193832599118, + "grad_norm": 1.4654647998731167, + "learning_rate": 1.9841488002558416e-05, + "loss": 0.8555570244789124, + "step": 591 + }, + { + "epoch": 0.20863436123348017, + "grad_norm": 1.314593410908928, + "learning_rate": 
1.9840453051809792e-05, + "loss": 0.8214600086212158, + "step": 592 + }, + { + "epoch": 0.20898678414096916, + "grad_norm": 1.2598900623176714, + "learning_rate": 1.9839414760533607e-05, + "loss": 0.7746415138244629, + "step": 593 + }, + { + "epoch": 0.20933920704845815, + "grad_norm": 1.6285440778435663, + "learning_rate": 1.9838373129082325e-05, + "loss": 1.0861419439315796, + "step": 594 + }, + { + "epoch": 0.20969162995594715, + "grad_norm": 1.327372383451943, + "learning_rate": 1.9837328157809547e-05, + "loss": 0.7530953884124756, + "step": 595 + }, + { + "epoch": 0.21004405286343614, + "grad_norm": 1.420023169388647, + "learning_rate": 1.9836279847070004e-05, + "loss": 0.8811959624290466, + "step": 596 + }, + { + "epoch": 0.2103964757709251, + "grad_norm": 1.2274254083036087, + "learning_rate": 1.9835228197219573e-05, + "loss": 0.7956523299217224, + "step": 597 + }, + { + "epoch": 0.2107488986784141, + "grad_norm": 1.306015861681406, + "learning_rate": 1.9834173208615253e-05, + "loss": 0.8710414171218872, + "step": 598 + }, + { + "epoch": 0.21110132158590308, + "grad_norm": 1.303850147164254, + "learning_rate": 1.983311488161518e-05, + "loss": 0.9057297706604004, + "step": 599 + }, + { + "epoch": 0.21145374449339208, + "grad_norm": 1.2517049783711822, + "learning_rate": 1.983205321657862e-05, + "loss": 0.7531988024711609, + "step": 600 + }, + { + "epoch": 0.21180616740088107, + "grad_norm": 1.392455719061042, + "learning_rate": 1.983098821386598e-05, + "loss": 0.6508063077926636, + "step": 601 + }, + { + "epoch": 0.21215859030837003, + "grad_norm": 1.238668234857589, + "learning_rate": 1.9829919873838796e-05, + "loss": 0.7267025709152222, + "step": 602 + }, + { + "epoch": 0.21251101321585902, + "grad_norm": 1.2232739877442529, + "learning_rate": 1.9828848196859727e-05, + "loss": 0.6930510997772217, + "step": 603 + }, + { + "epoch": 0.21286343612334802, + "grad_norm": 1.4104259448916805, + "learning_rate": 1.9827773183292583e-05, + "loss": 0.7613120079040527, 
+ "step": 604 + }, + { + "epoch": 0.213215859030837, + "grad_norm": 1.2586328753898472, + "learning_rate": 1.9826694833502295e-05, + "loss": 0.763299822807312, + "step": 605 + }, + { + "epoch": 0.213568281938326, + "grad_norm": 1.4431352363644856, + "learning_rate": 1.9825613147854928e-05, + "loss": 0.7599194049835205, + "step": 606 + }, + { + "epoch": 0.213920704845815, + "grad_norm": 1.3487971590690426, + "learning_rate": 1.9824528126717687e-05, + "loss": 0.869399905204773, + "step": 607 + }, + { + "epoch": 0.21427312775330395, + "grad_norm": 1.3853231700631432, + "learning_rate": 1.9823439770458893e-05, + "loss": 0.733409583568573, + "step": 608 + }, + { + "epoch": 0.21462555066079295, + "grad_norm": 1.2766333009964275, + "learning_rate": 1.9822348079448014e-05, + "loss": 0.8302386999130249, + "step": 609 + }, + { + "epoch": 0.21497797356828194, + "grad_norm": 1.1872454682531661, + "learning_rate": 1.9821253054055645e-05, + "loss": 0.8234561681747437, + "step": 610 + }, + { + "epoch": 0.21533039647577093, + "grad_norm": 1.336729476582052, + "learning_rate": 1.9820154694653514e-05, + "loss": 0.81988525390625, + "step": 611 + }, + { + "epoch": 0.21568281938325992, + "grad_norm": 1.1619766622665528, + "learning_rate": 1.9819053001614478e-05, + "loss": 0.6437678933143616, + "step": 612 + }, + { + "epoch": 0.21603524229074889, + "grad_norm": 1.398835884660331, + "learning_rate": 1.9817947975312527e-05, + "loss": 0.8256562948226929, + "step": 613 + }, + { + "epoch": 0.21638766519823788, + "grad_norm": 1.4423824320045469, + "learning_rate": 1.9816839616122787e-05, + "loss": 0.8204725980758667, + "step": 614 + }, + { + "epoch": 0.21674008810572687, + "grad_norm": 1.4648639859051293, + "learning_rate": 1.9815727924421507e-05, + "loss": 0.7492775917053223, + "step": 615 + }, + { + "epoch": 0.21709251101321586, + "grad_norm": 1.4585481343848268, + "learning_rate": 1.9814612900586075e-05, + "loss": 0.629736065864563, + "step": 616 + }, + { + "epoch": 0.21744493392070485, + 
"grad_norm": 1.3908853161597456, + "learning_rate": 1.9813494544995e-05, + "loss": 0.7974159717559814, + "step": 617 + }, + { + "epoch": 0.21779735682819384, + "grad_norm": 1.4158223772493663, + "learning_rate": 1.981237285802794e-05, + "loss": 0.8367668390274048, + "step": 618 + }, + { + "epoch": 0.2181497797356828, + "grad_norm": 1.2051753792883582, + "learning_rate": 1.9811247840065667e-05, + "loss": 0.7942521572113037, + "step": 619 + }, + { + "epoch": 0.2185022026431718, + "grad_norm": 1.3827277102573685, + "learning_rate": 1.981011949149009e-05, + "loss": 0.7863545417785645, + "step": 620 + }, + { + "epoch": 0.2188546255506608, + "grad_norm": 1.3809343727942922, + "learning_rate": 1.9808987812684247e-05, + "loss": 0.8667019605636597, + "step": 621 + }, + { + "epoch": 0.21920704845814978, + "grad_norm": 1.5738475739563456, + "learning_rate": 1.9807852804032306e-05, + "loss": 0.8555353283882141, + "step": 622 + }, + { + "epoch": 0.21955947136563878, + "grad_norm": 1.244926951925701, + "learning_rate": 1.9806714465919573e-05, + "loss": 0.8170013427734375, + "step": 623 + }, + { + "epoch": 0.21991189427312777, + "grad_norm": 1.331256668600172, + "learning_rate": 1.9805572798732475e-05, + "loss": 0.9277342557907104, + "step": 624 + }, + { + "epoch": 0.22026431718061673, + "grad_norm": 1.4090219105247375, + "learning_rate": 1.980442780285857e-05, + "loss": 0.6536964178085327, + "step": 625 + }, + { + "epoch": 0.22061674008810572, + "grad_norm": 1.4088256669280743, + "learning_rate": 1.980327947868655e-05, + "loss": 0.7197799682617188, + "step": 626 + }, + { + "epoch": 0.22096916299559471, + "grad_norm": 1.1381025512945977, + "learning_rate": 1.980212782660624e-05, + "loss": 0.7558401823043823, + "step": 627 + }, + { + "epoch": 0.2213215859030837, + "grad_norm": 1.4031284519802554, + "learning_rate": 1.9800972847008586e-05, + "loss": 0.7918291091918945, + "step": 628 + }, + { + "epoch": 0.2216740088105727, + "grad_norm": 1.4810910878326864, + "learning_rate": 
1.979981454028567e-05, + "loss": 0.7159492373466492, + "step": 629 + }, + { + "epoch": 0.22202643171806166, + "grad_norm": 1.5418605472416471, + "learning_rate": 1.9798652906830694e-05, + "loss": 0.854686439037323, + "step": 630 + }, + { + "epoch": 0.22237885462555065, + "grad_norm": 1.6329149097762432, + "learning_rate": 1.9797487947038007e-05, + "loss": 0.736785888671875, + "step": 631 + }, + { + "epoch": 0.22273127753303965, + "grad_norm": 1.2749674694710476, + "learning_rate": 1.9796319661303065e-05, + "loss": 0.7092996835708618, + "step": 632 + }, + { + "epoch": 0.22308370044052864, + "grad_norm": 1.4592836621170417, + "learning_rate": 1.9795148050022477e-05, + "loss": 0.8890455961227417, + "step": 633 + }, + { + "epoch": 0.22343612334801763, + "grad_norm": 1.2618947600836363, + "learning_rate": 1.979397311359396e-05, + "loss": 0.7476855516433716, + "step": 634 + }, + { + "epoch": 0.22378854625550662, + "grad_norm": 1.4307363207113668, + "learning_rate": 1.979279485241637e-05, + "loss": 0.7810029983520508, + "step": 635 + }, + { + "epoch": 0.22414096916299558, + "grad_norm": 1.2070666788938549, + "learning_rate": 1.9791613266889688e-05, + "loss": 0.6679891347885132, + "step": 636 + }, + { + "epoch": 0.22449339207048458, + "grad_norm": 1.6320710320094325, + "learning_rate": 1.979042835741503e-05, + "loss": 0.809790849685669, + "step": 637 + }, + { + "epoch": 0.22484581497797357, + "grad_norm": 1.6737967848633384, + "learning_rate": 1.9789240124394638e-05, + "loss": 0.8347213268280029, + "step": 638 + }, + { + "epoch": 0.22519823788546256, + "grad_norm": 1.1935958187808327, + "learning_rate": 1.9788048568231875e-05, + "loss": 0.6620997190475464, + "step": 639 + }, + { + "epoch": 0.22555066079295155, + "grad_norm": 1.2898316066784317, + "learning_rate": 1.9786853689331235e-05, + "loss": 0.7727694511413574, + "step": 640 + }, + { + "epoch": 0.22590308370044052, + "grad_norm": 1.2854878709867101, + "learning_rate": 1.9785655488098348e-05, + "loss": 
0.7433278560638428, + "step": 641 + }, + { + "epoch": 0.2262555066079295, + "grad_norm": 1.3523753090224933, + "learning_rate": 1.9784453964939966e-05, + "loss": 0.7375571727752686, + "step": 642 + }, + { + "epoch": 0.2266079295154185, + "grad_norm": 1.3285668366741343, + "learning_rate": 1.9783249120263962e-05, + "loss": 0.5838407874107361, + "step": 643 + }, + { + "epoch": 0.2269603524229075, + "grad_norm": 1.3906475095958148, + "learning_rate": 1.978204095447935e-05, + "loss": 0.7120088934898376, + "step": 644 + }, + { + "epoch": 0.22731277533039648, + "grad_norm": 1.5058740006044322, + "learning_rate": 1.9780829467996262e-05, + "loss": 0.7668102383613586, + "step": 645 + }, + { + "epoch": 0.22766519823788547, + "grad_norm": 1.2993959173766831, + "learning_rate": 1.977961466122596e-05, + "loss": 0.748942494392395, + "step": 646 + }, + { + "epoch": 0.22801762114537444, + "grad_norm": 1.476253609353715, + "learning_rate": 1.9778396534580836e-05, + "loss": 0.7569374442100525, + "step": 647 + }, + { + "epoch": 0.22837004405286343, + "grad_norm": 1.352884217242173, + "learning_rate": 1.97771750884744e-05, + "loss": 0.7981363534927368, + "step": 648 + }, + { + "epoch": 0.22872246696035242, + "grad_norm": 1.5069792289976334, + "learning_rate": 1.97759503233213e-05, + "loss": 0.7501301765441895, + "step": 649 + }, + { + "epoch": 0.2290748898678414, + "grad_norm": 1.4079968546467614, + "learning_rate": 1.9774722239537305e-05, + "loss": 0.7880003452301025, + "step": 650 + }, + { + "epoch": 0.2294273127753304, + "grad_norm": 1.3141024886679253, + "learning_rate": 1.977349083753931e-05, + "loss": 0.9007930755615234, + "step": 651 + }, + { + "epoch": 0.2297797356828194, + "grad_norm": 1.1634171776911992, + "learning_rate": 1.9772256117745335e-05, + "loss": 0.6291126012802124, + "step": 652 + }, + { + "epoch": 0.23013215859030836, + "grad_norm": 1.1487631323898542, + "learning_rate": 1.9771018080574534e-05, + "loss": 0.8155031204223633, + "step": 653 + }, + { + "epoch": 
0.23048458149779735, + "grad_norm": 1.2941785819245946, + "learning_rate": 1.976977672644718e-05, + "loss": 0.7103240489959717, + "step": 654 + }, + { + "epoch": 0.23083700440528634, + "grad_norm": 1.4170836267106273, + "learning_rate": 1.9768532055784678e-05, + "loss": 0.8590278625488281, + "step": 655 + }, + { + "epoch": 0.23118942731277534, + "grad_norm": 1.6156852038452685, + "learning_rate": 1.9767284069009545e-05, + "loss": 0.7729001641273499, + "step": 656 + }, + { + "epoch": 0.23154185022026433, + "grad_norm": 1.543950265697803, + "learning_rate": 1.9766032766545445e-05, + "loss": 0.8287409543991089, + "step": 657 + }, + { + "epoch": 0.2318942731277533, + "grad_norm": 1.327581925526745, + "learning_rate": 1.9764778148817147e-05, + "loss": 0.8651477098464966, + "step": 658 + }, + { + "epoch": 0.23224669603524228, + "grad_norm": 1.3954780395501065, + "learning_rate": 1.976352021625056e-05, + "loss": 0.7582576274871826, + "step": 659 + }, + { + "epoch": 0.23259911894273128, + "grad_norm": 1.2510605377459358, + "learning_rate": 1.976225896927271e-05, + "loss": 0.6579675078392029, + "step": 660 + }, + { + "epoch": 0.23295154185022027, + "grad_norm": 1.4130234326235036, + "learning_rate": 1.9760994408311757e-05, + "loss": 0.8817700147628784, + "step": 661 + }, + { + "epoch": 0.23330396475770926, + "grad_norm": 1.3799441341137708, + "learning_rate": 1.9759726533796976e-05, + "loss": 0.7241606712341309, + "step": 662 + }, + { + "epoch": 0.23365638766519825, + "grad_norm": 1.2880787484904483, + "learning_rate": 1.9758455346158768e-05, + "loss": 0.7434183359146118, + "step": 663 + }, + { + "epoch": 0.23400881057268721, + "grad_norm": 1.3406860649308125, + "learning_rate": 1.9757180845828663e-05, + "loss": 0.632422685623169, + "step": 664 + }, + { + "epoch": 0.2343612334801762, + "grad_norm": 1.394213400542702, + "learning_rate": 1.9755903033239318e-05, + "loss": 0.7276040315628052, + "step": 665 + }, + { + "epoch": 0.2347136563876652, + "grad_norm": 
1.4191729622512466, + "learning_rate": 1.975462190882451e-05, + "loss": 0.8070325255393982, + "step": 666 + }, + { + "epoch": 0.2350660792951542, + "grad_norm": 1.505939347053283, + "learning_rate": 1.9753337473019133e-05, + "loss": 0.867915689945221, + "step": 667 + }, + { + "epoch": 0.23541850220264318, + "grad_norm": 1.2080841146883634, + "learning_rate": 1.9752049726259223e-05, + "loss": 0.7905307412147522, + "step": 668 + }, + { + "epoch": 0.23577092511013215, + "grad_norm": 1.3166867899458456, + "learning_rate": 1.9750758668981925e-05, + "loss": 0.7721420526504517, + "step": 669 + }, + { + "epoch": 0.23612334801762114, + "grad_norm": 1.3746426458674128, + "learning_rate": 1.9749464301625515e-05, + "loss": 0.7926005125045776, + "step": 670 + }, + { + "epoch": 0.23647577092511013, + "grad_norm": 1.387001164209418, + "learning_rate": 1.974816662462939e-05, + "loss": 0.7651785612106323, + "step": 671 + }, + { + "epoch": 0.23682819383259912, + "grad_norm": 1.3285492717471519, + "learning_rate": 1.974686563843407e-05, + "loss": 0.7548795938491821, + "step": 672 + }, + { + "epoch": 0.2371806167400881, + "grad_norm": 1.256836928643264, + "learning_rate": 1.9745561343481197e-05, + "loss": 0.5405399799346924, + "step": 673 + }, + { + "epoch": 0.2375330396475771, + "grad_norm": 1.429166434081011, + "learning_rate": 1.9744253740213542e-05, + "loss": 0.7561137080192566, + "step": 674 + }, + { + "epoch": 0.23788546255506607, + "grad_norm": 1.2880562459402407, + "learning_rate": 1.9742942829074993e-05, + "loss": 0.8809534907341003, + "step": 675 + }, + { + "epoch": 0.23823788546255506, + "grad_norm": 1.4170174919214424, + "learning_rate": 1.974162861051057e-05, + "loss": 0.750350832939148, + "step": 676 + }, + { + "epoch": 0.23859030837004405, + "grad_norm": 1.629083058939835, + "learning_rate": 1.9740311084966398e-05, + "loss": 0.89476478099823, + "step": 677 + }, + { + "epoch": 0.23894273127753304, + "grad_norm": 1.2576348651951754, + "learning_rate": 
1.9738990252889748e-05, + "loss": 0.8647176027297974, + "step": 678 + }, + { + "epoch": 0.23929515418502204, + "grad_norm": 1.4086313229573832, + "learning_rate": 1.9737666114728996e-05, + "loss": 0.7331727743148804, + "step": 679 + }, + { + "epoch": 0.239647577092511, + "grad_norm": 1.471872239566745, + "learning_rate": 1.9736338670933642e-05, + "loss": 0.7714364528656006, + "step": 680 + }, + { + "epoch": 0.24, + "grad_norm": 1.2246586432486557, + "learning_rate": 1.973500792195432e-05, + "loss": 0.7840908765792847, + "step": 681 + }, + { + "epoch": 0.24035242290748898, + "grad_norm": 1.5714154435783916, + "learning_rate": 1.9733673868242767e-05, + "loss": 0.8723878860473633, + "step": 682 + }, + { + "epoch": 0.24070484581497797, + "grad_norm": 1.3325473695906174, + "learning_rate": 1.9732336510251864e-05, + "loss": 0.782090425491333, + "step": 683 + }, + { + "epoch": 0.24105726872246697, + "grad_norm": 1.4114017797446734, + "learning_rate": 1.9730995848435594e-05, + "loss": 0.8000990152359009, + "step": 684 + }, + { + "epoch": 0.24140969162995596, + "grad_norm": 1.2098442989857856, + "learning_rate": 1.9729651883249075e-05, + "loss": 0.7499237060546875, + "step": 685 + }, + { + "epoch": 0.24176211453744492, + "grad_norm": 1.376086425817015, + "learning_rate": 1.972830461514854e-05, + "loss": 0.8786858916282654, + "step": 686 + }, + { + "epoch": 0.2421145374449339, + "grad_norm": 1.2058295584451697, + "learning_rate": 1.972695404459134e-05, + "loss": 0.7039557695388794, + "step": 687 + }, + { + "epoch": 0.2424669603524229, + "grad_norm": 1.2391412724176054, + "learning_rate": 1.9725600172035962e-05, + "loss": 0.6699448823928833, + "step": 688 + }, + { + "epoch": 0.2428193832599119, + "grad_norm": 1.4984585662906706, + "learning_rate": 1.9724242997941995e-05, + "loss": 0.6753977537155151, + "step": 689 + }, + { + "epoch": 0.2431718061674009, + "grad_norm": 1.465232022987203, + "learning_rate": 1.9722882522770163e-05, + "loss": 0.7139854431152344, + "step": 690 + 
}, + { + "epoch": 0.24352422907488988, + "grad_norm": 1.2814158831499989, + "learning_rate": 1.9721518746982296e-05, + "loss": 0.7894896864891052, + "step": 691 + }, + { + "epoch": 0.24387665198237884, + "grad_norm": 1.2615077213285395, + "learning_rate": 1.972015167104136e-05, + "loss": 0.5663755536079407, + "step": 692 + }, + { + "epoch": 0.24422907488986784, + "grad_norm": 2.058599574246893, + "learning_rate": 1.971878129541144e-05, + "loss": 0.8607856035232544, + "step": 693 + }, + { + "epoch": 0.24458149779735683, + "grad_norm": 1.351791839280567, + "learning_rate": 1.9717407620557724e-05, + "loss": 0.7384383678436279, + "step": 694 + }, + { + "epoch": 0.24493392070484582, + "grad_norm": 1.3580988060863546, + "learning_rate": 1.971603064694654e-05, + "loss": 0.6145502328872681, + "step": 695 + }, + { + "epoch": 0.2452863436123348, + "grad_norm": 1.216736398001555, + "learning_rate": 1.9714650375045328e-05, + "loss": 0.6758620738983154, + "step": 696 + }, + { + "epoch": 0.24563876651982378, + "grad_norm": 1.4471588548341505, + "learning_rate": 1.9713266805322643e-05, + "loss": 0.7416598200798035, + "step": 697 + }, + { + "epoch": 0.24599118942731277, + "grad_norm": 1.5476710427855191, + "learning_rate": 1.9711879938248163e-05, + "loss": 0.7603555917739868, + "step": 698 + }, + { + "epoch": 0.24634361233480176, + "grad_norm": 1.442293220466076, + "learning_rate": 1.9710489774292692e-05, + "loss": 0.9119949340820312, + "step": 699 + }, + { + "epoch": 0.24669603524229075, + "grad_norm": 1.3843099449438452, + "learning_rate": 1.9709096313928144e-05, + "loss": 0.6884537935256958, + "step": 700 + }, + { + "epoch": 0.24704845814977974, + "grad_norm": 1.618333940643818, + "learning_rate": 1.9707699557627554e-05, + "loss": 0.7928721904754639, + "step": 701 + }, + { + "epoch": 0.24740088105726873, + "grad_norm": 1.593414442103489, + "learning_rate": 1.970629950586508e-05, + "loss": 0.888218104839325, + "step": 702 + }, + { + "epoch": 0.2477533039647577, + "grad_norm": 
1.484965940613647, + "learning_rate": 1.9704896159115997e-05, + "loss": 0.7949875593185425, + "step": 703 + }, + { + "epoch": 0.2481057268722467, + "grad_norm": 1.5094809465076762, + "learning_rate": 1.970348951785669e-05, + "loss": 0.9031823873519897, + "step": 704 + }, + { + "epoch": 0.24845814977973568, + "grad_norm": 1.4099687182713576, + "learning_rate": 1.9702079582564682e-05, + "loss": 0.636865496635437, + "step": 705 + }, + { + "epoch": 0.24881057268722467, + "grad_norm": 1.5392719282626255, + "learning_rate": 1.9700666353718593e-05, + "loss": 0.731717586517334, + "step": 706 + }, + { + "epoch": 0.24916299559471367, + "grad_norm": 1.5878589631749256, + "learning_rate": 1.9699249831798172e-05, + "loss": 0.7571220397949219, + "step": 707 + }, + { + "epoch": 0.24951541850220263, + "grad_norm": 1.5180992539956903, + "learning_rate": 1.969783001728429e-05, + "loss": 0.6112762689590454, + "step": 708 + }, + { + "epoch": 0.24986784140969162, + "grad_norm": 1.3651864060041954, + "learning_rate": 1.9696406910658918e-05, + "loss": 0.6737902164459229, + "step": 709 + }, + { + "epoch": 0.25022026431718064, + "grad_norm": 1.328645038543607, + "learning_rate": 1.9694980512405167e-05, + "loss": 0.6525848507881165, + "step": 710 + }, + { + "epoch": 0.2505726872246696, + "grad_norm": 1.302186292631501, + "learning_rate": 1.9693550823007248e-05, + "loss": 0.9107403755187988, + "step": 711 + }, + { + "epoch": 0.25092511013215857, + "grad_norm": 1.5423262639437814, + "learning_rate": 1.96921178429505e-05, + "loss": 0.7373934984207153, + "step": 712 + }, + { + "epoch": 0.25127753303964756, + "grad_norm": 1.4043304459804222, + "learning_rate": 1.9690681572721377e-05, + "loss": 0.6383399963378906, + "step": 713 + }, + { + "epoch": 0.25162995594713655, + "grad_norm": 1.3203935888344693, + "learning_rate": 1.9689242012807442e-05, + "loss": 0.6600236296653748, + "step": 714 + }, + { + "epoch": 0.25198237885462554, + "grad_norm": 1.6489156261044324, + "learning_rate": 
1.9687799163697386e-05, + "loss": 0.9195891618728638, + "step": 715 + }, + { + "epoch": 0.25233480176211454, + "grad_norm": 1.300868905936819, + "learning_rate": 1.968635302588101e-05, + "loss": 0.7122433185577393, + "step": 716 + }, + { + "epoch": 0.2526872246696035, + "grad_norm": 1.467731789065586, + "learning_rate": 1.968490359984923e-05, + "loss": 0.7601606845855713, + "step": 717 + }, + { + "epoch": 0.2530396475770925, + "grad_norm": 1.2967441771844141, + "learning_rate": 1.9683450886094087e-05, + "loss": 0.8216352462768555, + "step": 718 + }, + { + "epoch": 0.2533920704845815, + "grad_norm": 1.4134852768930402, + "learning_rate": 1.9681994885108727e-05, + "loss": 0.8783165216445923, + "step": 719 + }, + { + "epoch": 0.2537444933920705, + "grad_norm": 1.5566095938184208, + "learning_rate": 1.9680535597387416e-05, + "loss": 0.7323269844055176, + "step": 720 + }, + { + "epoch": 0.2540969162995595, + "grad_norm": 1.6250423495927373, + "learning_rate": 1.9679073023425542e-05, + "loss": 0.93906170129776, + "step": 721 + }, + { + "epoch": 0.25444933920704843, + "grad_norm": 1.3857164700730882, + "learning_rate": 1.96776071637196e-05, + "loss": 0.774397611618042, + "step": 722 + }, + { + "epoch": 0.2548017621145374, + "grad_norm": 1.3653604324598565, + "learning_rate": 1.9676138018767204e-05, + "loss": 0.6634535789489746, + "step": 723 + }, + { + "epoch": 0.2551541850220264, + "grad_norm": 1.3364894441034205, + "learning_rate": 1.9674665589067082e-05, + "loss": 0.7705625295639038, + "step": 724 + }, + { + "epoch": 0.2555066079295154, + "grad_norm": 1.5708708799323368, + "learning_rate": 1.9673189875119082e-05, + "loss": 0.706364631652832, + "step": 725 + }, + { + "epoch": 0.2558590308370044, + "grad_norm": 1.2599963014034798, + "learning_rate": 1.9671710877424158e-05, + "loss": 0.7295894622802734, + "step": 726 + }, + { + "epoch": 0.2562114537444934, + "grad_norm": 1.6926806599843667, + "learning_rate": 1.9670228596484383e-05, + "loss": 0.8135089874267578, + "step": 
727 + }, + { + "epoch": 0.2565638766519824, + "grad_norm": 1.5978181657651334, + "learning_rate": 1.966874303280295e-05, + "loss": 0.801734209060669, + "step": 728 + }, + { + "epoch": 0.2569162995594714, + "grad_norm": 1.728546952239603, + "learning_rate": 1.9667254186884164e-05, + "loss": 0.8405104875564575, + "step": 729 + }, + { + "epoch": 0.25726872246696036, + "grad_norm": 1.2523029350782668, + "learning_rate": 1.9665762059233434e-05, + "loss": 0.8320014476776123, + "step": 730 + }, + { + "epoch": 0.25762114537444936, + "grad_norm": 1.2667340666882572, + "learning_rate": 1.96642666503573e-05, + "loss": 0.8701308965682983, + "step": 731 + }, + { + "epoch": 0.25797356828193835, + "grad_norm": 1.1982399130470203, + "learning_rate": 1.9662767960763394e-05, + "loss": 0.7980693578720093, + "step": 732 + }, + { + "epoch": 0.25832599118942734, + "grad_norm": 1.3765503313855298, + "learning_rate": 1.9661265990960486e-05, + "loss": 0.7258214950561523, + "step": 733 + }, + { + "epoch": 0.2586784140969163, + "grad_norm": 1.1683887680739682, + "learning_rate": 1.9659760741458444e-05, + "loss": 0.6860172748565674, + "step": 734 + }, + { + "epoch": 0.25903083700440527, + "grad_norm": 1.4034749748766104, + "learning_rate": 1.9658252212768252e-05, + "loss": 0.7438071370124817, + "step": 735 + }, + { + "epoch": 0.25938325991189426, + "grad_norm": 1.6140837506314978, + "learning_rate": 1.9656740405402007e-05, + "loss": 0.8680309057235718, + "step": 736 + }, + { + "epoch": 0.25973568281938325, + "grad_norm": 1.5365221656010954, + "learning_rate": 1.9655225319872925e-05, + "loss": 0.933163046836853, + "step": 737 + }, + { + "epoch": 0.26008810572687224, + "grad_norm": 1.3636194628802456, + "learning_rate": 1.9653706956695333e-05, + "loss": 0.8746597170829773, + "step": 738 + }, + { + "epoch": 0.26044052863436123, + "grad_norm": 1.31799671460777, + "learning_rate": 1.965218531638466e-05, + "loss": 0.857211709022522, + "step": 739 + }, + { + "epoch": 0.2607929515418502, + 
"grad_norm": 1.313241643085953, + "learning_rate": 1.965066039945746e-05, + "loss": 0.7837733030319214, + "step": 740 + }, + { + "epoch": 0.2611453744493392, + "grad_norm": 1.3527479757495662, + "learning_rate": 1.9649132206431395e-05, + "loss": 0.8401491641998291, + "step": 741 + }, + { + "epoch": 0.2614977973568282, + "grad_norm": 1.14302378839197, + "learning_rate": 1.9647600737825235e-05, + "loss": 0.7070307731628418, + "step": 742 + }, + { + "epoch": 0.2618502202643172, + "grad_norm": 1.756317003631787, + "learning_rate": 1.9646065994158873e-05, + "loss": 0.7649509310722351, + "step": 743 + }, + { + "epoch": 0.2622026431718062, + "grad_norm": 1.5152987231460182, + "learning_rate": 1.9644527975953302e-05, + "loss": 0.7759182453155518, + "step": 744 + }, + { + "epoch": 0.26255506607929513, + "grad_norm": 1.5151017458848213, + "learning_rate": 1.9642986683730626e-05, + "loss": 0.8176295757293701, + "step": 745 + }, + { + "epoch": 0.2629074889867841, + "grad_norm": 1.2974538182792636, + "learning_rate": 1.9641442118014078e-05, + "loss": 0.8406162261962891, + "step": 746 + }, + { + "epoch": 0.2632599118942731, + "grad_norm": 1.3410871141615202, + "learning_rate": 1.9639894279327985e-05, + "loss": 0.8064795732498169, + "step": 747 + }, + { + "epoch": 0.2636123348017621, + "grad_norm": 1.2769637989850176, + "learning_rate": 1.9638343168197784e-05, + "loss": 0.6662956476211548, + "step": 748 + }, + { + "epoch": 0.2639647577092511, + "grad_norm": 1.5105008685571195, + "learning_rate": 1.9636788785150037e-05, + "loss": 0.8747783899307251, + "step": 749 + }, + { + "epoch": 0.2643171806167401, + "grad_norm": 1.4261291763421449, + "learning_rate": 1.9635231130712406e-05, + "loss": 0.7893349528312683, + "step": 750 + }, + { + "epoch": 0.2646696035242291, + "grad_norm": 1.2907133964100823, + "learning_rate": 1.9633670205413665e-05, + "loss": 0.7380903959274292, + "step": 751 + }, + { + "epoch": 0.26502202643171807, + "grad_norm": 1.5293000163357584, + "learning_rate": 
1.96321060097837e-05, + "loss": 0.9164873957633972, + "step": 752 + }, + { + "epoch": 0.26537444933920706, + "grad_norm": 1.5448314355627197, + "learning_rate": 1.9630538544353505e-05, + "loss": 0.7664264440536499, + "step": 753 + }, + { + "epoch": 0.26572687224669606, + "grad_norm": 1.4037068281656377, + "learning_rate": 1.9628967809655187e-05, + "loss": 0.8117275238037109, + "step": 754 + }, + { + "epoch": 0.26607929515418505, + "grad_norm": 1.3044642797371147, + "learning_rate": 1.9627393806221967e-05, + "loss": 0.6203808784484863, + "step": 755 + }, + { + "epoch": 0.266431718061674, + "grad_norm": 1.5462507455011187, + "learning_rate": 1.9625816534588163e-05, + "loss": 0.8777878284454346, + "step": 756 + }, + { + "epoch": 0.266784140969163, + "grad_norm": 1.2883365910622429, + "learning_rate": 1.9624235995289212e-05, + "loss": 0.6984438300132751, + "step": 757 + }, + { + "epoch": 0.26713656387665197, + "grad_norm": 1.5746997664717406, + "learning_rate": 1.962265218886166e-05, + "loss": 0.7806228399276733, + "step": 758 + }, + { + "epoch": 0.26748898678414096, + "grad_norm": 1.318579751564355, + "learning_rate": 1.9621065115843155e-05, + "loss": 0.6924373507499695, + "step": 759 + }, + { + "epoch": 0.26784140969162995, + "grad_norm": 1.2867883287922122, + "learning_rate": 1.9619474776772462e-05, + "loss": 0.6809841394424438, + "step": 760 + }, + { + "epoch": 0.26819383259911894, + "grad_norm": 1.3766475304418688, + "learning_rate": 1.961788117218945e-05, + "loss": 0.8346723318099976, + "step": 761 + }, + { + "epoch": 0.26854625550660793, + "grad_norm": 1.3717126814625271, + "learning_rate": 1.96162843026351e-05, + "loss": 0.8000205755233765, + "step": 762 + }, + { + "epoch": 0.2688986784140969, + "grad_norm": 1.256040752163899, + "learning_rate": 1.9614684168651504e-05, + "loss": 0.8026692271232605, + "step": 763 + }, + { + "epoch": 0.2692511013215859, + "grad_norm": 1.4850412299335856, + "learning_rate": 1.961308077078185e-05, + "loss": 0.921292781829834, + 
"step": 764 + }, + { + "epoch": 0.2696035242290749, + "grad_norm": 1.6577133714061814, + "learning_rate": 1.9611474109570446e-05, + "loss": 0.8018487095832825, + "step": 765 + }, + { + "epoch": 0.2699559471365639, + "grad_norm": 1.2440268554728864, + "learning_rate": 1.9609864185562698e-05, + "loss": 0.7400588989257812, + "step": 766 + }, + { + "epoch": 0.27030837004405284, + "grad_norm": 1.2094824954459686, + "learning_rate": 1.960825099930513e-05, + "loss": 0.6243399977684021, + "step": 767 + }, + { + "epoch": 0.27066079295154183, + "grad_norm": 1.415024134390762, + "learning_rate": 1.9606634551345373e-05, + "loss": 0.7680903673171997, + "step": 768 + }, + { + "epoch": 0.2710132158590308, + "grad_norm": 1.3126349106428246, + "learning_rate": 1.960501484223215e-05, + "loss": 0.8783930540084839, + "step": 769 + }, + { + "epoch": 0.2713656387665198, + "grad_norm": 1.4964756858010921, + "learning_rate": 1.9603391872515308e-05, + "loss": 0.7910561561584473, + "step": 770 + }, + { + "epoch": 0.2717180616740088, + "grad_norm": 1.4400527227532898, + "learning_rate": 1.9601765642745795e-05, + "loss": 0.7325295209884644, + "step": 771 + }, + { + "epoch": 0.2720704845814978, + "grad_norm": 1.3018158119605838, + "learning_rate": 1.9600136153475666e-05, + "loss": 0.7017170190811157, + "step": 772 + }, + { + "epoch": 0.2724229074889868, + "grad_norm": 1.5395904311410002, + "learning_rate": 1.959850340525808e-05, + "loss": 0.9281908273696899, + "step": 773 + }, + { + "epoch": 0.2727753303964758, + "grad_norm": 1.256408104414643, + "learning_rate": 1.95968673986473e-05, + "loss": 0.7421029806137085, + "step": 774 + }, + { + "epoch": 0.27312775330396477, + "grad_norm": 1.3171523536350294, + "learning_rate": 1.9595228134198708e-05, + "loss": 0.7474848031997681, + "step": 775 + }, + { + "epoch": 0.27348017621145376, + "grad_norm": 1.3683438241049553, + "learning_rate": 1.9593585612468776e-05, + "loss": 0.7267760038375854, + "step": 776 + }, + { + "epoch": 0.27383259911894275, + 
"grad_norm": 1.4883233103137832, + "learning_rate": 1.9591939834015096e-05, + "loss": 0.739683985710144, + "step": 777 + }, + { + "epoch": 0.2741850220264317, + "grad_norm": 1.2437408403604437, + "learning_rate": 1.9590290799396353e-05, + "loss": 0.6615399122238159, + "step": 778 + }, + { + "epoch": 0.2745374449339207, + "grad_norm": 1.5863201035209105, + "learning_rate": 1.9588638509172343e-05, + "loss": 0.8045977354049683, + "step": 779 + }, + { + "epoch": 0.2748898678414097, + "grad_norm": 1.5522608295626732, + "learning_rate": 1.958698296390397e-05, + "loss": 0.8760169744491577, + "step": 780 + }, + { + "epoch": 0.27524229074889867, + "grad_norm": 1.5297761597873432, + "learning_rate": 1.9585324164153236e-05, + "loss": 0.6676662564277649, + "step": 781 + }, + { + "epoch": 0.27559471365638766, + "grad_norm": 1.1706549585314092, + "learning_rate": 1.958366211048326e-05, + "loss": 0.6650630235671997, + "step": 782 + }, + { + "epoch": 0.27594713656387665, + "grad_norm": 1.157826702613003, + "learning_rate": 1.9581996803458248e-05, + "loss": 0.7399466037750244, + "step": 783 + }, + { + "epoch": 0.27629955947136564, + "grad_norm": 1.316772401506473, + "learning_rate": 1.9580328243643528e-05, + "loss": 0.6121753454208374, + "step": 784 + }, + { + "epoch": 0.27665198237885463, + "grad_norm": 1.334268754223845, + "learning_rate": 1.9578656431605515e-05, + "loss": 0.8562870025634766, + "step": 785 + }, + { + "epoch": 0.2770044052863436, + "grad_norm": 1.441421130314338, + "learning_rate": 1.9576981367911746e-05, + "loss": 0.717842161655426, + "step": 786 + }, + { + "epoch": 0.2773568281938326, + "grad_norm": 1.582720247126145, + "learning_rate": 1.9575303053130847e-05, + "loss": 0.802294135093689, + "step": 787 + }, + { + "epoch": 0.2777092511013216, + "grad_norm": 1.2234390313515955, + "learning_rate": 1.957362148783256e-05, + "loss": 0.6636664867401123, + "step": 788 + }, + { + "epoch": 0.2780616740088106, + "grad_norm": 1.3850288575091645, + "learning_rate": 
1.9571936672587718e-05, + "loss": 0.7177780866622925, + "step": 789 + }, + { + "epoch": 0.27841409691629954, + "grad_norm": 1.6181532263095582, + "learning_rate": 1.957024860796826e-05, + "loss": 0.8263623714447021, + "step": 790 + }, + { + "epoch": 0.27876651982378853, + "grad_norm": 1.4470113515398348, + "learning_rate": 1.9568557294547244e-05, + "loss": 0.7620534896850586, + "step": 791 + }, + { + "epoch": 0.2791189427312775, + "grad_norm": 1.5101791429302596, + "learning_rate": 1.956686273289881e-05, + "loss": 0.812814474105835, + "step": 792 + }, + { + "epoch": 0.2794713656387665, + "grad_norm": 1.2812553609430264, + "learning_rate": 1.956516492359821e-05, + "loss": 0.6494747400283813, + "step": 793 + }, + { + "epoch": 0.2798237885462555, + "grad_norm": 1.3413487769011592, + "learning_rate": 1.9563463867221793e-05, + "loss": 0.7152044773101807, + "step": 794 + }, + { + "epoch": 0.2801762114537445, + "grad_norm": 1.5718962936709213, + "learning_rate": 1.956175956434702e-05, + "loss": 0.7607219815254211, + "step": 795 + }, + { + "epoch": 0.2805286343612335, + "grad_norm": 1.4483911078118432, + "learning_rate": 1.9560052015552455e-05, + "loss": 0.8793845176696777, + "step": 796 + }, + { + "epoch": 0.2808810572687225, + "grad_norm": 1.509282266928049, + "learning_rate": 1.9558341221417744e-05, + "loss": 0.8314816951751709, + "step": 797 + }, + { + "epoch": 0.28123348017621147, + "grad_norm": 1.2634078342185056, + "learning_rate": 1.9556627182523656e-05, + "loss": 0.8195264339447021, + "step": 798 + }, + { + "epoch": 0.28158590308370046, + "grad_norm": 1.2808718319688324, + "learning_rate": 1.9554909899452055e-05, + "loss": 0.8079999685287476, + "step": 799 + }, + { + "epoch": 0.28193832599118945, + "grad_norm": 1.6867283155032318, + "learning_rate": 1.9553189372785903e-05, + "loss": 0.7614034414291382, + "step": 800 + }, + { + "epoch": 0.2822907488986784, + "grad_norm": 1.13179227916607, + "learning_rate": 1.9551465603109263e-05, + "loss": 0.6271458268165588, + 
"step": 801 + }, + { + "epoch": 0.2826431718061674, + "grad_norm": 1.5872328004173855, + "learning_rate": 1.9549738591007302e-05, + "loss": 0.8061915040016174, + "step": 802 + }, + { + "epoch": 0.2829955947136564, + "grad_norm": 1.3494758196376249, + "learning_rate": 1.9548008337066294e-05, + "loss": 0.663912296295166, + "step": 803 + }, + { + "epoch": 0.28334801762114536, + "grad_norm": 1.503624867364233, + "learning_rate": 1.9546274841873597e-05, + "loss": 0.7582170963287354, + "step": 804 + }, + { + "epoch": 0.28370044052863436, + "grad_norm": 1.3181032025931252, + "learning_rate": 1.9544538106017682e-05, + "loss": 0.7855465412139893, + "step": 805 + }, + { + "epoch": 0.28405286343612335, + "grad_norm": 1.4462567272335825, + "learning_rate": 1.9542798130088116e-05, + "loss": 0.6976481676101685, + "step": 806 + }, + { + "epoch": 0.28440528634361234, + "grad_norm": 1.8291330426153005, + "learning_rate": 1.954105491467557e-05, + "loss": 0.7678342461585999, + "step": 807 + }, + { + "epoch": 0.28475770925110133, + "grad_norm": 1.2407582020259869, + "learning_rate": 1.9539308460371812e-05, + "loss": 0.6238858699798584, + "step": 808 + }, + { + "epoch": 0.2851101321585903, + "grad_norm": 1.239785831064125, + "learning_rate": 1.95375587677697e-05, + "loss": 0.7756681442260742, + "step": 809 + }, + { + "epoch": 0.2854625550660793, + "grad_norm": 1.462836813794646, + "learning_rate": 1.953580583746321e-05, + "loss": 0.8908202648162842, + "step": 810 + }, + { + "epoch": 0.2858149779735683, + "grad_norm": 1.1795831445984086, + "learning_rate": 1.9534049670047402e-05, + "loss": 0.6769838929176331, + "step": 811 + }, + { + "epoch": 0.28616740088105724, + "grad_norm": 1.2674126564024601, + "learning_rate": 1.953229026611844e-05, + "loss": 0.8452527523040771, + "step": 812 + }, + { + "epoch": 0.28651982378854624, + "grad_norm": 1.1830287106246784, + "learning_rate": 1.9530527626273592e-05, + "loss": 0.7494348287582397, + "step": 813 + }, + { + "epoch": 0.2868722466960352, + 
"grad_norm": 1.399665317775642, + "learning_rate": 1.9528761751111215e-05, + "loss": 0.7691028714179993, + "step": 814 + }, + { + "epoch": 0.2872246696035242, + "grad_norm": 1.2077153417445161, + "learning_rate": 1.9526992641230768e-05, + "loss": 0.6854703426361084, + "step": 815 + }, + { + "epoch": 0.2875770925110132, + "grad_norm": 1.4832887577463363, + "learning_rate": 1.9525220297232815e-05, + "loss": 0.7520424127578735, + "step": 816 + }, + { + "epoch": 0.2879295154185022, + "grad_norm": 1.499896401145914, + "learning_rate": 1.9523444719719003e-05, + "loss": 0.7894444465637207, + "step": 817 + }, + { + "epoch": 0.2882819383259912, + "grad_norm": 1.4246285223246848, + "learning_rate": 1.952166590929209e-05, + "loss": 0.7835032939910889, + "step": 818 + }, + { + "epoch": 0.2886343612334802, + "grad_norm": 1.4284322886298129, + "learning_rate": 1.9519883866555928e-05, + "loss": 0.7932062149047852, + "step": 819 + }, + { + "epoch": 0.2889867841409692, + "grad_norm": 1.3689283839888975, + "learning_rate": 1.951809859211546e-05, + "loss": 0.7917006015777588, + "step": 820 + }, + { + "epoch": 0.28933920704845817, + "grad_norm": 1.1579342690806749, + "learning_rate": 1.9516310086576734e-05, + "loss": 0.5330606698989868, + "step": 821 + }, + { + "epoch": 0.28969162995594716, + "grad_norm": 1.3284680872988386, + "learning_rate": 1.9514518350546893e-05, + "loss": 0.7243788242340088, + "step": 822 + }, + { + "epoch": 0.2900440528634361, + "grad_norm": 1.5494348453743318, + "learning_rate": 1.9512723384634175e-05, + "loss": 0.7692278623580933, + "step": 823 + }, + { + "epoch": 0.2903964757709251, + "grad_norm": 1.4784711521599085, + "learning_rate": 1.9510925189447916e-05, + "loss": 0.7537804841995239, + "step": 824 + }, + { + "epoch": 0.2907488986784141, + "grad_norm": 1.6724318756605505, + "learning_rate": 1.9509123765598545e-05, + "loss": 0.9168751239776611, + "step": 825 + }, + { + "epoch": 0.2911013215859031, + "grad_norm": 1.3269710873120673, + "learning_rate": 
1.9507319113697592e-05, + "loss": 0.7863682508468628, + "step": 826 + }, + { + "epoch": 0.29145374449339206, + "grad_norm": 1.3629585622585614, + "learning_rate": 1.9505511234357677e-05, + "loss": 0.7119239568710327, + "step": 827 + }, + { + "epoch": 0.29180616740088106, + "grad_norm": 1.3362093903256012, + "learning_rate": 1.950370012819252e-05, + "loss": 0.6071019172668457, + "step": 828 + }, + { + "epoch": 0.29215859030837005, + "grad_norm": 1.479432309492857, + "learning_rate": 1.9501885795816937e-05, + "loss": 0.9750580787658691, + "step": 829 + }, + { + "epoch": 0.29251101321585904, + "grad_norm": 1.3797663030734688, + "learning_rate": 1.9500068237846837e-05, + "loss": 0.7465370297431946, + "step": 830 + }, + { + "epoch": 0.29286343612334803, + "grad_norm": 1.3385246660479724, + "learning_rate": 1.949824745489922e-05, + "loss": 0.7821183204650879, + "step": 831 + }, + { + "epoch": 0.293215859030837, + "grad_norm": 1.4671979426722186, + "learning_rate": 1.949642344759219e-05, + "loss": 0.7555009126663208, + "step": 832 + }, + { + "epoch": 0.293568281938326, + "grad_norm": 1.4661056896012497, + "learning_rate": 1.9494596216544942e-05, + "loss": 0.841058075428009, + "step": 833 + }, + { + "epoch": 0.29392070484581495, + "grad_norm": 1.4108089015351581, + "learning_rate": 1.9492765762377762e-05, + "loss": 0.737910270690918, + "step": 834 + }, + { + "epoch": 0.29427312775330394, + "grad_norm": 1.430769526790491, + "learning_rate": 1.9490932085712027e-05, + "loss": 0.6817367076873779, + "step": 835 + }, + { + "epoch": 0.29462555066079293, + "grad_norm": 1.4513808156166068, + "learning_rate": 1.9489095187170218e-05, + "loss": 0.6739218235015869, + "step": 836 + }, + { + "epoch": 0.2949779735682819, + "grad_norm": 1.3318980758968664, + "learning_rate": 1.9487255067375907e-05, + "loss": 0.8632504940032959, + "step": 837 + }, + { + "epoch": 0.2953303964757709, + "grad_norm": 1.4421404890889282, + "learning_rate": 1.9485411726953753e-05, + "loss": 0.6615850925445557, + 
"step": 838 + }, + { + "epoch": 0.2956828193832599, + "grad_norm": 1.5159755088266125, + "learning_rate": 1.9483565166529515e-05, + "loss": 0.8647087812423706, + "step": 839 + }, + { + "epoch": 0.2960352422907489, + "grad_norm": 1.0418453596772383, + "learning_rate": 1.9481715386730044e-05, + "loss": 0.5152087807655334, + "step": 840 + }, + { + "epoch": 0.2963876651982379, + "grad_norm": 1.265898832717726, + "learning_rate": 1.9479862388183283e-05, + "loss": 0.7942806482315063, + "step": 841 + }, + { + "epoch": 0.2967400881057269, + "grad_norm": 1.4728087242398047, + "learning_rate": 1.947800617151826e-05, + "loss": 0.6364283561706543, + "step": 842 + }, + { + "epoch": 0.2970925110132159, + "grad_norm": 1.322764194763318, + "learning_rate": 1.9476146737365112e-05, + "loss": 0.8278179168701172, + "step": 843 + }, + { + "epoch": 0.29744493392070487, + "grad_norm": 1.4629329991948483, + "learning_rate": 1.9474284086355057e-05, + "loss": 0.7369956970214844, + "step": 844 + }, + { + "epoch": 0.29779735682819386, + "grad_norm": 1.314820640789224, + "learning_rate": 1.9472418219120403e-05, + "loss": 0.6879928112030029, + "step": 845 + }, + { + "epoch": 0.2981497797356828, + "grad_norm": 1.357319714737328, + "learning_rate": 1.9470549136294554e-05, + "loss": 0.8312973976135254, + "step": 846 + }, + { + "epoch": 0.2985022026431718, + "grad_norm": 1.3920317025034097, + "learning_rate": 1.946867683851201e-05, + "loss": 0.8102964162826538, + "step": 847 + }, + { + "epoch": 0.2988546255506608, + "grad_norm": 1.502648854525568, + "learning_rate": 1.9466801326408355e-05, + "loss": 0.6136792898178101, + "step": 848 + }, + { + "epoch": 0.29920704845814977, + "grad_norm": 1.4028028409959845, + "learning_rate": 1.946492260062027e-05, + "loss": 0.6388760805130005, + "step": 849 + }, + { + "epoch": 0.29955947136563876, + "grad_norm": 1.2376129930975477, + "learning_rate": 1.9463040661785516e-05, + "loss": 0.6443628072738647, + "step": 850 + }, + { + "epoch": 0.29991189427312775, + 
"grad_norm": 1.4811436209967876, + "learning_rate": 1.9461155510542962e-05, + "loss": 0.7763667702674866, + "step": 851 + }, + { + "epoch": 0.30026431718061675, + "grad_norm": 1.3770267697185403, + "learning_rate": 1.9459267147532555e-05, + "loss": 0.8040921688079834, + "step": 852 + }, + { + "epoch": 0.30061674008810574, + "grad_norm": 1.5848801035694915, + "learning_rate": 1.9457375573395334e-05, + "loss": 0.6271079778671265, + "step": 853 + }, + { + "epoch": 0.30096916299559473, + "grad_norm": 1.4490523944014555, + "learning_rate": 1.945548078877343e-05, + "loss": 0.6970022916793823, + "step": 854 + }, + { + "epoch": 0.3013215859030837, + "grad_norm": 1.5989299247861681, + "learning_rate": 1.9453582794310063e-05, + "loss": 0.8283002972602844, + "step": 855 + }, + { + "epoch": 0.3016740088105727, + "grad_norm": 1.3183590658260465, + "learning_rate": 1.9451681590649545e-05, + "loss": 0.7989551424980164, + "step": 856 + }, + { + "epoch": 0.30202643171806165, + "grad_norm": 1.6791390781024222, + "learning_rate": 1.9449777178437274e-05, + "loss": 0.7000687122344971, + "step": 857 + }, + { + "epoch": 0.30237885462555064, + "grad_norm": 1.8022925697135672, + "learning_rate": 1.944786955831974e-05, + "loss": 0.8005126714706421, + "step": 858 + }, + { + "epoch": 0.30273127753303963, + "grad_norm": 1.4999207234472591, + "learning_rate": 1.9445958730944515e-05, + "loss": 0.7060712575912476, + "step": 859 + }, + { + "epoch": 0.3030837004405286, + "grad_norm": 1.4072429406012825, + "learning_rate": 1.9444044696960277e-05, + "loss": 0.6979726552963257, + "step": 860 + }, + { + "epoch": 0.3034361233480176, + "grad_norm": 1.4515999764557612, + "learning_rate": 1.9442127457016768e-05, + "loss": 0.7916465401649475, + "step": 861 + }, + { + "epoch": 0.3037885462555066, + "grad_norm": 1.1660322947657744, + "learning_rate": 1.944020701176484e-05, + "loss": 0.6980502009391785, + "step": 862 + }, + { + "epoch": 0.3041409691629956, + "grad_norm": 1.7481448087127538, + "learning_rate": 
1.943828336185642e-05, + "loss": 0.8479218482971191, + "step": 863 + }, + { + "epoch": 0.3044933920704846, + "grad_norm": 1.6916771358958562, + "learning_rate": 1.9436356507944532e-05, + "loss": 0.8374297022819519, + "step": 864 + }, + { + "epoch": 0.3048458149779736, + "grad_norm": 1.3059238044039985, + "learning_rate": 1.943442645068328e-05, + "loss": 0.6871248483657837, + "step": 865 + }, + { + "epoch": 0.3051982378854626, + "grad_norm": 1.4668202087885096, + "learning_rate": 1.9432493190727854e-05, + "loss": 0.92267906665802, + "step": 866 + }, + { + "epoch": 0.30555066079295157, + "grad_norm": 1.7147503992363287, + "learning_rate": 1.9430556728734543e-05, + "loss": 0.7068654298782349, + "step": 867 + }, + { + "epoch": 0.3059030837004405, + "grad_norm": 1.354783765213683, + "learning_rate": 1.942861706536071e-05, + "loss": 0.830272912979126, + "step": 868 + }, + { + "epoch": 0.3062555066079295, + "grad_norm": 1.5223972366721212, + "learning_rate": 1.9426674201264814e-05, + "loss": 0.7996113300323486, + "step": 869 + }, + { + "epoch": 0.3066079295154185, + "grad_norm": 1.7576483396811688, + "learning_rate": 1.9424728137106398e-05, + "loss": 0.7519441843032837, + "step": 870 + }, + { + "epoch": 0.3069603524229075, + "grad_norm": 1.92300313533063, + "learning_rate": 1.9422778873546084e-05, + "loss": 0.5812790393829346, + "step": 871 + }, + { + "epoch": 0.30731277533039647, + "grad_norm": 1.058437605318741, + "learning_rate": 1.9420826411245595e-05, + "loss": 0.5953323841094971, + "step": 872 + }, + { + "epoch": 0.30766519823788546, + "grad_norm": 1.3954802825469448, + "learning_rate": 1.941887075086772e-05, + "loss": 0.8307937979698181, + "step": 873 + }, + { + "epoch": 0.30801762114537445, + "grad_norm": 1.5122839417773277, + "learning_rate": 1.9416911893076358e-05, + "loss": 0.7753443121910095, + "step": 874 + }, + { + "epoch": 0.30837004405286345, + "grad_norm": 1.3848386830658772, + "learning_rate": 1.9414949838536468e-05, + "loss": 0.8803520798683167, + 
"step": 875 + }, + { + "epoch": 0.30872246696035244, + "grad_norm": 1.3111930018969615, + "learning_rate": 1.9412984587914115e-05, + "loss": 0.6811587810516357, + "step": 876 + }, + { + "epoch": 0.30907488986784143, + "grad_norm": 1.3880932208512609, + "learning_rate": 1.9411016141876438e-05, + "loss": 0.802099347114563, + "step": 877 + }, + { + "epoch": 0.3094273127753304, + "grad_norm": 1.560285458084049, + "learning_rate": 1.940904450109166e-05, + "loss": 0.7325229644775391, + "step": 878 + }, + { + "epoch": 0.30977973568281936, + "grad_norm": 1.5126812875374416, + "learning_rate": 1.9407069666229097e-05, + "loss": 0.6515973210334778, + "step": 879 + }, + { + "epoch": 0.31013215859030835, + "grad_norm": 1.2990709527675965, + "learning_rate": 1.9405091637959138e-05, + "loss": 0.7314589619636536, + "step": 880 + }, + { + "epoch": 0.31048458149779734, + "grad_norm": 1.2146229290292494, + "learning_rate": 1.9403110416953267e-05, + "loss": 0.6668078303337097, + "step": 881 + }, + { + "epoch": 0.31083700440528633, + "grad_norm": 1.4214853985415763, + "learning_rate": 1.9401126003884047e-05, + "loss": 0.693236231803894, + "step": 882 + }, + { + "epoch": 0.3111894273127753, + "grad_norm": 2.210010730425174, + "learning_rate": 1.939913839942512e-05, + "loss": 0.8242754936218262, + "step": 883 + }, + { + "epoch": 0.3115418502202643, + "grad_norm": 1.4121001226290237, + "learning_rate": 1.939714760425122e-05, + "loss": 0.7776592373847961, + "step": 884 + }, + { + "epoch": 0.3118942731277533, + "grad_norm": 1.6297557283357365, + "learning_rate": 1.9395153619038158e-05, + "loss": 0.7023555636405945, + "step": 885 + }, + { + "epoch": 0.3122466960352423, + "grad_norm": 1.333512905730993, + "learning_rate": 1.939315644446283e-05, + "loss": 0.690382182598114, + "step": 886 + }, + { + "epoch": 0.3125991189427313, + "grad_norm": 1.4632969046362096, + "learning_rate": 1.9391156081203214e-05, + "loss": 0.7590082287788391, + "step": 887 + }, + { + "epoch": 0.3129515418502203, + 
"grad_norm": 1.3672878296080273, + "learning_rate": 1.9389152529938377e-05, + "loss": 0.7378168702125549, + "step": 888 + }, + { + "epoch": 0.3133039647577093, + "grad_norm": 1.3616414763479574, + "learning_rate": 1.938714579134845e-05, + "loss": 0.7036890983581543, + "step": 889 + }, + { + "epoch": 0.3136563876651982, + "grad_norm": 1.4808362954559244, + "learning_rate": 1.938513586611467e-05, + "loss": 0.8881829977035522, + "step": 890 + }, + { + "epoch": 0.3140088105726872, + "grad_norm": 1.5370313355999317, + "learning_rate": 1.9383122754919342e-05, + "loss": 0.7467600107192993, + "step": 891 + }, + { + "epoch": 0.3143612334801762, + "grad_norm": 1.6168044285318155, + "learning_rate": 1.938110645844585e-05, + "loss": 0.9358077049255371, + "step": 892 + }, + { + "epoch": 0.3147136563876652, + "grad_norm": 1.3982581442164577, + "learning_rate": 1.9379086977378664e-05, + "loss": 0.7751256227493286, + "step": 893 + }, + { + "epoch": 0.3150660792951542, + "grad_norm": 1.3071717433837386, + "learning_rate": 1.9377064312403338e-05, + "loss": 0.8020666837692261, + "step": 894 + }, + { + "epoch": 0.31541850220264317, + "grad_norm": 1.2076526617304193, + "learning_rate": 1.9375038464206507e-05, + "loss": 0.7251513004302979, + "step": 895 + }, + { + "epoch": 0.31577092511013216, + "grad_norm": 1.3323484110232422, + "learning_rate": 1.9373009433475874e-05, + "loss": 0.7163990139961243, + "step": 896 + }, + { + "epoch": 0.31612334801762115, + "grad_norm": 1.7368098259899396, + "learning_rate": 1.937097722090024e-05, + "loss": 0.7208842039108276, + "step": 897 + }, + { + "epoch": 0.31647577092511014, + "grad_norm": 1.3890083085574685, + "learning_rate": 1.9368941827169475e-05, + "loss": 0.7660849690437317, + "step": 898 + }, + { + "epoch": 0.31682819383259914, + "grad_norm": 1.4598849060474621, + "learning_rate": 1.9366903252974532e-05, + "loss": 0.7017598152160645, + "step": 899 + }, + { + "epoch": 0.31718061674008813, + "grad_norm": 1.1578269588811556, + "learning_rate": 
1.9364861499007443e-05, + "loss": 0.6831692457199097, + "step": 900 + }, + { + "epoch": 0.3175330396475771, + "grad_norm": 1.5110843884258551, + "learning_rate": 1.936281656596132e-05, + "loss": 0.6555520296096802, + "step": 901 + }, + { + "epoch": 0.31788546255506606, + "grad_norm": 1.5455350998398028, + "learning_rate": 1.9360768454530356e-05, + "loss": 0.7401334047317505, + "step": 902 + }, + { + "epoch": 0.31823788546255505, + "grad_norm": 1.445337217541868, + "learning_rate": 1.935871716540982e-05, + "loss": 0.7415893077850342, + "step": 903 + }, + { + "epoch": 0.31859030837004404, + "grad_norm": 1.280838808592201, + "learning_rate": 1.935666269929606e-05, + "loss": 0.8254752159118652, + "step": 904 + }, + { + "epoch": 0.31894273127753303, + "grad_norm": 1.4164353369528349, + "learning_rate": 1.9354605056886505e-05, + "loss": 0.708149254322052, + "step": 905 + }, + { + "epoch": 0.319295154185022, + "grad_norm": 5.868993531178127, + "learning_rate": 1.9352544238879654e-05, + "loss": 0.8084006905555725, + "step": 906 + }, + { + "epoch": 0.319647577092511, + "grad_norm": 1.264327413823813, + "learning_rate": 1.93504802459751e-05, + "loss": 0.8039542436599731, + "step": 907 + }, + { + "epoch": 0.32, + "grad_norm": 1.3552380315038073, + "learning_rate": 1.93484130788735e-05, + "loss": 0.7563241720199585, + "step": 908 + }, + { + "epoch": 0.320352422907489, + "grad_norm": 1.4802258000623036, + "learning_rate": 1.9346342738276593e-05, + "loss": 0.7972971200942993, + "step": 909 + }, + { + "epoch": 0.320704845814978, + "grad_norm": 1.2978401429696003, + "learning_rate": 1.93442692248872e-05, + "loss": 0.6693121790885925, + "step": 910 + }, + { + "epoch": 0.321057268722467, + "grad_norm": 1.567978048588056, + "learning_rate": 1.9342192539409203e-05, + "loss": 0.6597858667373657, + "step": 911 + }, + { + "epoch": 0.321409691629956, + "grad_norm": 1.368700143265877, + "learning_rate": 1.934011268254758e-05, + "loss": 0.6771499514579773, + "step": 912 + }, + { + "epoch": 
0.3217621145374449, + "grad_norm": 1.2365987861589656, + "learning_rate": 1.9338029655008375e-05, + "loss": 0.6903397440910339, + "step": 913 + }, + { + "epoch": 0.3221145374449339, + "grad_norm": 1.1408319382533163, + "learning_rate": 1.9335943457498717e-05, + "loss": 0.6287999153137207, + "step": 914 + }, + { + "epoch": 0.3224669603524229, + "grad_norm": 1.6382789883498257, + "learning_rate": 1.93338540907268e-05, + "loss": 0.7199264764785767, + "step": 915 + }, + { + "epoch": 0.3228193832599119, + "grad_norm": 1.3951711845041654, + "learning_rate": 1.9331761555401896e-05, + "loss": 0.6960160732269287, + "step": 916 + }, + { + "epoch": 0.3231718061674009, + "grad_norm": 1.4692108732272398, + "learning_rate": 1.932966585223436e-05, + "loss": 0.8981958627700806, + "step": 917 + }, + { + "epoch": 0.32352422907488987, + "grad_norm": 1.5685980092664367, + "learning_rate": 1.932756698193562e-05, + "loss": 0.786432147026062, + "step": 918 + }, + { + "epoch": 0.32387665198237886, + "grad_norm": 1.5208274085752962, + "learning_rate": 1.9325464945218172e-05, + "loss": 0.7260904312133789, + "step": 919 + }, + { + "epoch": 0.32422907488986785, + "grad_norm": 1.5076861367086136, + "learning_rate": 1.9323359742795595e-05, + "loss": 0.715835452079773, + "step": 920 + }, + { + "epoch": 0.32458149779735684, + "grad_norm": 1.5022880591009429, + "learning_rate": 1.932125137538254e-05, + "loss": 0.6312157511711121, + "step": 921 + }, + { + "epoch": 0.32493392070484584, + "grad_norm": 1.3825485581433186, + "learning_rate": 1.931913984369473e-05, + "loss": 0.7565821409225464, + "step": 922 + }, + { + "epoch": 0.3252863436123348, + "grad_norm": 1.3787375139479208, + "learning_rate": 1.931702514844896e-05, + "loss": 0.6866531372070312, + "step": 923 + }, + { + "epoch": 0.32563876651982376, + "grad_norm": 2.06933203374066, + "learning_rate": 1.9314907290363117e-05, + "loss": 0.879021167755127, + "step": 924 + }, + { + "epoch": 0.32599118942731276, + "grad_norm": 1.4876230584538193, + 
"learning_rate": 1.9312786270156135e-05, + "loss": 0.6972150802612305, + "step": 925 + }, + { + "epoch": 0.32634361233480175, + "grad_norm": 1.5939077112190465, + "learning_rate": 1.9310662088548042e-05, + "loss": 0.8735189437866211, + "step": 926 + }, + { + "epoch": 0.32669603524229074, + "grad_norm": 1.4550040646675775, + "learning_rate": 1.930853474625993e-05, + "loss": 0.6114254593849182, + "step": 927 + }, + { + "epoch": 0.32704845814977973, + "grad_norm": 1.5850836788259668, + "learning_rate": 1.930640424401396e-05, + "loss": 0.8032322525978088, + "step": 928 + }, + { + "epoch": 0.3274008810572687, + "grad_norm": 1.3666090686487828, + "learning_rate": 1.9304270582533376e-05, + "loss": 0.7391160726547241, + "step": 929 + }, + { + "epoch": 0.3277533039647577, + "grad_norm": 1.3744182830455962, + "learning_rate": 1.930213376254249e-05, + "loss": 0.7055366039276123, + "step": 930 + }, + { + "epoch": 0.3281057268722467, + "grad_norm": 1.3717314993069374, + "learning_rate": 1.9299993784766684e-05, + "loss": 0.671670138835907, + "step": 931 + }, + { + "epoch": 0.3284581497797357, + "grad_norm": 1.4961694507376992, + "learning_rate": 1.9297850649932416e-05, + "loss": 0.7486976385116577, + "step": 932 + }, + { + "epoch": 0.3288105726872247, + "grad_norm": 1.3777653583239398, + "learning_rate": 1.929570435876721e-05, + "loss": 0.8767625093460083, + "step": 933 + }, + { + "epoch": 0.3291629955947137, + "grad_norm": 1.5767252427705674, + "learning_rate": 1.929355491199967e-05, + "loss": 0.6841862797737122, + "step": 934 + }, + { + "epoch": 0.3295154185022026, + "grad_norm": 1.4985001262879563, + "learning_rate": 1.929140231035946e-05, + "loss": 0.7745054960250854, + "step": 935 + }, + { + "epoch": 0.3298678414096916, + "grad_norm": 1.4538548583561628, + "learning_rate": 1.928924655457733e-05, + "loss": 0.5879434943199158, + "step": 936 + }, + { + "epoch": 0.3302202643171806, + "grad_norm": 1.4292680321712006, + "learning_rate": 1.9287087645385084e-05, + "loss": 
0.8484170436859131, + "step": 937 + }, + { + "epoch": 0.3305726872246696, + "grad_norm": 1.3383126778675687, + "learning_rate": 1.9284925583515604e-05, + "loss": 0.6518877148628235, + "step": 938 + }, + { + "epoch": 0.3309251101321586, + "grad_norm": 1.3496744406534642, + "learning_rate": 1.928276036970285e-05, + "loss": 0.7694787383079529, + "step": 939 + }, + { + "epoch": 0.3312775330396476, + "grad_norm": 1.475669634065235, + "learning_rate": 1.928059200468184e-05, + "loss": 0.6893239617347717, + "step": 940 + }, + { + "epoch": 0.33162995594713657, + "grad_norm": 1.9386710613485005, + "learning_rate": 1.927842048918867e-05, + "loss": 0.7731181383132935, + "step": 941 + }, + { + "epoch": 0.33198237885462556, + "grad_norm": 1.2730945433300995, + "learning_rate": 1.9276245823960495e-05, + "loss": 0.652579665184021, + "step": 942 + }, + { + "epoch": 0.33233480176211455, + "grad_norm": 1.4590802585162193, + "learning_rate": 1.927406800973555e-05, + "loss": 0.7504575252532959, + "step": 943 + }, + { + "epoch": 0.33268722466960354, + "grad_norm": 1.2636242756085148, + "learning_rate": 1.927188704725314e-05, + "loss": 0.6199444532394409, + "step": 944 + }, + { + "epoch": 0.33303964757709253, + "grad_norm": 1.3381297141173314, + "learning_rate": 1.9269702937253623e-05, + "loss": 0.7452073693275452, + "step": 945 + }, + { + "epoch": 0.33339207048458147, + "grad_norm": 1.6220831494484687, + "learning_rate": 1.926751568047845e-05, + "loss": 0.7538012266159058, + "step": 946 + }, + { + "epoch": 0.33374449339207046, + "grad_norm": 1.298282312930767, + "learning_rate": 1.9265325277670114e-05, + "loss": 0.6670408248901367, + "step": 947 + }, + { + "epoch": 0.33409691629955945, + "grad_norm": 1.3861711594873305, + "learning_rate": 1.926313172957219e-05, + "loss": 0.8060495853424072, + "step": 948 + }, + { + "epoch": 0.33444933920704845, + "grad_norm": 1.497135036962013, + "learning_rate": 1.926093503692933e-05, + "loss": 0.7494044303894043, + "step": 949 + }, + { + "epoch": 
0.33480176211453744, + "grad_norm": 1.4954420855155135, + "learning_rate": 1.9258735200487235e-05, + "loss": 0.5751914978027344, + "step": 950 + }, + { + "epoch": 0.33515418502202643, + "grad_norm": 1.3135496972020755, + "learning_rate": 1.9256532220992683e-05, + "loss": 0.7234281301498413, + "step": 951 + }, + { + "epoch": 0.3355066079295154, + "grad_norm": 1.648299384166419, + "learning_rate": 1.9254326099193515e-05, + "loss": 0.7721251249313354, + "step": 952 + }, + { + "epoch": 0.3358590308370044, + "grad_norm": 1.5273494870998061, + "learning_rate": 1.925211683583864e-05, + "loss": 0.7240835428237915, + "step": 953 + }, + { + "epoch": 0.3362114537444934, + "grad_norm": 1.5101195617398009, + "learning_rate": 1.9249904431678037e-05, + "loss": 0.6622776985168457, + "step": 954 + }, + { + "epoch": 0.3365638766519824, + "grad_norm": 1.7484785330432984, + "learning_rate": 1.9247688887462747e-05, + "loss": 0.9682766199111938, + "step": 955 + }, + { + "epoch": 0.3369162995594714, + "grad_norm": 1.5743447413941896, + "learning_rate": 1.9245470203944878e-05, + "loss": 0.8363134860992432, + "step": 956 + }, + { + "epoch": 0.3372687224669604, + "grad_norm": 1.4500608043156524, + "learning_rate": 1.9243248381877605e-05, + "loss": 0.6530857086181641, + "step": 957 + }, + { + "epoch": 0.3376211453744493, + "grad_norm": 1.2035108561422267, + "learning_rate": 1.924102342201517e-05, + "loss": 0.5186585187911987, + "step": 958 + }, + { + "epoch": 0.3379735682819383, + "grad_norm": 1.3827408215949344, + "learning_rate": 1.9238795325112867e-05, + "loss": 0.6729516983032227, + "step": 959 + }, + { + "epoch": 0.3383259911894273, + "grad_norm": 4.587971824519282, + "learning_rate": 1.9236564091927083e-05, + "loss": 0.6991842985153198, + "step": 960 + }, + { + "epoch": 0.3386784140969163, + "grad_norm": 1.516889979226708, + "learning_rate": 1.9234329723215235e-05, + "loss": 0.7738245725631714, + "step": 961 + }, + { + "epoch": 0.3390308370044053, + "grad_norm": 1.4574207335379696, + 
"learning_rate": 1.923209221973583e-05, + "loss": 0.7027466893196106, + "step": 962 + }, + { + "epoch": 0.3393832599118943, + "grad_norm": 1.401098486802875, + "learning_rate": 1.922985158224843e-05, + "loss": 0.7868508696556091, + "step": 963 + }, + { + "epoch": 0.33973568281938327, + "grad_norm": 1.3325223534105368, + "learning_rate": 1.9227607811513662e-05, + "loss": 0.7499512434005737, + "step": 964 + }, + { + "epoch": 0.34008810572687226, + "grad_norm": 1.3198116129339372, + "learning_rate": 1.9225360908293217e-05, + "loss": 0.6662228107452393, + "step": 965 + }, + { + "epoch": 0.34044052863436125, + "grad_norm": 1.4854956624988247, + "learning_rate": 1.9223110873349847e-05, + "loss": 0.8570939302444458, + "step": 966 + }, + { + "epoch": 0.34079295154185024, + "grad_norm": 1.3385040645698225, + "learning_rate": 1.9220857707447372e-05, + "loss": 0.7497669458389282, + "step": 967 + }, + { + "epoch": 0.34114537444933923, + "grad_norm": 1.2753268813313299, + "learning_rate": 1.9218601411350663e-05, + "loss": 0.7356737852096558, + "step": 968 + }, + { + "epoch": 0.34149779735682817, + "grad_norm": 2.3286924006274896, + "learning_rate": 1.9216341985825672e-05, + "loss": 0.7880491018295288, + "step": 969 + }, + { + "epoch": 0.34185022026431716, + "grad_norm": 1.4677269303314853, + "learning_rate": 1.92140794316394e-05, + "loss": 0.734922468662262, + "step": 970 + }, + { + "epoch": 0.34220264317180615, + "grad_norm": 1.5501144518696521, + "learning_rate": 1.9211813749559916e-05, + "loss": 0.6710363626480103, + "step": 971 + }, + { + "epoch": 0.34255506607929515, + "grad_norm": 1.256856073477316, + "learning_rate": 1.920954494035634e-05, + "loss": 0.7300584316253662, + "step": 972 + }, + { + "epoch": 0.34290748898678414, + "grad_norm": 1.5351698758546528, + "learning_rate": 1.9207273004798873e-05, + "loss": 0.8584152460098267, + "step": 973 + }, + { + "epoch": 0.34325991189427313, + "grad_norm": 1.4115351274616093, + "learning_rate": 1.9204997943658764e-05, + "loss": 
0.7307419776916504, + "step": 974 + }, + { + "epoch": 0.3436123348017621, + "grad_norm": 1.434441373942747, + "learning_rate": 1.920271975770832e-05, + "loss": 0.6004960536956787, + "step": 975 + }, + { + "epoch": 0.3439647577092511, + "grad_norm": 1.4015679334218965, + "learning_rate": 1.920043844772092e-05, + "loss": 0.7951763868331909, + "step": 976 + }, + { + "epoch": 0.3443171806167401, + "grad_norm": 1.6575061104845086, + "learning_rate": 1.919815401447099e-05, + "loss": 0.6835082769393921, + "step": 977 + }, + { + "epoch": 0.3446696035242291, + "grad_norm": 1.3790503468449504, + "learning_rate": 1.9195866458734034e-05, + "loss": 0.7556526064872742, + "step": 978 + }, + { + "epoch": 0.3450220264317181, + "grad_norm": 1.2642410643718298, + "learning_rate": 1.91935757812866e-05, + "loss": 0.6918114423751831, + "step": 979 + }, + { + "epoch": 0.345374449339207, + "grad_norm": 1.6331863015088222, + "learning_rate": 1.9191281982906304e-05, + "loss": 0.8197037577629089, + "step": 980 + }, + { + "epoch": 0.345726872246696, + "grad_norm": 1.9820857497043596, + "learning_rate": 1.9188985064371818e-05, + "loss": 0.833138644695282, + "step": 981 + }, + { + "epoch": 0.346079295154185, + "grad_norm": 1.3877421520016489, + "learning_rate": 1.9186685026462874e-05, + "loss": 0.6593397855758667, + "step": 982 + }, + { + "epoch": 0.346431718061674, + "grad_norm": 1.750501089720619, + "learning_rate": 1.918438186996026e-05, + "loss": 0.7535643577575684, + "step": 983 + }, + { + "epoch": 0.346784140969163, + "grad_norm": 1.5295833510904033, + "learning_rate": 1.9182075595645836e-05, + "loss": 0.6959745287895203, + "step": 984 + }, + { + "epoch": 0.347136563876652, + "grad_norm": 1.4045200992789866, + "learning_rate": 1.91797662043025e-05, + "loss": 0.7349518537521362, + "step": 985 + }, + { + "epoch": 0.347488986784141, + "grad_norm": 1.3769518272852244, + "learning_rate": 1.9177453696714224e-05, + "loss": 0.7677974700927734, + "step": 986 + }, + { + "epoch": 
0.34784140969162997, + "grad_norm": 1.4486626509256493, + "learning_rate": 1.917513807366603e-05, + "loss": 0.7302255630493164, + "step": 987 + }, + { + "epoch": 0.34819383259911896, + "grad_norm": 1.477891236612788, + "learning_rate": 1.9172819335944003e-05, + "loss": 0.838138222694397, + "step": 988 + }, + { + "epoch": 0.34854625550660795, + "grad_norm": 1.555345260078333, + "learning_rate": 1.9170497484335276e-05, + "loss": 0.8018180131912231, + "step": 989 + }, + { + "epoch": 0.34889867841409694, + "grad_norm": 1.4299439839627417, + "learning_rate": 1.9168172519628056e-05, + "loss": 0.8085787296295166, + "step": 990 + }, + { + "epoch": 0.3492511013215859, + "grad_norm": 1.407734167007011, + "learning_rate": 1.9165844442611584e-05, + "loss": 0.8419004082679749, + "step": 991 + }, + { + "epoch": 0.34960352422907487, + "grad_norm": 1.485093259368171, + "learning_rate": 1.916351325407618e-05, + "loss": 0.8255139589309692, + "step": 992 + }, + { + "epoch": 0.34995594713656386, + "grad_norm": 1.3581016847128187, + "learning_rate": 1.9161178954813203e-05, + "loss": 0.7588528990745544, + "step": 993 + }, + { + "epoch": 0.35030837004405285, + "grad_norm": 1.3722258517458088, + "learning_rate": 1.9158841545615076e-05, + "loss": 0.7057096362113953, + "step": 994 + }, + { + "epoch": 0.35066079295154184, + "grad_norm": 1.3264479954648483, + "learning_rate": 1.915650102727528e-05, + "loss": 0.6913125514984131, + "step": 995 + }, + { + "epoch": 0.35101321585903084, + "grad_norm": 1.4277288783882767, + "learning_rate": 1.9154157400588348e-05, + "loss": 0.7622898817062378, + "step": 996 + }, + { + "epoch": 0.3513656387665198, + "grad_norm": 1.3345359637809249, + "learning_rate": 1.915181066634986e-05, + "loss": 0.6918702125549316, + "step": 997 + }, + { + "epoch": 0.3517180616740088, + "grad_norm": 1.4330955991310976, + "learning_rate": 1.914946082535647e-05, + "loss": 0.8801462650299072, + "step": 998 + }, + { + "epoch": 0.3520704845814978, + "grad_norm": 1.6364104196010791, + 
"learning_rate": 1.9147107878405873e-05, + "loss": 0.7901172637939453, + "step": 999 + }, + { + "epoch": 0.3524229074889868, + "grad_norm": 1.3202428944557627, + "learning_rate": 1.9144751826296818e-05, + "loss": 0.7308447360992432, + "step": 1000 + }, + { + "epoch": 0.3527753303964758, + "grad_norm": 1.3152547105893029, + "learning_rate": 1.9142392669829114e-05, + "loss": 0.5733275413513184, + "step": 1001 + }, + { + "epoch": 0.35312775330396473, + "grad_norm": 1.4327185784306546, + "learning_rate": 1.9140030409803622e-05, + "loss": 0.7251306772232056, + "step": 1002 + }, + { + "epoch": 0.3534801762114537, + "grad_norm": 1.3492122584167072, + "learning_rate": 1.913766504702225e-05, + "loss": 0.7983027696609497, + "step": 1003 + }, + { + "epoch": 0.3538325991189427, + "grad_norm": 1.5284478719025472, + "learning_rate": 1.9135296582287973e-05, + "loss": 0.7464017868041992, + "step": 1004 + }, + { + "epoch": 0.3541850220264317, + "grad_norm": 1.3377291300677683, + "learning_rate": 1.9132925016404805e-05, + "loss": 0.7333002686500549, + "step": 1005 + }, + { + "epoch": 0.3545374449339207, + "grad_norm": 1.4170618275882645, + "learning_rate": 1.9130550350177823e-05, + "loss": 0.729085385799408, + "step": 1006 + }, + { + "epoch": 0.3548898678414097, + "grad_norm": 1.1531700234964573, + "learning_rate": 1.9128172584413148e-05, + "loss": 0.7599227428436279, + "step": 1007 + }, + { + "epoch": 0.3552422907488987, + "grad_norm": 1.3499603875621307, + "learning_rate": 1.9125791719917962e-05, + "loss": 0.8110464811325073, + "step": 1008 + }, + { + "epoch": 0.3555947136563877, + "grad_norm": 1.443391069493257, + "learning_rate": 1.912340775750049e-05, + "loss": 0.7431697845458984, + "step": 1009 + }, + { + "epoch": 0.35594713656387666, + "grad_norm": 1.3353700802371913, + "learning_rate": 1.9121020697970016e-05, + "loss": 0.7833640575408936, + "step": 1010 + }, + { + "epoch": 0.35629955947136566, + "grad_norm": 1.2927496434698726, + "learning_rate": 1.9118630542136874e-05, + 
"loss": 0.7693058252334595, + "step": 1011 + }, + { + "epoch": 0.35665198237885465, + "grad_norm": 1.3593779388270224, + "learning_rate": 1.9116237290812445e-05, + "loss": 0.7724676132202148, + "step": 1012 + }, + { + "epoch": 0.3570044052863436, + "grad_norm": 1.3849928303091037, + "learning_rate": 1.911384094480916e-05, + "loss": 0.6024055480957031, + "step": 1013 + }, + { + "epoch": 0.3573568281938326, + "grad_norm": 1.254237630036734, + "learning_rate": 1.9111441504940514e-05, + "loss": 0.7710703611373901, + "step": 1014 + }, + { + "epoch": 0.35770925110132157, + "grad_norm": 1.3917926832468532, + "learning_rate": 1.910903897202103e-05, + "loss": 0.7591651678085327, + "step": 1015 + }, + { + "epoch": 0.35806167400881056, + "grad_norm": 1.3250641662724636, + "learning_rate": 1.9106633346866302e-05, + "loss": 0.7721874713897705, + "step": 1016 + }, + { + "epoch": 0.35841409691629955, + "grad_norm": 1.3837097156983347, + "learning_rate": 1.910422463029296e-05, + "loss": 0.6767420172691345, + "step": 1017 + }, + { + "epoch": 0.35876651982378854, + "grad_norm": 1.5808312779065312, + "learning_rate": 1.910181282311869e-05, + "loss": 0.6704902648925781, + "step": 1018 + }, + { + "epoch": 0.35911894273127754, + "grad_norm": 1.3288966146848866, + "learning_rate": 1.9099397926162227e-05, + "loss": 0.8871079683303833, + "step": 1019 + }, + { + "epoch": 0.3594713656387665, + "grad_norm": 1.5716465127646195, + "learning_rate": 1.909697994024335e-05, + "loss": 0.7222549319267273, + "step": 1020 + }, + { + "epoch": 0.3598237885462555, + "grad_norm": 1.4050103839828958, + "learning_rate": 1.9094558866182892e-05, + "loss": 0.7443021535873413, + "step": 1021 + }, + { + "epoch": 0.3601762114537445, + "grad_norm": 1.3877313570980134, + "learning_rate": 1.9092134704802735e-05, + "loss": 0.7698349952697754, + "step": 1022 + }, + { + "epoch": 0.3605286343612335, + "grad_norm": 1.9010750041325926, + "learning_rate": 1.9089707456925798e-05, + "loss": 0.863248348236084, + "step": 1023 + 
}, + { + "epoch": 0.3608810572687225, + "grad_norm": 1.1572981545597187, + "learning_rate": 1.9087277123376068e-05, + "loss": 0.7036338448524475, + "step": 1024 + }, + { + "epoch": 0.36123348017621143, + "grad_norm": 1.5140044810060398, + "learning_rate": 1.9084843704978558e-05, + "loss": 0.7427274584770203, + "step": 1025 + }, + { + "epoch": 0.3615859030837004, + "grad_norm": 1.5903685422277276, + "learning_rate": 1.908240720255934e-05, + "loss": 0.6548313498497009, + "step": 1026 + }, + { + "epoch": 0.3619383259911894, + "grad_norm": 1.3326463394362358, + "learning_rate": 1.9079967616945534e-05, + "loss": 0.7586454749107361, + "step": 1027 + }, + { + "epoch": 0.3622907488986784, + "grad_norm": 1.45389698507953, + "learning_rate": 1.90775249489653e-05, + "loss": 0.6954889297485352, + "step": 1028 + }, + { + "epoch": 0.3626431718061674, + "grad_norm": 1.6543950271160617, + "learning_rate": 1.907507919944785e-05, + "loss": 0.8798770904541016, + "step": 1029 + }, + { + "epoch": 0.3629955947136564, + "grad_norm": 1.3815054682339305, + "learning_rate": 1.9072630369223433e-05, + "loss": 0.6600694060325623, + "step": 1030 + }, + { + "epoch": 0.3633480176211454, + "grad_norm": 1.5776995405913148, + "learning_rate": 1.9070178459123366e-05, + "loss": 0.6830897927284241, + "step": 1031 + }, + { + "epoch": 0.36370044052863437, + "grad_norm": 1.1973844620945089, + "learning_rate": 1.906772346997998e-05, + "loss": 0.6283613443374634, + "step": 1032 + }, + { + "epoch": 0.36405286343612336, + "grad_norm": 1.2892968799675324, + "learning_rate": 1.9065265402626676e-05, + "loss": 0.6451754570007324, + "step": 1033 + }, + { + "epoch": 0.36440528634361236, + "grad_norm": 1.4387559441313162, + "learning_rate": 1.9062804257897887e-05, + "loss": 0.7949883937835693, + "step": 1034 + }, + { + "epoch": 0.36475770925110135, + "grad_norm": 1.4366893391590683, + "learning_rate": 1.90603400366291e-05, + "loss": 0.625343918800354, + "step": 1035 + }, + { + "epoch": 0.3651101321585903, + 
"grad_norm": 1.5716897663583798, + "learning_rate": 1.9057872739656843e-05, + "loss": 0.8398839235305786, + "step": 1036 + }, + { + "epoch": 0.3654625550660793, + "grad_norm": 1.6515297053174456, + "learning_rate": 1.9055402367818673e-05, + "loss": 0.8628166913986206, + "step": 1037 + }, + { + "epoch": 0.36581497797356827, + "grad_norm": 1.6000244306696312, + "learning_rate": 1.905292892195322e-05, + "loss": 0.7494110465049744, + "step": 1038 + }, + { + "epoch": 0.36616740088105726, + "grad_norm": 1.6358981860019415, + "learning_rate": 1.9050452402900134e-05, + "loss": 0.7695099115371704, + "step": 1039 + }, + { + "epoch": 0.36651982378854625, + "grad_norm": 1.3948395289772064, + "learning_rate": 1.904797281150012e-05, + "loss": 0.8067067861557007, + "step": 1040 + }, + { + "epoch": 0.36687224669603524, + "grad_norm": 1.5430196098026252, + "learning_rate": 1.9045490148594917e-05, + "loss": 0.7542074918746948, + "step": 1041 + }, + { + "epoch": 0.36722466960352423, + "grad_norm": 1.4232871422135234, + "learning_rate": 1.9043004415027314e-05, + "loss": 0.7027335166931152, + "step": 1042 + }, + { + "epoch": 0.3675770925110132, + "grad_norm": 1.2842638834648272, + "learning_rate": 1.9040515611641142e-05, + "loss": 0.7779253721237183, + "step": 1043 + }, + { + "epoch": 0.3679295154185022, + "grad_norm": 1.4713589430159515, + "learning_rate": 1.9038023739281275e-05, + "loss": 0.6840049028396606, + "step": 1044 + }, + { + "epoch": 0.3682819383259912, + "grad_norm": 1.2252786450532585, + "learning_rate": 1.903552879879362e-05, + "loss": 0.6183794736862183, + "step": 1045 + }, + { + "epoch": 0.3686343612334802, + "grad_norm": 1.3239395642180716, + "learning_rate": 1.9033030791025127e-05, + "loss": 0.7770168781280518, + "step": 1046 + }, + { + "epoch": 0.36898678414096914, + "grad_norm": 1.5646813675557831, + "learning_rate": 1.9030529716823806e-05, + "loss": 0.7192036509513855, + "step": 1047 + }, + { + "epoch": 0.36933920704845813, + "grad_norm": 1.3179369082607764, + 
"learning_rate": 1.9028025577038688e-05, + "loss": 0.6604419946670532, + "step": 1048 + }, + { + "epoch": 0.3696916299559471, + "grad_norm": 1.7088212085954357, + "learning_rate": 1.9025518372519847e-05, + "loss": 0.7999060153961182, + "step": 1049 + }, + { + "epoch": 0.3700440528634361, + "grad_norm": 1.6369356635778263, + "learning_rate": 1.9023008104118404e-05, + "loss": 0.7487536668777466, + "step": 1050 + }, + { + "epoch": 0.3703964757709251, + "grad_norm": 1.4534592079598474, + "learning_rate": 1.9020494772686513e-05, + "loss": 0.7786455154418945, + "step": 1051 + }, + { + "epoch": 0.3707488986784141, + "grad_norm": 1.5556124976221868, + "learning_rate": 1.9017978379077378e-05, + "loss": 0.7592626214027405, + "step": 1052 + }, + { + "epoch": 0.3711013215859031, + "grad_norm": 1.3193440168525459, + "learning_rate": 1.901545892414523e-05, + "loss": 0.774850606918335, + "step": 1053 + }, + { + "epoch": 0.3714537444933921, + "grad_norm": 1.4859587321900767, + "learning_rate": 1.901293640874535e-05, + "loss": 0.5430009365081787, + "step": 1054 + }, + { + "epoch": 0.37180616740088107, + "grad_norm": 1.4541817899150224, + "learning_rate": 1.9010410833734053e-05, + "loss": 0.7459923624992371, + "step": 1055 + }, + { + "epoch": 0.37215859030837006, + "grad_norm": 1.6269332982530442, + "learning_rate": 1.9007882199968692e-05, + "loss": 0.6372017860412598, + "step": 1056 + }, + { + "epoch": 0.37251101321585905, + "grad_norm": 1.6522112420188226, + "learning_rate": 1.900535050830766e-05, + "loss": 0.6773583292961121, + "step": 1057 + }, + { + "epoch": 0.372863436123348, + "grad_norm": 1.7342256392022233, + "learning_rate": 1.900281575961039e-05, + "loss": 0.8431004285812378, + "step": 1058 + }, + { + "epoch": 0.373215859030837, + "grad_norm": 1.4085085883480681, + "learning_rate": 1.9000277954737342e-05, + "loss": 0.6361340284347534, + "step": 1059 + }, + { + "epoch": 0.373568281938326, + "grad_norm": 1.3793359019510345, + "learning_rate": 1.8997737094550033e-05, + 
"loss": 0.7153787612915039, + "step": 1060 + }, + { + "epoch": 0.37392070484581497, + "grad_norm": 1.4220392348844544, + "learning_rate": 1.8995193179911e-05, + "loss": 0.7244935631752014, + "step": 1061 + }, + { + "epoch": 0.37427312775330396, + "grad_norm": 1.4061330426818142, + "learning_rate": 1.8992646211683817e-05, + "loss": 0.6648202538490295, + "step": 1062 + }, + { + "epoch": 0.37462555066079295, + "grad_norm": 1.4217807346058315, + "learning_rate": 1.8990096190733113e-05, + "loss": 0.6528836488723755, + "step": 1063 + }, + { + "epoch": 0.37497797356828194, + "grad_norm": 1.4695679092519263, + "learning_rate": 1.8987543117924532e-05, + "loss": 0.6749341487884521, + "step": 1064 + }, + { + "epoch": 0.37533039647577093, + "grad_norm": 1.3287092803608218, + "learning_rate": 1.8984986994124766e-05, + "loss": 0.7402256727218628, + "step": 1065 + }, + { + "epoch": 0.3756828193832599, + "grad_norm": 1.2181513754192281, + "learning_rate": 1.898242782020154e-05, + "loss": 0.5638695955276489, + "step": 1066 + }, + { + "epoch": 0.3760352422907489, + "grad_norm": 1.5457056768133204, + "learning_rate": 1.897986559702361e-05, + "loss": 0.829822838306427, + "step": 1067 + }, + { + "epoch": 0.3763876651982379, + "grad_norm": 1.3351440834834858, + "learning_rate": 1.8977300325460774e-05, + "loss": 0.6796025037765503, + "step": 1068 + }, + { + "epoch": 0.37674008810572684, + "grad_norm": 1.3611135527247238, + "learning_rate": 1.897473200638386e-05, + "loss": 0.8584038615226746, + "step": 1069 + }, + { + "epoch": 0.37709251101321584, + "grad_norm": 1.4622377307020165, + "learning_rate": 1.897216064066474e-05, + "loss": 0.8069149255752563, + "step": 1070 + }, + { + "epoch": 0.37744493392070483, + "grad_norm": 1.2194173424769332, + "learning_rate": 1.89695862291763e-05, + "loss": 0.5762223601341248, + "step": 1071 + }, + { + "epoch": 0.3777973568281938, + "grad_norm": 1.3827918624348656, + "learning_rate": 1.8967008772792483e-05, + "loss": 0.6626466512680054, + "step": 1072 + 
}, + { + "epoch": 0.3781497797356828, + "grad_norm": 1.15359758590964, + "learning_rate": 1.896442827238825e-05, + "loss": 0.6260244250297546, + "step": 1073 + }, + { + "epoch": 0.3785022026431718, + "grad_norm": 1.8994686915407593, + "learning_rate": 1.8961844728839602e-05, + "loss": 0.8090343475341797, + "step": 1074 + }, + { + "epoch": 0.3788546255506608, + "grad_norm": 1.4116056126096472, + "learning_rate": 1.8959258143023575e-05, + "loss": 0.66957026720047, + "step": 1075 + }, + { + "epoch": 0.3792070484581498, + "grad_norm": 1.308974606662818, + "learning_rate": 1.8956668515818223e-05, + "loss": 0.7103087306022644, + "step": 1076 + }, + { + "epoch": 0.3795594713656388, + "grad_norm": 1.468914156940793, + "learning_rate": 1.895407584810266e-05, + "loss": 0.7469112277030945, + "step": 1077 + }, + { + "epoch": 0.37991189427312777, + "grad_norm": 1.624950928787921, + "learning_rate": 1.8951480140757003e-05, + "loss": 0.8252213001251221, + "step": 1078 + }, + { + "epoch": 0.38026431718061676, + "grad_norm": 1.4238044077341658, + "learning_rate": 1.8948881394662417e-05, + "loss": 0.7204562425613403, + "step": 1079 + }, + { + "epoch": 0.38061674008810575, + "grad_norm": 1.5659608304591812, + "learning_rate": 1.89462796107011e-05, + "loss": 0.7325669527053833, + "step": 1080 + }, + { + "epoch": 0.3809691629955947, + "grad_norm": 1.2964480504204927, + "learning_rate": 1.8943674789756276e-05, + "loss": 0.738972008228302, + "step": 1081 + }, + { + "epoch": 0.3813215859030837, + "grad_norm": 1.5892566433984823, + "learning_rate": 1.8941066932712194e-05, + "loss": 0.7468631267547607, + "step": 1082 + }, + { + "epoch": 0.3816740088105727, + "grad_norm": 1.6145182365902104, + "learning_rate": 1.893845604045415e-05, + "loss": 0.6479831337928772, + "step": 1083 + }, + { + "epoch": 0.38202643171806167, + "grad_norm": 1.3615750017210906, + "learning_rate": 1.893584211386845e-05, + "loss": 0.7615871429443359, + "step": 1084 + }, + { + "epoch": 0.38237885462555066, + "grad_norm": 
1.8901071385329251, + "learning_rate": 1.8933225153842446e-05, + "loss": 0.6934449076652527, + "step": 1085 + }, + { + "epoch": 0.38273127753303965, + "grad_norm": 1.2384833194245852, + "learning_rate": 1.8930605161264517e-05, + "loss": 0.5267079472541809, + "step": 1086 + }, + { + "epoch": 0.38308370044052864, + "grad_norm": 1.524832028509735, + "learning_rate": 1.892798213702407e-05, + "loss": 0.7309125661849976, + "step": 1087 + }, + { + "epoch": 0.38343612334801763, + "grad_norm": 1.3743253361073855, + "learning_rate": 1.892535608201153e-05, + "loss": 0.8133678436279297, + "step": 1088 + }, + { + "epoch": 0.3837885462555066, + "grad_norm": 1.3915725940468886, + "learning_rate": 1.892272699711837e-05, + "loss": 0.6097027063369751, + "step": 1089 + }, + { + "epoch": 0.3841409691629956, + "grad_norm": 1.548287022579551, + "learning_rate": 1.8920094883237082e-05, + "loss": 0.70456862449646, + "step": 1090 + }, + { + "epoch": 0.3844933920704846, + "grad_norm": 1.2952569165029428, + "learning_rate": 1.8917459741261183e-05, + "loss": 0.7236523628234863, + "step": 1091 + }, + { + "epoch": 0.38484581497797354, + "grad_norm": 1.5039785189114319, + "learning_rate": 1.8914821572085224e-05, + "loss": 0.7251272201538086, + "step": 1092 + }, + { + "epoch": 0.38519823788546254, + "grad_norm": 1.271767676796452, + "learning_rate": 1.8912180376604777e-05, + "loss": 0.7381070852279663, + "step": 1093 + }, + { + "epoch": 0.3855506607929515, + "grad_norm": 1.6023999081974447, + "learning_rate": 1.8909536155716458e-05, + "loss": 0.6654129028320312, + "step": 1094 + }, + { + "epoch": 0.3859030837004405, + "grad_norm": 1.4351957388528893, + "learning_rate": 1.8906888910317883e-05, + "loss": 0.7823128700256348, + "step": 1095 + }, + { + "epoch": 0.3862555066079295, + "grad_norm": 1.2302320218391962, + "learning_rate": 1.8904238641307718e-05, + "loss": 0.5988126993179321, + "step": 1096 + }, + { + "epoch": 0.3866079295154185, + "grad_norm": 1.6745614533481283, + "learning_rate": 
1.8901585349585643e-05, + "loss": 0.7671465873718262, + "step": 1097 + }, + { + "epoch": 0.3869603524229075, + "grad_norm": 1.4027982600434907, + "learning_rate": 1.889892903605237e-05, + "loss": 0.7878838777542114, + "step": 1098 + }, + { + "epoch": 0.3873127753303965, + "grad_norm": 1.2802181437962392, + "learning_rate": 1.8896269701609634e-05, + "loss": 0.72254878282547, + "step": 1099 + }, + { + "epoch": 0.3876651982378855, + "grad_norm": 1.4183908379879375, + "learning_rate": 1.8893607347160198e-05, + "loss": 0.6796868443489075, + "step": 1100 + }, + { + "epoch": 0.38801762114537447, + "grad_norm": 1.510469064523606, + "learning_rate": 1.8890941973607843e-05, + "loss": 0.6378471851348877, + "step": 1101 + }, + { + "epoch": 0.38837004405286346, + "grad_norm": 1.327169163711753, + "learning_rate": 1.888827358185739e-05, + "loss": 0.8473032712936401, + "step": 1102 + }, + { + "epoch": 0.3887224669603524, + "grad_norm": 1.4704779902492213, + "learning_rate": 1.8885602172814667e-05, + "loss": 0.8272742033004761, + "step": 1103 + }, + { + "epoch": 0.3890748898678414, + "grad_norm": 1.45593169268278, + "learning_rate": 1.8882927747386533e-05, + "loss": 0.7244507670402527, + "step": 1104 + }, + { + "epoch": 0.3894273127753304, + "grad_norm": 1.3081271484466186, + "learning_rate": 1.888025030648088e-05, + "loss": 0.5764014720916748, + "step": 1105 + }, + { + "epoch": 0.3897797356828194, + "grad_norm": 1.230279760550168, + "learning_rate": 1.887756985100661e-05, + "loss": 0.6944009065628052, + "step": 1106 + }, + { + "epoch": 0.39013215859030836, + "grad_norm": 1.381963017332696, + "learning_rate": 1.8874886381873657e-05, + "loss": 0.7096902132034302, + "step": 1107 + }, + { + "epoch": 0.39048458149779736, + "grad_norm": 1.6526795986169043, + "learning_rate": 1.8872199899992973e-05, + "loss": 0.6304805278778076, + "step": 1108 + }, + { + "epoch": 0.39083700440528635, + "grad_norm": 1.3081643743142675, + "learning_rate": 1.8869510406276538e-05, + "loss": 
0.7091327905654907, + "step": 1109 + }, + { + "epoch": 0.39118942731277534, + "grad_norm": 1.4257979117717376, + "learning_rate": 1.886681790163735e-05, + "loss": 0.6575565338134766, + "step": 1110 + }, + { + "epoch": 0.39154185022026433, + "grad_norm": 1.6155582257297172, + "learning_rate": 1.8864122386989426e-05, + "loss": 0.837468147277832, + "step": 1111 + }, + { + "epoch": 0.3918942731277533, + "grad_norm": 1.4395330206284223, + "learning_rate": 1.8861423863247816e-05, + "loss": 0.6861380338668823, + "step": 1112 + }, + { + "epoch": 0.3922466960352423, + "grad_norm": 1.3206140573248442, + "learning_rate": 1.8858722331328582e-05, + "loss": 0.7421156167984009, + "step": 1113 + }, + { + "epoch": 0.39259911894273125, + "grad_norm": 1.4106532753820455, + "learning_rate": 1.8856017792148807e-05, + "loss": 0.8037575483322144, + "step": 1114 + }, + { + "epoch": 0.39295154185022024, + "grad_norm": 1.34412494732323, + "learning_rate": 1.8853310246626608e-05, + "loss": 0.6530179381370544, + "step": 1115 + }, + { + "epoch": 0.39330396475770923, + "grad_norm": 1.7480111733406445, + "learning_rate": 1.88505996956811e-05, + "loss": 0.9039478302001953, + "step": 1116 + }, + { + "epoch": 0.3936563876651982, + "grad_norm": 1.2556675250098766, + "learning_rate": 1.8847886140232438e-05, + "loss": 0.7734917998313904, + "step": 1117 + }, + { + "epoch": 0.3940088105726872, + "grad_norm": 1.4809117769611548, + "learning_rate": 1.8845169581201786e-05, + "loss": 0.7146204113960266, + "step": 1118 + }, + { + "epoch": 0.3943612334801762, + "grad_norm": 1.4108388267740644, + "learning_rate": 1.8842450019511337e-05, + "loss": 0.6427414417266846, + "step": 1119 + }, + { + "epoch": 0.3947136563876652, + "grad_norm": 1.462443026711516, + "learning_rate": 1.883972745608429e-05, + "loss": 0.7241504192352295, + "step": 1120 + }, + { + "epoch": 0.3950660792951542, + "grad_norm": 1.5796197427651677, + "learning_rate": 1.8837001891844875e-05, + "loss": 0.7085466384887695, + "step": 1121 + }, + { + 
"epoch": 0.3954185022026432, + "grad_norm": 1.220037664049328, + "learning_rate": 1.8834273327718334e-05, + "loss": 0.6099711656570435, + "step": 1122 + }, + { + "epoch": 0.3957709251101322, + "grad_norm": 1.7637467057266936, + "learning_rate": 1.8831541764630936e-05, + "loss": 0.9153809547424316, + "step": 1123 + }, + { + "epoch": 0.39612334801762117, + "grad_norm": 1.432058114739846, + "learning_rate": 1.8828807203509953e-05, + "loss": 0.7025514841079712, + "step": 1124 + }, + { + "epoch": 0.3964757709251101, + "grad_norm": 1.3170228531933665, + "learning_rate": 1.882606964528369e-05, + "loss": 0.8254855275154114, + "step": 1125 + }, + { + "epoch": 0.3968281938325991, + "grad_norm": 1.3015643549096694, + "learning_rate": 1.8823329090881457e-05, + "loss": 0.6812278032302856, + "step": 1126 + }, + { + "epoch": 0.3971806167400881, + "grad_norm": 1.4379402990614556, + "learning_rate": 1.8820585541233592e-05, + "loss": 0.6570114493370056, + "step": 1127 + }, + { + "epoch": 0.3975330396475771, + "grad_norm": 1.4245448514304093, + "learning_rate": 1.881783899727144e-05, + "loss": 0.636163592338562, + "step": 1128 + }, + { + "epoch": 0.39788546255506607, + "grad_norm": 1.4535684365173425, + "learning_rate": 1.8815089459927373e-05, + "loss": 0.6744807958602905, + "step": 1129 + }, + { + "epoch": 0.39823788546255506, + "grad_norm": 1.2654983836452696, + "learning_rate": 1.8812336930134768e-05, + "loss": 0.6739502549171448, + "step": 1130 + }, + { + "epoch": 0.39859030837004406, + "grad_norm": 1.5274150360278067, + "learning_rate": 1.8809581408828026e-05, + "loss": 0.800058126449585, + "step": 1131 + }, + { + "epoch": 0.39894273127753305, + "grad_norm": 1.293199138820765, + "learning_rate": 1.880682289694256e-05, + "loss": 0.7158734798431396, + "step": 1132 + }, + { + "epoch": 0.39929515418502204, + "grad_norm": 1.426620948967722, + "learning_rate": 1.8804061395414795e-05, + "loss": 0.7142150402069092, + "step": 1133 + }, + { + "epoch": 0.39964757709251103, + "grad_norm": 
1.5712220679274596, + "learning_rate": 1.8801296905182184e-05, + "loss": 0.7830438613891602, + "step": 1134 + }, + { + "epoch": 0.4, + "grad_norm": 1.3789411964854812, + "learning_rate": 1.879852942718318e-05, + "loss": 0.7037091255187988, + "step": 1135 + }, + { + "epoch": 0.400352422907489, + "grad_norm": 1.5410576826642701, + "learning_rate": 1.8795758962357254e-05, + "loss": 0.7634316682815552, + "step": 1136 + }, + { + "epoch": 0.40070484581497795, + "grad_norm": 1.3380525485574057, + "learning_rate": 1.8792985511644895e-05, + "loss": 0.8569636344909668, + "step": 1137 + }, + { + "epoch": 0.40105726872246694, + "grad_norm": 1.4697640342217926, + "learning_rate": 1.8790209075987603e-05, + "loss": 0.8589881062507629, + "step": 1138 + }, + { + "epoch": 0.40140969162995593, + "grad_norm": 1.4119711578026037, + "learning_rate": 1.8787429656327892e-05, + "loss": 0.6667177677154541, + "step": 1139 + }, + { + "epoch": 0.4017621145374449, + "grad_norm": 1.5302691962759787, + "learning_rate": 1.8784647253609286e-05, + "loss": 0.8272922039031982, + "step": 1140 + }, + { + "epoch": 0.4021145374449339, + "grad_norm": 1.4934073596410382, + "learning_rate": 1.8781861868776328e-05, + "loss": 0.735906720161438, + "step": 1141 + }, + { + "epoch": 0.4024669603524229, + "grad_norm": 1.6214826290901958, + "learning_rate": 1.8779073502774567e-05, + "loss": 0.7496200799942017, + "step": 1142 + }, + { + "epoch": 0.4028193832599119, + "grad_norm": 1.5379634398249482, + "learning_rate": 1.8776282156550563e-05, + "loss": 0.741244912147522, + "step": 1143 + }, + { + "epoch": 0.4031718061674009, + "grad_norm": 1.6175484470841388, + "learning_rate": 1.87734878310519e-05, + "loss": 0.6074572205543518, + "step": 1144 + }, + { + "epoch": 0.4035242290748899, + "grad_norm": 1.5403137415943102, + "learning_rate": 1.8770690527227156e-05, + "loss": 0.7852963805198669, + "step": 1145 + }, + { + "epoch": 0.4038766519823789, + "grad_norm": 1.3167947695811832, + "learning_rate": 
1.8767890246025934e-05, + "loss": 0.8041664361953735, + "step": 1146 + }, + { + "epoch": 0.40422907488986787, + "grad_norm": 1.2847896666293108, + "learning_rate": 1.876508698839884e-05, + "loss": 0.6014564037322998, + "step": 1147 + }, + { + "epoch": 0.4045814977973568, + "grad_norm": 1.6737775020761936, + "learning_rate": 1.876228075529749e-05, + "loss": 0.7389206886291504, + "step": 1148 + }, + { + "epoch": 0.4049339207048458, + "grad_norm": 1.5291026740622409, + "learning_rate": 1.875947154767452e-05, + "loss": 0.7540062665939331, + "step": 1149 + }, + { + "epoch": 0.4052863436123348, + "grad_norm": 1.5780731113626183, + "learning_rate": 1.8756659366483564e-05, + "loss": 0.6953487396240234, + "step": 1150 + }, + { + "epoch": 0.4056387665198238, + "grad_norm": 1.8069469411894516, + "learning_rate": 1.875384421267927e-05, + "loss": 0.6715666055679321, + "step": 1151 + }, + { + "epoch": 0.40599118942731277, + "grad_norm": 1.6113428960633331, + "learning_rate": 1.8751026087217294e-05, + "loss": 0.7763206362724304, + "step": 1152 + }, + { + "epoch": 0.40634361233480176, + "grad_norm": 1.7227531605547286, + "learning_rate": 1.8748204991054304e-05, + "loss": 0.8445626497268677, + "step": 1153 + }, + { + "epoch": 0.40669603524229075, + "grad_norm": 1.4170830085508515, + "learning_rate": 1.8745380925147976e-05, + "loss": 0.6789584159851074, + "step": 1154 + }, + { + "epoch": 0.40704845814977975, + "grad_norm": 1.403092590323935, + "learning_rate": 1.8742553890456986e-05, + "loss": 0.6301349401473999, + "step": 1155 + }, + { + "epoch": 0.40740088105726874, + "grad_norm": 1.243923442253091, + "learning_rate": 1.873972388794103e-05, + "loss": 0.5968909859657288, + "step": 1156 + }, + { + "epoch": 0.40775330396475773, + "grad_norm": 1.489269903668207, + "learning_rate": 1.873689091856081e-05, + "loss": 0.759127676486969, + "step": 1157 + }, + { + "epoch": 0.4081057268722467, + "grad_norm": 1.7062525426103168, + "learning_rate": 1.873405498327802e-05, + "loss": 
0.8113895654678345, + "step": 1158 + }, + { + "epoch": 0.40845814977973566, + "grad_norm": 2.2841166697739266, + "learning_rate": 1.8731216083055373e-05, + "loss": 0.6294944286346436, + "step": 1159 + }, + { + "epoch": 0.40881057268722465, + "grad_norm": 1.7643300465666825, + "learning_rate": 1.87283742188566e-05, + "loss": 0.7024469375610352, + "step": 1160 + }, + { + "epoch": 0.40916299559471364, + "grad_norm": 1.6192702903054457, + "learning_rate": 1.8725529391646413e-05, + "loss": 0.6593793034553528, + "step": 1161 + }, + { + "epoch": 0.40951541850220263, + "grad_norm": 1.491465083071803, + "learning_rate": 1.8722681602390548e-05, + "loss": 0.72177654504776, + "step": 1162 + }, + { + "epoch": 0.4098678414096916, + "grad_norm": 1.5089448151062697, + "learning_rate": 1.8719830852055736e-05, + "loss": 0.7099393606185913, + "step": 1163 + }, + { + "epoch": 0.4102202643171806, + "grad_norm": 1.3870038981594819, + "learning_rate": 1.871697714160972e-05, + "loss": 0.6221687197685242, + "step": 1164 + }, + { + "epoch": 0.4105726872246696, + "grad_norm": 1.6034975452453926, + "learning_rate": 1.8714120472021252e-05, + "loss": 0.7236911058425903, + "step": 1165 + }, + { + "epoch": 0.4109251101321586, + "grad_norm": 1.6733335742616042, + "learning_rate": 1.8711260844260072e-05, + "loss": 0.6777583360671997, + "step": 1166 + }, + { + "epoch": 0.4112775330396476, + "grad_norm": 1.2685396486773262, + "learning_rate": 1.870839825929694e-05, + "loss": 0.6408713459968567, + "step": 1167 + }, + { + "epoch": 0.4116299559471366, + "grad_norm": 1.5501797457897155, + "learning_rate": 1.870553271810362e-05, + "loss": 0.6081968545913696, + "step": 1168 + }, + { + "epoch": 0.4119823788546256, + "grad_norm": 1.324315376857478, + "learning_rate": 1.8702664221652864e-05, + "loss": 0.7269757986068726, + "step": 1169 + }, + { + "epoch": 0.4123348017621145, + "grad_norm": 1.359571395974998, + "learning_rate": 1.8699792770918443e-05, + "loss": 0.6563149094581604, + "step": 1170 + }, + { + 
"epoch": 0.4126872246696035, + "grad_norm": 1.412304869808958, + "learning_rate": 1.8696918366875123e-05, + "loss": 0.6900039911270142, + "step": 1171 + }, + { + "epoch": 0.4130396475770925, + "grad_norm": 1.6666238046463622, + "learning_rate": 1.869404101049868e-05, + "loss": 0.6575014591217041, + "step": 1172 + }, + { + "epoch": 0.4133920704845815, + "grad_norm": 1.7453316480937289, + "learning_rate": 1.8691160702765878e-05, + "loss": 0.8178410530090332, + "step": 1173 + }, + { + "epoch": 0.4137444933920705, + "grad_norm": 1.2369225358107252, + "learning_rate": 1.8688277444654495e-05, + "loss": 0.6247331500053406, + "step": 1174 + }, + { + "epoch": 0.41409691629955947, + "grad_norm": 1.4809443864869283, + "learning_rate": 1.868539123714331e-05, + "loss": 0.7220792770385742, + "step": 1175 + }, + { + "epoch": 0.41444933920704846, + "grad_norm": 1.3133478143499064, + "learning_rate": 1.8682502081212104e-05, + "loss": 0.6279594302177429, + "step": 1176 + }, + { + "epoch": 0.41480176211453745, + "grad_norm": 1.9965951061666904, + "learning_rate": 1.8679609977841646e-05, + "loss": 0.8814467787742615, + "step": 1177 + }, + { + "epoch": 0.41515418502202645, + "grad_norm": 1.337413771448709, + "learning_rate": 1.867671492801372e-05, + "loss": 0.6601974368095398, + "step": 1178 + }, + { + "epoch": 0.41550660792951544, + "grad_norm": 1.5188708939818696, + "learning_rate": 1.8673816932711107e-05, + "loss": 0.7004785537719727, + "step": 1179 + }, + { + "epoch": 0.41585903083700443, + "grad_norm": 1.5057078901191085, + "learning_rate": 1.8670915992917586e-05, + "loss": 0.7409330606460571, + "step": 1180 + }, + { + "epoch": 0.41621145374449336, + "grad_norm": 1.4232223858260633, + "learning_rate": 1.8668012109617933e-05, + "loss": 0.6698065996170044, + "step": 1181 + }, + { + "epoch": 0.41656387665198236, + "grad_norm": 1.5925482634189316, + "learning_rate": 1.8665105283797927e-05, + "loss": 0.7420671582221985, + "step": 1182 + }, + { + "epoch": 0.41691629955947135, + 
"grad_norm": 1.5560634478711484, + "learning_rate": 1.8662195516444345e-05, + "loss": 0.7719774842262268, + "step": 1183 + }, + { + "epoch": 0.41726872246696034, + "grad_norm": 1.4792437797078573, + "learning_rate": 1.8659282808544966e-05, + "loss": 0.6206108331680298, + "step": 1184 + }, + { + "epoch": 0.41762114537444933, + "grad_norm": 1.3470893025550628, + "learning_rate": 1.865636716108856e-05, + "loss": 0.799741268157959, + "step": 1185 + }, + { + "epoch": 0.4179735682819383, + "grad_norm": 1.419455186886867, + "learning_rate": 1.8653448575064893e-05, + "loss": 0.6839771866798401, + "step": 1186 + }, + { + "epoch": 0.4183259911894273, + "grad_norm": 1.4763673797370565, + "learning_rate": 1.8650527051464744e-05, + "loss": 0.7937930822372437, + "step": 1187 + }, + { + "epoch": 0.4186784140969163, + "grad_norm": 2.8190993538517524, + "learning_rate": 1.8647602591279873e-05, + "loss": 0.6819020509719849, + "step": 1188 + }, + { + "epoch": 0.4190308370044053, + "grad_norm": 1.3567646132379503, + "learning_rate": 1.864467519550305e-05, + "loss": 0.75614994764328, + "step": 1189 + }, + { + "epoch": 0.4193832599118943, + "grad_norm": 1.567742841021855, + "learning_rate": 1.864174486512803e-05, + "loss": 0.6966177225112915, + "step": 1190 + }, + { + "epoch": 0.4197356828193833, + "grad_norm": 1.7710714107881367, + "learning_rate": 1.8638811601149568e-05, + "loss": 0.821509838104248, + "step": 1191 + }, + { + "epoch": 0.4200881057268723, + "grad_norm": 1.2328562386437087, + "learning_rate": 1.8635875404563414e-05, + "loss": 0.5905138254165649, + "step": 1192 + }, + { + "epoch": 0.4204405286343612, + "grad_norm": 1.4647056442197128, + "learning_rate": 1.8632936276366323e-05, + "loss": 0.6856247186660767, + "step": 1193 + }, + { + "epoch": 0.4207929515418502, + "grad_norm": 1.4886760353067057, + "learning_rate": 1.862999421755603e-05, + "loss": 0.745036244392395, + "step": 1194 + }, + { + "epoch": 0.4211453744493392, + "grad_norm": 1.1750279689329006, + "learning_rate": 
1.8627049229131276e-05, + "loss": 0.6503005027770996, + "step": 1195 + }, + { + "epoch": 0.4214977973568282, + "grad_norm": 1.5431880343600168, + "learning_rate": 1.86241013120918e-05, + "loss": 0.7498307228088379, + "step": 1196 + }, + { + "epoch": 0.4218502202643172, + "grad_norm": 1.3468463845976426, + "learning_rate": 1.862115046743831e-05, + "loss": 0.7928652763366699, + "step": 1197 + }, + { + "epoch": 0.42220264317180617, + "grad_norm": 1.2342083264732957, + "learning_rate": 1.861819669617254e-05, + "loss": 0.6854137182235718, + "step": 1198 + }, + { + "epoch": 0.42255506607929516, + "grad_norm": 1.2078818370142543, + "learning_rate": 1.86152399992972e-05, + "loss": 0.6196715831756592, + "step": 1199 + }, + { + "epoch": 0.42290748898678415, + "grad_norm": 1.3970249114344502, + "learning_rate": 1.8612280377816e-05, + "loss": 0.6937464475631714, + "step": 1200 + }, + { + "epoch": 0.42325991189427314, + "grad_norm": 1.68603514212184, + "learning_rate": 1.860931783273363e-05, + "loss": 0.7681070566177368, + "step": 1201 + }, + { + "epoch": 0.42361233480176214, + "grad_norm": 1.1472443629032707, + "learning_rate": 1.860635236505579e-05, + "loss": 0.676302969455719, + "step": 1202 + }, + { + "epoch": 0.4239647577092511, + "grad_norm": 1.3856112594345633, + "learning_rate": 1.8603383975789168e-05, + "loss": 0.6533253192901611, + "step": 1203 + }, + { + "epoch": 0.42431718061674006, + "grad_norm": 1.3469284337535972, + "learning_rate": 1.860041266594143e-05, + "loss": 0.689995288848877, + "step": 1204 + }, + { + "epoch": 0.42466960352422906, + "grad_norm": 1.5007772835228577, + "learning_rate": 1.859743843652124e-05, + "loss": 0.8129922747612, + "step": 1205 + }, + { + "epoch": 0.42502202643171805, + "grad_norm": 1.5410683437680426, + "learning_rate": 1.859446128853827e-05, + "loss": 0.8388077616691589, + "step": 1206 + }, + { + "epoch": 0.42537444933920704, + "grad_norm": 1.5558529097869003, + "learning_rate": 1.859148122300316e-05, + "loss": 0.8795225024223328, + 
"step": 1207 + }, + { + "epoch": 0.42572687224669603, + "grad_norm": 1.1213374735945745, + "learning_rate": 1.858849824092755e-05, + "loss": 0.7340251803398132, + "step": 1208 + }, + { + "epoch": 0.426079295154185, + "grad_norm": 1.4951423694810024, + "learning_rate": 1.8585512343324073e-05, + "loss": 0.8028355240821838, + "step": 1209 + }, + { + "epoch": 0.426431718061674, + "grad_norm": 1.4585659256901293, + "learning_rate": 1.8582523531206345e-05, + "loss": 0.8469998836517334, + "step": 1210 + }, + { + "epoch": 0.426784140969163, + "grad_norm": 1.5383443322846213, + "learning_rate": 1.857953180558898e-05, + "loss": 0.7562716007232666, + "step": 1211 + }, + { + "epoch": 0.427136563876652, + "grad_norm": 1.4113837543209433, + "learning_rate": 1.857653716748757e-05, + "loss": 0.7166177034378052, + "step": 1212 + }, + { + "epoch": 0.427488986784141, + "grad_norm": 1.5418199345701933, + "learning_rate": 1.85735396179187e-05, + "loss": 0.6946159601211548, + "step": 1213 + }, + { + "epoch": 0.42784140969163, + "grad_norm": 1.317478160039542, + "learning_rate": 1.8570539157899953e-05, + "loss": 0.5341482758522034, + "step": 1214 + }, + { + "epoch": 0.4281938325991189, + "grad_norm": 1.4287482623115888, + "learning_rate": 1.8567535788449886e-05, + "loss": 0.8128249645233154, + "step": 1215 + }, + { + "epoch": 0.4285462555066079, + "grad_norm": 1.34325298688053, + "learning_rate": 1.8564529510588046e-05, + "loss": 0.7136335372924805, + "step": 1216 + }, + { + "epoch": 0.4288986784140969, + "grad_norm": 1.358163949395023, + "learning_rate": 1.856152032533498e-05, + "loss": 0.6737562417984009, + "step": 1217 + }, + { + "epoch": 0.4292511013215859, + "grad_norm": 1.306172251281951, + "learning_rate": 1.855850823371221e-05, + "loss": 0.8102772235870361, + "step": 1218 + }, + { + "epoch": 0.4296035242290749, + "grad_norm": 1.4109010281873726, + "learning_rate": 1.855549323674224e-05, + "loss": 0.7389130592346191, + "step": 1219 + }, + { + "epoch": 0.4299559471365639, + 
"grad_norm": 1.6519920374913426, + "learning_rate": 1.8552475335448575e-05, + "loss": 0.9127305746078491, + "step": 1220 + }, + { + "epoch": 0.43030837004405287, + "grad_norm": 1.4401162301668198, + "learning_rate": 1.8549454530855697e-05, + "loss": 0.7599691152572632, + "step": 1221 + }, + { + "epoch": 0.43066079295154186, + "grad_norm": 1.59834239528244, + "learning_rate": 1.8546430823989075e-05, + "loss": 0.8343819379806519, + "step": 1222 + }, + { + "epoch": 0.43101321585903085, + "grad_norm": 1.7081796080725813, + "learning_rate": 1.8543404215875163e-05, + "loss": 0.7759256362915039, + "step": 1223 + }, + { + "epoch": 0.43136563876651984, + "grad_norm": 1.3364188660639875, + "learning_rate": 1.8540374707541398e-05, + "loss": 0.7803373336791992, + "step": 1224 + }, + { + "epoch": 0.43171806167400884, + "grad_norm": 1.4538494145578122, + "learning_rate": 1.8537342300016208e-05, + "loss": 0.6292921304702759, + "step": 1225 + }, + { + "epoch": 0.43207048458149777, + "grad_norm": 1.4521641959343445, + "learning_rate": 1.8534306994329e-05, + "loss": 0.8495175838470459, + "step": 1226 + }, + { + "epoch": 0.43242290748898676, + "grad_norm": 1.3062742481146943, + "learning_rate": 1.8531268791510167e-05, + "loss": 0.6141406297683716, + "step": 1227 + }, + { + "epoch": 0.43277533039647575, + "grad_norm": 1.576341879030456, + "learning_rate": 1.8528227692591076e-05, + "loss": 0.7087793350219727, + "step": 1228 + }, + { + "epoch": 0.43312775330396475, + "grad_norm": 1.5442094308389636, + "learning_rate": 1.8525183698604098e-05, + "loss": 0.7919498682022095, + "step": 1229 + }, + { + "epoch": 0.43348017621145374, + "grad_norm": 1.317139155945084, + "learning_rate": 1.8522136810582563e-05, + "loss": 0.7408226728439331, + "step": 1230 + }, + { + "epoch": 0.43383259911894273, + "grad_norm": 1.407715848952146, + "learning_rate": 1.85190870295608e-05, + "loss": 0.7140083312988281, + "step": 1231 + }, + { + "epoch": 0.4341850220264317, + "grad_norm": 1.4117801977693214, + 
"learning_rate": 1.8516034356574118e-05, + "loss": 0.7211521863937378, + "step": 1232 + }, + { + "epoch": 0.4345374449339207, + "grad_norm": 1.1753876244240768, + "learning_rate": 1.85129787926588e-05, + "loss": 0.7103208303451538, + "step": 1233 + }, + { + "epoch": 0.4348898678414097, + "grad_norm": 1.4479636604064312, + "learning_rate": 1.850992033885211e-05, + "loss": 0.816985011100769, + "step": 1234 + }, + { + "epoch": 0.4352422907488987, + "grad_norm": 1.4368000528699751, + "learning_rate": 1.850685899619231e-05, + "loss": 0.6678498983383179, + "step": 1235 + }, + { + "epoch": 0.4355947136563877, + "grad_norm": 1.4259303259837681, + "learning_rate": 1.8503794765718622e-05, + "loss": 0.7895394563674927, + "step": 1236 + }, + { + "epoch": 0.4359471365638766, + "grad_norm": 1.4256180200365283, + "learning_rate": 1.8500727648471258e-05, + "loss": 0.7295971512794495, + "step": 1237 + }, + { + "epoch": 0.4362995594713656, + "grad_norm": 1.552299015894991, + "learning_rate": 1.849765764549141e-05, + "loss": 0.7216300964355469, + "step": 1238 + }, + { + "epoch": 0.4366519823788546, + "grad_norm": 2.585430848560662, + "learning_rate": 1.8494584757821252e-05, + "loss": 0.8088986873626709, + "step": 1239 + }, + { + "epoch": 0.4370044052863436, + "grad_norm": 1.3100612400703413, + "learning_rate": 1.8491508986503928e-05, + "loss": 0.7380663156509399, + "step": 1240 + }, + { + "epoch": 0.4373568281938326, + "grad_norm": 1.6225248085666293, + "learning_rate": 1.8488430332583566e-05, + "loss": 0.8671622276306152, + "step": 1241 + }, + { + "epoch": 0.4377092511013216, + "grad_norm": 1.2548349586148027, + "learning_rate": 1.8485348797105277e-05, + "loss": 0.6649274826049805, + "step": 1242 + }, + { + "epoch": 0.4380616740088106, + "grad_norm": 1.3492988450242405, + "learning_rate": 1.848226438111515e-05, + "loss": 0.740972638130188, + "step": 1243 + }, + { + "epoch": 0.43841409691629957, + "grad_norm": 1.4062352938849376, + "learning_rate": 1.8479177085660237e-05, + "loss": 
0.6593915820121765, + "step": 1244 + }, + { + "epoch": 0.43876651982378856, + "grad_norm": 1.567811244473075, + "learning_rate": 1.8476086911788588e-05, + "loss": 0.792604923248291, + "step": 1245 + }, + { + "epoch": 0.43911894273127755, + "grad_norm": 1.583820790059346, + "learning_rate": 1.8472993860549216e-05, + "loss": 0.7521885633468628, + "step": 1246 + }, + { + "epoch": 0.43947136563876654, + "grad_norm": 1.4520072830804587, + "learning_rate": 1.846989793299212e-05, + "loss": 0.7242246270179749, + "step": 1247 + }, + { + "epoch": 0.43982378854625553, + "grad_norm": 1.2892821056189339, + "learning_rate": 1.846679913016827e-05, + "loss": 0.7343394160270691, + "step": 1248 + }, + { + "epoch": 0.44017621145374447, + "grad_norm": 1.2525729631593605, + "learning_rate": 1.846369745312961e-05, + "loss": 0.747876763343811, + "step": 1249 + }, + { + "epoch": 0.44052863436123346, + "grad_norm": 1.428983542355963, + "learning_rate": 1.8460592902929064e-05, + "loss": 0.7280946969985962, + "step": 1250 + }, + { + "epoch": 0.44088105726872245, + "grad_norm": 1.4254243168735732, + "learning_rate": 1.845748548062053e-05, + "loss": 0.7288519144058228, + "step": 1251 + }, + { + "epoch": 0.44123348017621145, + "grad_norm": 1.4847519735948493, + "learning_rate": 1.8454375187258885e-05, + "loss": 0.6269914507865906, + "step": 1252 + }, + { + "epoch": 0.44158590308370044, + "grad_norm": 1.5355271633317282, + "learning_rate": 1.8451262023899973e-05, + "loss": 0.7848949432373047, + "step": 1253 + }, + { + "epoch": 0.44193832599118943, + "grad_norm": 1.580356922946946, + "learning_rate": 1.8448145991600618e-05, + "loss": 0.7306517958641052, + "step": 1254 + }, + { + "epoch": 0.4422907488986784, + "grad_norm": 1.3971874565683924, + "learning_rate": 1.8445027091418614e-05, + "loss": 0.6933906078338623, + "step": 1255 + }, + { + "epoch": 0.4426431718061674, + "grad_norm": 1.2942221540854206, + "learning_rate": 1.8441905324412732e-05, + "loss": 0.8260579109191895, + "step": 1256 + }, + { 
+ "epoch": 0.4429955947136564, + "grad_norm": 1.4276139754434451, + "learning_rate": 1.8438780691642712e-05, + "loss": 0.6818344593048096, + "step": 1257 + }, + { + "epoch": 0.4433480176211454, + "grad_norm": 1.5571344695334373, + "learning_rate": 1.8435653194169274e-05, + "loss": 0.5980014801025391, + "step": 1258 + }, + { + "epoch": 0.4437004405286344, + "grad_norm": 1.6363647319534165, + "learning_rate": 1.8432522833054102e-05, + "loss": 0.7694655656814575, + "step": 1259 + }, + { + "epoch": 0.4440528634361233, + "grad_norm": 1.4888452953161495, + "learning_rate": 1.842938960935986e-05, + "loss": 0.6861646771430969, + "step": 1260 + }, + { + "epoch": 0.4444052863436123, + "grad_norm": 1.5245731543783476, + "learning_rate": 1.8426253524150176e-05, + "loss": 0.7346323728561401, + "step": 1261 + }, + { + "epoch": 0.4447577092511013, + "grad_norm": 1.5555183873270297, + "learning_rate": 1.8423114578489657e-05, + "loss": 0.7116265296936035, + "step": 1262 + }, + { + "epoch": 0.4451101321585903, + "grad_norm": 1.3587295641859045, + "learning_rate": 1.8419972773443877e-05, + "loss": 0.7148594856262207, + "step": 1263 + }, + { + "epoch": 0.4454625550660793, + "grad_norm": 1.4208610042885819, + "learning_rate": 1.8416828110079378e-05, + "loss": 0.6629737615585327, + "step": 1264 + }, + { + "epoch": 0.4458149779735683, + "grad_norm": 1.2215430932959532, + "learning_rate": 1.8413680589463677e-05, + "loss": 0.5734454393386841, + "step": 1265 + }, + { + "epoch": 0.4461674008810573, + "grad_norm": 1.4728067026699625, + "learning_rate": 1.8410530212665258e-05, + "loss": 0.8129212260246277, + "step": 1266 + }, + { + "epoch": 0.44651982378854627, + "grad_norm": 1.5823039225136746, + "learning_rate": 1.8407376980753578e-05, + "loss": 0.7408754825592041, + "step": 1267 + }, + { + "epoch": 0.44687224669603526, + "grad_norm": 2.9520848026313633, + "learning_rate": 1.840422089479906e-05, + "loss": 0.7315034866333008, + "step": 1268 + }, + { + "epoch": 0.44722466960352425, + 
"grad_norm": 1.453693040198655, + "learning_rate": 1.8401061955873102e-05, + "loss": 0.6774684190750122, + "step": 1269 + }, + { + "epoch": 0.44757709251101324, + "grad_norm": 1.4189733125983666, + "learning_rate": 1.8397900165048055e-05, + "loss": 0.6615294814109802, + "step": 1270 + }, + { + "epoch": 0.4479295154185022, + "grad_norm": 1.465563156151872, + "learning_rate": 1.8394735523397258e-05, + "loss": 0.6757136583328247, + "step": 1271 + }, + { + "epoch": 0.44828193832599117, + "grad_norm": 1.3581337883847424, + "learning_rate": 1.8391568031995004e-05, + "loss": 0.6395466327667236, + "step": 1272 + }, + { + "epoch": 0.44863436123348016, + "grad_norm": 1.3957918327614203, + "learning_rate": 1.8388397691916556e-05, + "loss": 0.6436404585838318, + "step": 1273 + }, + { + "epoch": 0.44898678414096915, + "grad_norm": 1.2217258095016672, + "learning_rate": 1.838522450423815e-05, + "loss": 0.6280484199523926, + "step": 1274 + }, + { + "epoch": 0.44933920704845814, + "grad_norm": 1.3831470857016404, + "learning_rate": 1.8382048470036983e-05, + "loss": 0.7485225200653076, + "step": 1275 + }, + { + "epoch": 0.44969162995594714, + "grad_norm": 1.5437699808102354, + "learning_rate": 1.8378869590391217e-05, + "loss": 0.745079517364502, + "step": 1276 + }, + { + "epoch": 0.45004405286343613, + "grad_norm": 1.5902187054867891, + "learning_rate": 1.8375687866379988e-05, + "loss": 0.656510591506958, + "step": 1277 + }, + { + "epoch": 0.4503964757709251, + "grad_norm": 1.542738255105748, + "learning_rate": 1.8372503299083392e-05, + "loss": 0.7122445106506348, + "step": 1278 + }, + { + "epoch": 0.4507488986784141, + "grad_norm": 1.5368544285826038, + "learning_rate": 1.8369315889582483e-05, + "loss": 0.7402621507644653, + "step": 1279 + }, + { + "epoch": 0.4511013215859031, + "grad_norm": 1.539047411882563, + "learning_rate": 1.8366125638959292e-05, + "loss": 0.79311203956604, + "step": 1280 + }, + { + "epoch": 0.4514537444933921, + "grad_norm": 1.5615114889746888, + 
"learning_rate": 1.8362932548296815e-05, + "loss": 0.7748456001281738, + "step": 1281 + }, + { + "epoch": 0.45180616740088103, + "grad_norm": 1.4203050333533118, + "learning_rate": 1.8359736618679e-05, + "loss": 0.8285728096961975, + "step": 1282 + }, + { + "epoch": 0.45215859030837, + "grad_norm": 1.5541412727714081, + "learning_rate": 1.835653785119076e-05, + "loss": 0.7874733209609985, + "step": 1283 + }, + { + "epoch": 0.452511013215859, + "grad_norm": 1.990742110424804, + "learning_rate": 1.8353336246917996e-05, + "loss": 0.8984566926956177, + "step": 1284 + }, + { + "epoch": 0.452863436123348, + "grad_norm": 1.5779572276747513, + "learning_rate": 1.8350131806947537e-05, + "loss": 0.7730413675308228, + "step": 1285 + }, + { + "epoch": 0.453215859030837, + "grad_norm": 1.7109096071986905, + "learning_rate": 1.8346924532367195e-05, + "loss": 0.6064612865447998, + "step": 1286 + }, + { + "epoch": 0.453568281938326, + "grad_norm": 1.2417304411100711, + "learning_rate": 1.8343714424265742e-05, + "loss": 0.6946402192115784, + "step": 1287 + }, + { + "epoch": 0.453920704845815, + "grad_norm": 1.4035686433407615, + "learning_rate": 1.8340501483732908e-05, + "loss": 0.6131751537322998, + "step": 1288 + }, + { + "epoch": 0.454273127753304, + "grad_norm": 1.5800587203565855, + "learning_rate": 1.833728571185938e-05, + "loss": 0.7251182794570923, + "step": 1289 + }, + { + "epoch": 0.45462555066079297, + "grad_norm": 1.4036983560957499, + "learning_rate": 1.8334067109736826e-05, + "loss": 0.6548069715499878, + "step": 1290 + }, + { + "epoch": 0.45497797356828196, + "grad_norm": 1.3998869795024185, + "learning_rate": 1.833084567845785e-05, + "loss": 0.7416098117828369, + "step": 1291 + }, + { + "epoch": 0.45533039647577095, + "grad_norm": 2.508404695128388, + "learning_rate": 1.8327621419116034e-05, + "loss": 0.7320964932441711, + "step": 1292 + }, + { + "epoch": 0.4556828193832599, + "grad_norm": 1.3052290617356537, + "learning_rate": 1.8324394332805913e-05, + "loss": 
0.5926196575164795, + "step": 1293 + }, + { + "epoch": 0.4560352422907489, + "grad_norm": 1.5674410721277312, + "learning_rate": 1.8321164420622977e-05, + "loss": 0.5294085741043091, + "step": 1294 + }, + { + "epoch": 0.45638766519823787, + "grad_norm": 1.2785938430138426, + "learning_rate": 1.8317931683663688e-05, + "loss": 0.6332723498344421, + "step": 1295 + }, + { + "epoch": 0.45674008810572686, + "grad_norm": 1.5962686180302166, + "learning_rate": 1.8314696123025456e-05, + "loss": 0.8361148834228516, + "step": 1296 + }, + { + "epoch": 0.45709251101321585, + "grad_norm": 1.4587382180744954, + "learning_rate": 1.8311457739806648e-05, + "loss": 0.8097354173660278, + "step": 1297 + }, + { + "epoch": 0.45744493392070484, + "grad_norm": 1.5247898400944095, + "learning_rate": 1.8308216535106606e-05, + "loss": 0.8619102239608765, + "step": 1298 + }, + { + "epoch": 0.45779735682819384, + "grad_norm": 1.7222438621078806, + "learning_rate": 1.8304972510025607e-05, + "loss": 0.8149014711380005, + "step": 1299 + }, + { + "epoch": 0.4581497797356828, + "grad_norm": 1.4821216839710079, + "learning_rate": 1.8301725665664904e-05, + "loss": 0.6217210292816162, + "step": 1300 + }, + { + "epoch": 0.4585022026431718, + "grad_norm": 1.3606031472973286, + "learning_rate": 1.8298476003126695e-05, + "loss": 0.7496612071990967, + "step": 1301 + }, + { + "epoch": 0.4588546255506608, + "grad_norm": 1.3221676149271377, + "learning_rate": 1.8295223523514144e-05, + "loss": 0.743242084980011, + "step": 1302 + }, + { + "epoch": 0.4592070484581498, + "grad_norm": 1.3745674408132749, + "learning_rate": 1.829196822793136e-05, + "loss": 0.6425061821937561, + "step": 1303 + }, + { + "epoch": 0.4595594713656388, + "grad_norm": 1.6216951689157317, + "learning_rate": 1.828871011748342e-05, + "loss": 0.8274835348129272, + "step": 1304 + }, + { + "epoch": 0.45991189427312773, + "grad_norm": 1.2722833909738493, + "learning_rate": 1.828544919327635e-05, + "loss": 0.6403865814208984, + "step": 1305 + }, + 
{ + "epoch": 0.4602643171806167, + "grad_norm": 1.234115960449283, + "learning_rate": 1.828218545641713e-05, + "loss": 0.6585257053375244, + "step": 1306 + }, + { + "epoch": 0.4606167400881057, + "grad_norm": 1.2325421263478973, + "learning_rate": 1.82789189080137e-05, + "loss": 0.6467862129211426, + "step": 1307 + }, + { + "epoch": 0.4609691629955947, + "grad_norm": 1.403654297681647, + "learning_rate": 1.827564954917495e-05, + "loss": 0.8656524419784546, + "step": 1308 + }, + { + "epoch": 0.4613215859030837, + "grad_norm": 1.449712147167455, + "learning_rate": 1.8272377381010726e-05, + "loss": 0.7298469543457031, + "step": 1309 + }, + { + "epoch": 0.4616740088105727, + "grad_norm": 1.575558340533703, + "learning_rate": 1.8269102404631826e-05, + "loss": 0.7342871427536011, + "step": 1310 + }, + { + "epoch": 0.4620264317180617, + "grad_norm": 1.4177026442874099, + "learning_rate": 1.8265824621150005e-05, + "loss": 0.7437269687652588, + "step": 1311 + }, + { + "epoch": 0.4623788546255507, + "grad_norm": 1.370008690924395, + "learning_rate": 1.8262544031677965e-05, + "loss": 0.6761496067047119, + "step": 1312 + }, + { + "epoch": 0.46273127753303966, + "grad_norm": 1.3488719018465838, + "learning_rate": 1.825926063732937e-05, + "loss": 0.6504565477371216, + "step": 1313 + }, + { + "epoch": 0.46308370044052866, + "grad_norm": 1.5002490307110308, + "learning_rate": 1.8255974439218826e-05, + "loss": 0.7058892250061035, + "step": 1314 + }, + { + "epoch": 0.46343612334801765, + "grad_norm": 1.37061056314256, + "learning_rate": 1.8252685438461893e-05, + "loss": 0.704500675201416, + "step": 1315 + }, + { + "epoch": 0.4637885462555066, + "grad_norm": 1.3921050444029468, + "learning_rate": 1.824939363617509e-05, + "loss": 0.7438445091247559, + "step": 1316 + }, + { + "epoch": 0.4641409691629956, + "grad_norm": 1.4372002500080507, + "learning_rate": 1.8246099033475872e-05, + "loss": 0.6610915660858154, + "step": 1317 + }, + { + "epoch": 0.46449339207048457, + "grad_norm": 
1.0745723869419856, + "learning_rate": 1.8242801631482666e-05, + "loss": 0.5868711471557617, + "step": 1318 + }, + { + "epoch": 0.46484581497797356, + "grad_norm": 1.192238188456442, + "learning_rate": 1.8239501431314828e-05, + "loss": 0.7403215765953064, + "step": 1319 + }, + { + "epoch": 0.46519823788546255, + "grad_norm": 1.2444894883495399, + "learning_rate": 1.823619843409268e-05, + "loss": 0.6836927533149719, + "step": 1320 + }, + { + "epoch": 0.46555066079295154, + "grad_norm": 1.4619703465719247, + "learning_rate": 1.8232892640937482e-05, + "loss": 0.744488537311554, + "step": 1321 + }, + { + "epoch": 0.46590308370044053, + "grad_norm": 1.6337099192848834, + "learning_rate": 1.822958405297145e-05, + "loss": 0.8203051090240479, + "step": 1322 + }, + { + "epoch": 0.4662555066079295, + "grad_norm": 1.184261838198034, + "learning_rate": 1.8226272671317747e-05, + "loss": 0.6452913284301758, + "step": 1323 + }, + { + "epoch": 0.4666079295154185, + "grad_norm": 1.6458345614686154, + "learning_rate": 1.8222958497100482e-05, + "loss": 0.7362639307975769, + "step": 1324 + }, + { + "epoch": 0.4669603524229075, + "grad_norm": 6.608293048647877, + "learning_rate": 1.8219641531444713e-05, + "loss": 0.8192600011825562, + "step": 1325 + }, + { + "epoch": 0.4673127753303965, + "grad_norm": 1.4257376230679313, + "learning_rate": 1.8216321775476452e-05, + "loss": 0.8391410112380981, + "step": 1326 + }, + { + "epoch": 0.46766519823788544, + "grad_norm": 1.3133795307817668, + "learning_rate": 1.8212999230322648e-05, + "loss": 0.8723593354225159, + "step": 1327 + }, + { + "epoch": 0.46801762114537443, + "grad_norm": 1.4218119484201381, + "learning_rate": 1.8209673897111208e-05, + "loss": 0.6891233921051025, + "step": 1328 + }, + { + "epoch": 0.4683700440528634, + "grad_norm": 1.414801660380672, + "learning_rate": 1.820634577697097e-05, + "loss": 0.6585180759429932, + "step": 1329 + }, + { + "epoch": 0.4687224669603524, + "grad_norm": 1.503205293925671, + "learning_rate": 
1.8203014871031732e-05, + "loss": 0.9556418657302856, + "step": 1330 + }, + { + "epoch": 0.4690748898678414, + "grad_norm": 1.491345239113851, + "learning_rate": 1.8199681180424234e-05, + "loss": 0.803380012512207, + "step": 1331 + }, + { + "epoch": 0.4694273127753304, + "grad_norm": 1.6217603270172032, + "learning_rate": 1.819634470628016e-05, + "loss": 0.7090115547180176, + "step": 1332 + }, + { + "epoch": 0.4697797356828194, + "grad_norm": 1.6705712009535991, + "learning_rate": 1.8193005449732134e-05, + "loss": 0.6314720511436462, + "step": 1333 + }, + { + "epoch": 0.4701321585903084, + "grad_norm": 1.4756439095691731, + "learning_rate": 1.8189663411913737e-05, + "loss": 0.72248375415802, + "step": 1334 + }, + { + "epoch": 0.47048458149779737, + "grad_norm": 1.2477075880097683, + "learning_rate": 1.818631859395948e-05, + "loss": 0.6192474961280823, + "step": 1335 + }, + { + "epoch": 0.47083700440528636, + "grad_norm": 1.4944381119847567, + "learning_rate": 1.818297099700483e-05, + "loss": 0.6354564428329468, + "step": 1336 + }, + { + "epoch": 0.47118942731277536, + "grad_norm": 1.3129251382794922, + "learning_rate": 1.817962062218619e-05, + "loss": 0.7577195167541504, + "step": 1337 + }, + { + "epoch": 0.4715418502202643, + "grad_norm": 1.5504293722974503, + "learning_rate": 1.8176267470640908e-05, + "loss": 0.8064994812011719, + "step": 1338 + }, + { + "epoch": 0.4718942731277533, + "grad_norm": 1.211182925950848, + "learning_rate": 1.8172911543507276e-05, + "loss": 0.5994154214859009, + "step": 1339 + }, + { + "epoch": 0.4722466960352423, + "grad_norm": 1.701641381957404, + "learning_rate": 1.8169552841924524e-05, + "loss": 0.7483634948730469, + "step": 1340 + }, + { + "epoch": 0.47259911894273127, + "grad_norm": 1.5346948984560977, + "learning_rate": 1.8166191367032828e-05, + "loss": 0.817699134349823, + "step": 1341 + }, + { + "epoch": 0.47295154185022026, + "grad_norm": 1.4634504483386954, + "learning_rate": 1.8162827119973305e-05, + "loss": 
0.7262923717498779, + "step": 1342 + }, + { + "epoch": 0.47330396475770925, + "grad_norm": 1.6796646988667925, + "learning_rate": 1.8159460101888013e-05, + "loss": 0.6097851991653442, + "step": 1343 + }, + { + "epoch": 0.47365638766519824, + "grad_norm": 1.3148094915971675, + "learning_rate": 1.8156090313919944e-05, + "loss": 0.7258971929550171, + "step": 1344 + }, + { + "epoch": 0.47400881057268723, + "grad_norm": 1.198607169385478, + "learning_rate": 1.8152717757213045e-05, + "loss": 0.6300361156463623, + "step": 1345 + }, + { + "epoch": 0.4743612334801762, + "grad_norm": 1.397827708634256, + "learning_rate": 1.8149342432912184e-05, + "loss": 0.7339942455291748, + "step": 1346 + }, + { + "epoch": 0.4747136563876652, + "grad_norm": 1.4524082687419129, + "learning_rate": 1.8145964342163188e-05, + "loss": 0.7520095109939575, + "step": 1347 + }, + { + "epoch": 0.4750660792951542, + "grad_norm": 1.6587168399408485, + "learning_rate": 1.814258348611281e-05, + "loss": 0.7276853322982788, + "step": 1348 + }, + { + "epoch": 0.47541850220264315, + "grad_norm": 1.4463166573664321, + "learning_rate": 1.8139199865908742e-05, + "loss": 0.8004029989242554, + "step": 1349 + }, + { + "epoch": 0.47577092511013214, + "grad_norm": 1.4508723815154267, + "learning_rate": 1.8135813482699623e-05, + "loss": 0.6932536363601685, + "step": 1350 + }, + { + "epoch": 0.47612334801762113, + "grad_norm": 1.8868515127553653, + "learning_rate": 1.8132424337635026e-05, + "loss": 0.7697082161903381, + "step": 1351 + }, + { + "epoch": 0.4764757709251101, + "grad_norm": 1.246718000700102, + "learning_rate": 1.8129032431865453e-05, + "loss": 0.6472513675689697, + "step": 1352 + }, + { + "epoch": 0.4768281938325991, + "grad_norm": 1.413046013449196, + "learning_rate": 1.8125637766542353e-05, + "loss": 0.6483110785484314, + "step": 1353 + }, + { + "epoch": 0.4771806167400881, + "grad_norm": 1.4854860856809686, + "learning_rate": 1.8122240342818113e-05, + "loss": 0.5495485067367554, + "step": 1354 + }, + 
{ + "epoch": 0.4775330396475771, + "grad_norm": 1.2801602602197804, + "learning_rate": 1.811884016184605e-05, + "loss": 0.5235577821731567, + "step": 1355 + }, + { + "epoch": 0.4778854625550661, + "grad_norm": 1.734412256759482, + "learning_rate": 1.811543722478042e-05, + "loss": 0.7852121591567993, + "step": 1356 + }, + { + "epoch": 0.4782378854625551, + "grad_norm": 1.3650060645350073, + "learning_rate": 1.811203153277641e-05, + "loss": 0.6704862713813782, + "step": 1357 + }, + { + "epoch": 0.47859030837004407, + "grad_norm": 1.6553040991032588, + "learning_rate": 1.8108623086990156e-05, + "loss": 0.5964453220367432, + "step": 1358 + }, + { + "epoch": 0.47894273127753306, + "grad_norm": 1.3936312619950861, + "learning_rate": 1.8105211888578708e-05, + "loss": 0.6697995066642761, + "step": 1359 + }, + { + "epoch": 0.479295154185022, + "grad_norm": 1.5031130965144783, + "learning_rate": 1.810179793870007e-05, + "loss": 0.6335821151733398, + "step": 1360 + }, + { + "epoch": 0.479647577092511, + "grad_norm": 1.5635708705560234, + "learning_rate": 1.8098381238513173e-05, + "loss": 0.7925145626068115, + "step": 1361 + }, + { + "epoch": 0.48, + "grad_norm": 1.3011545804458011, + "learning_rate": 1.809496178917787e-05, + "loss": 0.6567563414573669, + "step": 1362 + }, + { + "epoch": 0.480352422907489, + "grad_norm": 1.6816341182204335, + "learning_rate": 1.809153959185497e-05, + "loss": 0.6318811178207397, + "step": 1363 + }, + { + "epoch": 0.48070484581497797, + "grad_norm": 1.6781349693525882, + "learning_rate": 1.8088114647706195e-05, + "loss": 0.7309727668762207, + "step": 1364 + }, + { + "epoch": 0.48105726872246696, + "grad_norm": 1.689289351270497, + "learning_rate": 1.8084686957894207e-05, + "loss": 0.7109836339950562, + "step": 1365 + }, + { + "epoch": 0.48140969162995595, + "grad_norm": 1.5638040238741844, + "learning_rate": 1.8081256523582604e-05, + "loss": 0.7475707530975342, + "step": 1366 + }, + { + "epoch": 0.48176211453744494, + "grad_norm": 
1.492251829838995, + "learning_rate": 1.8077823345935904e-05, + "loss": 0.7149914503097534, + "step": 1367 + }, + { + "epoch": 0.48211453744493393, + "grad_norm": 1.5575297411632822, + "learning_rate": 1.8074387426119574e-05, + "loss": 0.7294478416442871, + "step": 1368 + }, + { + "epoch": 0.4824669603524229, + "grad_norm": 1.4689289799329066, + "learning_rate": 1.8070948765299995e-05, + "loss": 0.7115635871887207, + "step": 1369 + }, + { + "epoch": 0.4828193832599119, + "grad_norm": 1.5506146763507274, + "learning_rate": 1.806750736464449e-05, + "loss": 0.7046270966529846, + "step": 1370 + }, + { + "epoch": 0.4831718061674009, + "grad_norm": 1.42427078791196, + "learning_rate": 1.8064063225321305e-05, + "loss": 0.6206589937210083, + "step": 1371 + }, + { + "epoch": 0.48352422907488984, + "grad_norm": 1.2993997688945442, + "learning_rate": 1.8060616348499612e-05, + "loss": 0.7135940194129944, + "step": 1372 + }, + { + "epoch": 0.48387665198237884, + "grad_norm": 1.4643276104475023, + "learning_rate": 1.8057166735349533e-05, + "loss": 0.8360849618911743, + "step": 1373 + }, + { + "epoch": 0.4842290748898678, + "grad_norm": 1.541707883618089, + "learning_rate": 1.805371438704209e-05, + "loss": 0.6842360496520996, + "step": 1374 + }, + { + "epoch": 0.4845814977973568, + "grad_norm": 1.5840103891509227, + "learning_rate": 1.8050259304749254e-05, + "loss": 0.7615031003952026, + "step": 1375 + }, + { + "epoch": 0.4849339207048458, + "grad_norm": 1.655426139564667, + "learning_rate": 1.804680148964392e-05, + "loss": 0.8019323348999023, + "step": 1376 + }, + { + "epoch": 0.4852863436123348, + "grad_norm": 1.4674909380859245, + "learning_rate": 1.8043340942899906e-05, + "loss": 0.7882958054542542, + "step": 1377 + }, + { + "epoch": 0.4856387665198238, + "grad_norm": 1.2708485815687132, + "learning_rate": 1.8039877665691955e-05, + "loss": 0.7504314184188843, + "step": 1378 + }, + { + "epoch": 0.4859911894273128, + "grad_norm": 1.4258268452315883, + "learning_rate": 
1.803641165919575e-05, + "loss": 0.6634547710418701, + "step": 1379 + }, + { + "epoch": 0.4863436123348018, + "grad_norm": 1.5654330696713128, + "learning_rate": 1.803294292458789e-05, + "loss": 0.7744965553283691, + "step": 1380 + }, + { + "epoch": 0.48669603524229077, + "grad_norm": 1.4537322254817193, + "learning_rate": 1.8029471463045904e-05, + "loss": 0.6322098970413208, + "step": 1381 + }, + { + "epoch": 0.48704845814977976, + "grad_norm": 1.4410033770501562, + "learning_rate": 1.8025997275748237e-05, + "loss": 0.7675940990447998, + "step": 1382 + }, + { + "epoch": 0.4874008810572687, + "grad_norm": 1.4138967124963124, + "learning_rate": 1.8022520363874275e-05, + "loss": 0.805001974105835, + "step": 1383 + }, + { + "epoch": 0.4877533039647577, + "grad_norm": 1.4447058519334661, + "learning_rate": 1.8019040728604322e-05, + "loss": 0.7647902369499207, + "step": 1384 + }, + { + "epoch": 0.4881057268722467, + "grad_norm": 1.4676048165311881, + "learning_rate": 1.8015558371119604e-05, + "loss": 0.7267208099365234, + "step": 1385 + }, + { + "epoch": 0.4884581497797357, + "grad_norm": 1.698344162431053, + "learning_rate": 1.801207329260227e-05, + "loss": 0.9259899854660034, + "step": 1386 + }, + { + "epoch": 0.48881057268722466, + "grad_norm": 1.6007666753359713, + "learning_rate": 1.8008585494235398e-05, + "loss": 0.7127895951271057, + "step": 1387 + }, + { + "epoch": 0.48916299559471366, + "grad_norm": 1.3612199688450533, + "learning_rate": 1.8005094977202987e-05, + "loss": 0.5890867710113525, + "step": 1388 + }, + { + "epoch": 0.48951541850220265, + "grad_norm": 1.355680060820382, + "learning_rate": 1.800160174268996e-05, + "loss": 0.9388052225112915, + "step": 1389 + }, + { + "epoch": 0.48986784140969164, + "grad_norm": 1.3938222391852138, + "learning_rate": 1.799810579188216e-05, + "loss": 0.7282747626304626, + "step": 1390 + }, + { + "epoch": 0.49022026431718063, + "grad_norm": 1.3481077360000804, + "learning_rate": 1.7994607125966354e-05, + "loss": 
0.743558943271637, + "step": 1391 + }, + { + "epoch": 0.4905726872246696, + "grad_norm": 1.5830453320245632, + "learning_rate": 1.7991105746130234e-05, + "loss": 0.794719934463501, + "step": 1392 + }, + { + "epoch": 0.4909251101321586, + "grad_norm": 1.2758935421604947, + "learning_rate": 1.7987601653562402e-05, + "loss": 0.7320685982704163, + "step": 1393 + }, + { + "epoch": 0.49127753303964755, + "grad_norm": 1.7642547814838838, + "learning_rate": 1.798409484945239e-05, + "loss": 0.7376105785369873, + "step": 1394 + }, + { + "epoch": 0.49162995594713654, + "grad_norm": 1.2029848235346605, + "learning_rate": 1.7980585334990652e-05, + "loss": 0.7474706172943115, + "step": 1395 + }, + { + "epoch": 0.49198237885462553, + "grad_norm": 1.2018884579546327, + "learning_rate": 1.797707311136856e-05, + "loss": 0.5799805521965027, + "step": 1396 + }, + { + "epoch": 0.4923348017621145, + "grad_norm": 1.4260726798049534, + "learning_rate": 1.79735581797784e-05, + "loss": 0.7515959739685059, + "step": 1397 + }, + { + "epoch": 0.4926872246696035, + "grad_norm": 1.4843732287701579, + "learning_rate": 1.797004054141339e-05, + "loss": 0.6035799980163574, + "step": 1398 + }, + { + "epoch": 0.4930396475770925, + "grad_norm": 1.4699634461145672, + "learning_rate": 1.796652019746765e-05, + "loss": 0.7613668441772461, + "step": 1399 + }, + { + "epoch": 0.4933920704845815, + "grad_norm": 1.5395256627563776, + "learning_rate": 1.7962997149136226e-05, + "loss": 0.8780882954597473, + "step": 1400 + }, + { + "epoch": 0.4937444933920705, + "grad_norm": 1.4849311758521768, + "learning_rate": 1.795947139761509e-05, + "loss": 0.8661091327667236, + "step": 1401 + }, + { + "epoch": 0.4940969162995595, + "grad_norm": 1.2531714361223334, + "learning_rate": 1.7955942944101124e-05, + "loss": 0.6893571019172668, + "step": 1402 + }, + { + "epoch": 0.4944493392070485, + "grad_norm": 1.4079915487364913, + "learning_rate": 1.7952411789792125e-05, + "loss": 0.787032961845398, + "step": 1403 + }, + { + 
"epoch": 0.49480176211453747, + "grad_norm": 1.3474472991478739, + "learning_rate": 1.7948877935886812e-05, + "loss": 0.5346347689628601, + "step": 1404 + }, + { + "epoch": 0.4951541850220264, + "grad_norm": 1.5512557601329955, + "learning_rate": 1.7945341383584818e-05, + "loss": 0.8090060949325562, + "step": 1405 + }, + { + "epoch": 0.4955066079295154, + "grad_norm": 1.4268796756971738, + "learning_rate": 1.7941802134086695e-05, + "loss": 0.6321496963500977, + "step": 1406 + }, + { + "epoch": 0.4958590308370044, + "grad_norm": 1.4602718850691796, + "learning_rate": 1.7938260188593903e-05, + "loss": 0.6405632495880127, + "step": 1407 + }, + { + "epoch": 0.4962114537444934, + "grad_norm": 1.3838752085896924, + "learning_rate": 1.7934715548308825e-05, + "loss": 0.7665356397628784, + "step": 1408 + }, + { + "epoch": 0.49656387665198237, + "grad_norm": 1.6983169415711221, + "learning_rate": 1.7931168214434757e-05, + "loss": 0.7960416078567505, + "step": 1409 + }, + { + "epoch": 0.49691629955947136, + "grad_norm": 1.3842449461014021, + "learning_rate": 1.7927618188175908e-05, + "loss": 0.8080639839172363, + "step": 1410 + }, + { + "epoch": 0.49726872246696036, + "grad_norm": 1.3034648934851016, + "learning_rate": 1.79240654707374e-05, + "loss": 0.6503266096115112, + "step": 1411 + }, + { + "epoch": 0.49762114537444935, + "grad_norm": 1.3378534420648176, + "learning_rate": 1.792051006332527e-05, + "loss": 0.6063007116317749, + "step": 1412 + }, + { + "epoch": 0.49797356828193834, + "grad_norm": 1.849150255820523, + "learning_rate": 1.791695196714647e-05, + "loss": 0.6861660480499268, + "step": 1413 + }, + { + "epoch": 0.49832599118942733, + "grad_norm": 1.2217791382902905, + "learning_rate": 1.791339118340886e-05, + "loss": 0.7064980268478394, + "step": 1414 + }, + { + "epoch": 0.4986784140969163, + "grad_norm": 1.4370359000865323, + "learning_rate": 1.7909827713321214e-05, + "loss": 0.6102496981620789, + "step": 1415 + }, + { + "epoch": 0.49903083700440526, + 
"grad_norm": 1.446734818664789, + "learning_rate": 1.790626155809323e-05, + "loss": 0.7460618019104004, + "step": 1416 + }, + { + "epoch": 0.49938325991189425, + "grad_norm": 1.2988677548719765, + "learning_rate": 1.7902692718935496e-05, + "loss": 0.7124448418617249, + "step": 1417 + }, + { + "epoch": 0.49973568281938324, + "grad_norm": 1.322744101240627, + "learning_rate": 1.7899121197059525e-05, + "loss": 0.7194923162460327, + "step": 1418 + }, + { + "epoch": 0.5000881057268722, + "grad_norm": 1.4429377947794157, + "learning_rate": 1.7895546993677736e-05, + "loss": 0.6633901596069336, + "step": 1419 + }, + { + "epoch": 0.5004405286343613, + "grad_norm": 1.5531583469807302, + "learning_rate": 1.7891970110003463e-05, + "loss": 0.8554216623306274, + "step": 1420 + }, + { + "epoch": 0.5007929515418502, + "grad_norm": 1.4541421669927512, + "learning_rate": 1.7888390547250944e-05, + "loss": 0.7259502410888672, + "step": 1421 + }, + { + "epoch": 0.5011453744493392, + "grad_norm": 1.4299229413313208, + "learning_rate": 1.788480830663533e-05, + "loss": 0.7330816984176636, + "step": 1422 + }, + { + "epoch": 0.5014977973568282, + "grad_norm": 1.5727227347094554, + "learning_rate": 1.7881223389372678e-05, + "loss": 0.7793391346931458, + "step": 1423 + }, + { + "epoch": 0.5018502202643171, + "grad_norm": 1.5101282054621992, + "learning_rate": 1.787763579667996e-05, + "loss": 0.7387483716011047, + "step": 1424 + }, + { + "epoch": 0.5022026431718062, + "grad_norm": 1.2875272836020812, + "learning_rate": 1.787404552977505e-05, + "loss": 0.6665850877761841, + "step": 1425 + }, + { + "epoch": 0.5025550660792951, + "grad_norm": 1.6443234538305773, + "learning_rate": 1.7870452589876733e-05, + "loss": 0.7487791180610657, + "step": 1426 + }, + { + "epoch": 0.5029074889867842, + "grad_norm": 1.5494170755115177, + "learning_rate": 1.78668569782047e-05, + "loss": 0.6048247814178467, + "step": 1427 + }, + { + "epoch": 0.5032599118942731, + "grad_norm": 1.2664597501734751, + 
"learning_rate": 1.786325869597955e-05, + "loss": 0.7196261882781982, + "step": 1428 + }, + { + "epoch": 0.5036123348017622, + "grad_norm": 1.8296774166979555, + "learning_rate": 1.785965774442278e-05, + "loss": 0.6845135688781738, + "step": 1429 + }, + { + "epoch": 0.5039647577092511, + "grad_norm": 1.4157663102240734, + "learning_rate": 1.785605412475681e-05, + "loss": 0.7314398288726807, + "step": 1430 + }, + { + "epoch": 0.5043171806167401, + "grad_norm": 1.4666969447710358, + "learning_rate": 1.7852447838204957e-05, + "loss": 0.7171268463134766, + "step": 1431 + }, + { + "epoch": 0.5046696035242291, + "grad_norm": 1.33657009662446, + "learning_rate": 1.784883888599144e-05, + "loss": 0.8349916338920593, + "step": 1432 + }, + { + "epoch": 0.505022026431718, + "grad_norm": 1.3501942895276628, + "learning_rate": 1.7845227269341387e-05, + "loss": 0.6375530958175659, + "step": 1433 + }, + { + "epoch": 0.505374449339207, + "grad_norm": 1.42707048545369, + "learning_rate": 1.7841612989480824e-05, + "loss": 0.8156824707984924, + "step": 1434 + }, + { + "epoch": 0.505726872246696, + "grad_norm": 1.4408580248696123, + "learning_rate": 1.7837996047636696e-05, + "loss": 0.7186283469200134, + "step": 1435 + }, + { + "epoch": 0.506079295154185, + "grad_norm": 1.3439268630529597, + "learning_rate": 1.7834376445036834e-05, + "loss": 0.6130756139755249, + "step": 1436 + }, + { + "epoch": 0.506431718061674, + "grad_norm": 1.5285659623162418, + "learning_rate": 1.7830754182909985e-05, + "loss": 0.6948508024215698, + "step": 1437 + }, + { + "epoch": 0.506784140969163, + "grad_norm": 1.3759453423428971, + "learning_rate": 1.7827129262485793e-05, + "loss": 0.7049688100814819, + "step": 1438 + }, + { + "epoch": 0.507136563876652, + "grad_norm": 1.457151343686531, + "learning_rate": 1.7823501684994805e-05, + "loss": 0.7491527795791626, + "step": 1439 + }, + { + "epoch": 0.507488986784141, + "grad_norm": 1.6101324796455516, + "learning_rate": 1.781987145166847e-05, + "loss": 
0.8718780279159546, + "step": 1440 + }, + { + "epoch": 0.5078414096916299, + "grad_norm": 1.2572878912363772, + "learning_rate": 1.7816238563739144e-05, + "loss": 0.5675592422485352, + "step": 1441 + }, + { + "epoch": 0.508193832599119, + "grad_norm": 1.4044509323540495, + "learning_rate": 1.7812603022440076e-05, + "loss": 0.7472085952758789, + "step": 1442 + }, + { + "epoch": 0.5085462555066079, + "grad_norm": 1.3371129648202849, + "learning_rate": 1.7808964829005416e-05, + "loss": 0.7673810720443726, + "step": 1443 + }, + { + "epoch": 0.5088986784140969, + "grad_norm": 1.5506550607349072, + "learning_rate": 1.7805323984670224e-05, + "loss": 0.8245630264282227, + "step": 1444 + }, + { + "epoch": 0.5092511013215859, + "grad_norm": 1.488734758513416, + "learning_rate": 1.780168049067045e-05, + "loss": 0.8578429222106934, + "step": 1445 + }, + { + "epoch": 0.5096035242290748, + "grad_norm": 1.3892444083620181, + "learning_rate": 1.7798034348242944e-05, + "loss": 0.6631708145141602, + "step": 1446 + }, + { + "epoch": 0.5099559471365639, + "grad_norm": 1.3121030116229568, + "learning_rate": 1.779438555862546e-05, + "loss": 0.8106615543365479, + "step": 1447 + }, + { + "epoch": 0.5103083700440528, + "grad_norm": 1.1486822439059632, + "learning_rate": 1.7790734123056654e-05, + "loss": 0.7033256888389587, + "step": 1448 + }, + { + "epoch": 0.5106607929515419, + "grad_norm": 1.2259259255559172, + "learning_rate": 1.7787080042776065e-05, + "loss": 0.7124278545379639, + "step": 1449 + }, + { + "epoch": 0.5110132158590308, + "grad_norm": 1.4546377837760451, + "learning_rate": 1.7783423319024144e-05, + "loss": 0.7834827899932861, + "step": 1450 + }, + { + "epoch": 0.5113656387665199, + "grad_norm": 1.4580618513432573, + "learning_rate": 1.777976395304224e-05, + "loss": 0.6762892603874207, + "step": 1451 + }, + { + "epoch": 0.5117180616740088, + "grad_norm": 1.4220157860300873, + "learning_rate": 1.7776101946072586e-05, + "loss": 0.7317261695861816, + "step": 1452 + }, + { + 
"epoch": 0.5120704845814978, + "grad_norm": 1.3265767127223091, + "learning_rate": 1.7772437299358324e-05, + "loss": 0.6278417110443115, + "step": 1453 + }, + { + "epoch": 0.5124229074889868, + "grad_norm": 1.319373459720871, + "learning_rate": 1.7768770014143485e-05, + "loss": 0.6638025045394897, + "step": 1454 + }, + { + "epoch": 0.5127753303964758, + "grad_norm": 1.400198364176684, + "learning_rate": 1.7765100091673e-05, + "loss": 0.786564826965332, + "step": 1455 + }, + { + "epoch": 0.5131277533039648, + "grad_norm": 1.4579007616104753, + "learning_rate": 1.776142753319269e-05, + "loss": 0.7483570575714111, + "step": 1456 + }, + { + "epoch": 0.5134801762114537, + "grad_norm": 1.5755517235246568, + "learning_rate": 1.7757752339949284e-05, + "loss": 0.7036221027374268, + "step": 1457 + }, + { + "epoch": 0.5138325991189427, + "grad_norm": 1.4840022330643747, + "learning_rate": 1.7754074513190384e-05, + "loss": 0.6903718709945679, + "step": 1458 + }, + { + "epoch": 0.5141850220264317, + "grad_norm": 1.2882226376562813, + "learning_rate": 1.77503940541645e-05, + "loss": 0.7728221416473389, + "step": 1459 + }, + { + "epoch": 0.5145374449339207, + "grad_norm": 1.327669814898394, + "learning_rate": 1.774671096412104e-05, + "loss": 0.7127183675765991, + "step": 1460 + }, + { + "epoch": 0.5148898678414097, + "grad_norm": 1.6330052955229915, + "learning_rate": 1.7743025244310293e-05, + "loss": 0.7801295518875122, + "step": 1461 + }, + { + "epoch": 0.5152422907488987, + "grad_norm": 1.1623220195345323, + "learning_rate": 1.773933689598345e-05, + "loss": 0.632892906665802, + "step": 1462 + }, + { + "epoch": 0.5155947136563876, + "grad_norm": 1.2497961025206838, + "learning_rate": 1.7735645920392587e-05, + "loss": 0.7347458600997925, + "step": 1463 + }, + { + "epoch": 0.5159471365638767, + "grad_norm": 1.5115996209276181, + "learning_rate": 1.7731952318790673e-05, + "loss": 0.6705365777015686, + "step": 1464 + }, + { + "epoch": 0.5162995594713656, + "grad_norm": 
1.4475904564128834, + "learning_rate": 1.7728256092431577e-05, + "loss": 0.696006715297699, + "step": 1465 + }, + { + "epoch": 0.5166519823788547, + "grad_norm": 1.3978951424570836, + "learning_rate": 1.7724557242570045e-05, + "loss": 0.5922254323959351, + "step": 1466 + }, + { + "epoch": 0.5170044052863436, + "grad_norm": 1.2709448074189098, + "learning_rate": 1.7720855770461733e-05, + "loss": 0.6162985563278198, + "step": 1467 + }, + { + "epoch": 0.5173568281938326, + "grad_norm": 1.432801158502027, + "learning_rate": 1.7717151677363164e-05, + "loss": 0.7319275140762329, + "step": 1468 + }, + { + "epoch": 0.5177092511013216, + "grad_norm": 1.3993642551309304, + "learning_rate": 1.771344496453177e-05, + "loss": 0.7349969148635864, + "step": 1469 + }, + { + "epoch": 0.5180616740088105, + "grad_norm": 1.287285806622758, + "learning_rate": 1.7709735633225863e-05, + "loss": 0.8153162598609924, + "step": 1470 + }, + { + "epoch": 0.5184140969162996, + "grad_norm": 1.3919169087311665, + "learning_rate": 1.7706023684704642e-05, + "loss": 0.6582974195480347, + "step": 1471 + }, + { + "epoch": 0.5187665198237885, + "grad_norm": 1.704010679983685, + "learning_rate": 1.77023091202282e-05, + "loss": 0.696917712688446, + "step": 1472 + }, + { + "epoch": 0.5191189427312776, + "grad_norm": 1.4310702415713368, + "learning_rate": 1.769859194105752e-05, + "loss": 0.6281285285949707, + "step": 1473 + }, + { + "epoch": 0.5194713656387665, + "grad_norm": 1.6327610148964462, + "learning_rate": 1.7694872148454463e-05, + "loss": 0.7687089443206787, + "step": 1474 + }, + { + "epoch": 0.5198237885462555, + "grad_norm": 1.386403275153257, + "learning_rate": 1.7691149743681783e-05, + "loss": 0.6928491592407227, + "step": 1475 + }, + { + "epoch": 0.5201762114537445, + "grad_norm": 1.3972840676283895, + "learning_rate": 1.7687424728003126e-05, + "loss": 0.63843834400177, + "step": 1476 + }, + { + "epoch": 0.5205286343612335, + "grad_norm": 1.7893361899671325, + "learning_rate": 
1.7683697102683012e-05, + "loss": 0.8987904787063599, + "step": 1477 + }, + { + "epoch": 0.5208810572687225, + "grad_norm": 1.3513150269139367, + "learning_rate": 1.767996686898686e-05, + "loss": 0.7027539014816284, + "step": 1478 + }, + { + "epoch": 0.5212334801762114, + "grad_norm": 1.5924485741299983, + "learning_rate": 1.7676234028180964e-05, + "loss": 0.8490183353424072, + "step": 1479 + }, + { + "epoch": 0.5215859030837005, + "grad_norm": 1.251712414046886, + "learning_rate": 1.7672498581532508e-05, + "loss": 0.5885729789733887, + "step": 1480 + }, + { + "epoch": 0.5219383259911894, + "grad_norm": 1.289415742432068, + "learning_rate": 1.766876053030956e-05, + "loss": 0.627627968788147, + "step": 1481 + }, + { + "epoch": 0.5222907488986784, + "grad_norm": 1.2948972408498374, + "learning_rate": 1.766501987578108e-05, + "loss": 0.6441413164138794, + "step": 1482 + }, + { + "epoch": 0.5226431718061674, + "grad_norm": 1.3508329997529829, + "learning_rate": 1.7661276619216888e-05, + "loss": 0.6199722290039062, + "step": 1483 + }, + { + "epoch": 0.5229955947136564, + "grad_norm": 1.2931208995237342, + "learning_rate": 1.7657530761887715e-05, + "loss": 0.6364887952804565, + "step": 1484 + }, + { + "epoch": 0.5233480176211454, + "grad_norm": 1.281527242811407, + "learning_rate": 1.7653782305065158e-05, + "loss": 0.7279890775680542, + "step": 1485 + }, + { + "epoch": 0.5237004405286344, + "grad_norm": 1.5228486275670003, + "learning_rate": 1.7650031250021704e-05, + "loss": 0.6552719473838806, + "step": 1486 + }, + { + "epoch": 0.5240528634361233, + "grad_norm": 1.4461703633182712, + "learning_rate": 1.7646277598030717e-05, + "loss": 0.6778907775878906, + "step": 1487 + }, + { + "epoch": 0.5244052863436124, + "grad_norm": 1.3941119820637071, + "learning_rate": 1.7642521350366447e-05, + "loss": 0.6581870317459106, + "step": 1488 + }, + { + "epoch": 0.5247577092511013, + "grad_norm": 1.6198722329881745, + "learning_rate": 1.7638762508304025e-05, + "loss": 
0.8529388904571533, + "step": 1489 + }, + { + "epoch": 0.5251101321585903, + "grad_norm": 1.659639253256808, + "learning_rate": 1.7635001073119458e-05, + "loss": 0.6601512432098389, + "step": 1490 + }, + { + "epoch": 0.5254625550660793, + "grad_norm": 1.5073764890239725, + "learning_rate": 1.7631237046089637e-05, + "loss": 0.6347510814666748, + "step": 1491 + }, + { + "epoch": 0.5258149779735682, + "grad_norm": 1.3256786239827452, + "learning_rate": 1.762747042849233e-05, + "loss": 0.700560986995697, + "step": 1492 + }, + { + "epoch": 0.5261674008810573, + "grad_norm": 1.4060287838972836, + "learning_rate": 1.762370122160619e-05, + "loss": 0.5641219019889832, + "step": 1493 + }, + { + "epoch": 0.5265198237885462, + "grad_norm": 1.3124518756577959, + "learning_rate": 1.761992942671074e-05, + "loss": 0.8017370700836182, + "step": 1494 + }, + { + "epoch": 0.5268722466960353, + "grad_norm": 1.334442798992846, + "learning_rate": 1.7616155045086394e-05, + "loss": 0.6345353126525879, + "step": 1495 + }, + { + "epoch": 0.5272246696035242, + "grad_norm": 1.6841165394853315, + "learning_rate": 1.7612378078014432e-05, + "loss": 0.7118426561355591, + "step": 1496 + }, + { + "epoch": 0.5275770925110133, + "grad_norm": 1.6748084277774182, + "learning_rate": 1.7608598526777017e-05, + "loss": 0.6186550855636597, + "step": 1497 + }, + { + "epoch": 0.5279295154185022, + "grad_norm": 1.4676540893387768, + "learning_rate": 1.7604816392657195e-05, + "loss": 0.8351110219955444, + "step": 1498 + }, + { + "epoch": 0.5282819383259912, + "grad_norm": 1.3183866002309903, + "learning_rate": 1.7601031676938877e-05, + "loss": 0.638684868812561, + "step": 1499 + }, + { + "epoch": 0.5286343612334802, + "grad_norm": 1.291067085285626, + "learning_rate": 1.7597244380906856e-05, + "loss": 0.5118356943130493, + "step": 1500 + }, + { + "epoch": 0.5289867841409691, + "grad_norm": 1.2880504132355877, + "learning_rate": 1.7593454505846807e-05, + "loss": 0.637636125087738, + "step": 1501 + }, + { + 
"epoch": 0.5293392070484582, + "grad_norm": 1.3905967147162603, + "learning_rate": 1.7589662053045264e-05, + "loss": 0.8412563800811768, + "step": 1502 + }, + { + "epoch": 0.5296916299559471, + "grad_norm": 1.6432072453017084, + "learning_rate": 1.758586702378966e-05, + "loss": 0.7940464019775391, + "step": 1503 + }, + { + "epoch": 0.5300440528634361, + "grad_norm": 1.4898667206132308, + "learning_rate": 1.7582069419368276e-05, + "loss": 0.7136783599853516, + "step": 1504 + }, + { + "epoch": 0.5303964757709251, + "grad_norm": 1.5677232979916986, + "learning_rate": 1.757826924107029e-05, + "loss": 0.7212727069854736, + "step": 1505 + }, + { + "epoch": 0.5307488986784141, + "grad_norm": 2.968905268892082, + "learning_rate": 1.757446649018574e-05, + "loss": 0.7026032209396362, + "step": 1506 + }, + { + "epoch": 0.5311013215859031, + "grad_norm": 1.3050484980835664, + "learning_rate": 1.7570661168005544e-05, + "loss": 0.541954755783081, + "step": 1507 + }, + { + "epoch": 0.5314537444933921, + "grad_norm": 1.3422366313712581, + "learning_rate": 1.7566853275821488e-05, + "loss": 0.6927075386047363, + "step": 1508 + }, + { + "epoch": 0.531806167400881, + "grad_norm": 1.4069640098530838, + "learning_rate": 1.7563042814926237e-05, + "loss": 0.6556441783905029, + "step": 1509 + }, + { + "epoch": 0.5321585903083701, + "grad_norm": 1.710258111864569, + "learning_rate": 1.7559229786613317e-05, + "loss": 0.6895149946212769, + "step": 1510 + }, + { + "epoch": 0.532511013215859, + "grad_norm": 1.3737730722509855, + "learning_rate": 1.755541419217714e-05, + "loss": 0.6178139448165894, + "step": 1511 + }, + { + "epoch": 0.532863436123348, + "grad_norm": 1.5162737493672722, + "learning_rate": 1.7551596032912975e-05, + "loss": 0.7645368576049805, + "step": 1512 + }, + { + "epoch": 0.533215859030837, + "grad_norm": 1.3652252290806937, + "learning_rate": 1.7547775310116973e-05, + "loss": 0.8247367143630981, + "step": 1513 + }, + { + "epoch": 0.533568281938326, + "grad_norm": 
1.2941657542151124, + "learning_rate": 1.7543952025086147e-05, + "loss": 0.535837709903717, + "step": 1514 + }, + { + "epoch": 0.533920704845815, + "grad_norm": 1.3232982615818571, + "learning_rate": 1.7540126179118387e-05, + "loss": 0.51450514793396, + "step": 1515 + }, + { + "epoch": 0.5342731277533039, + "grad_norm": 1.3863880461090508, + "learning_rate": 1.7536297773512444e-05, + "loss": 0.7962276935577393, + "step": 1516 + }, + { + "epoch": 0.534625550660793, + "grad_norm": 1.4799750544282257, + "learning_rate": 1.753246680956795e-05, + "loss": 0.7586444616317749, + "step": 1517 + }, + { + "epoch": 0.5349779735682819, + "grad_norm": 1.4967875396536634, + "learning_rate": 1.752863328858539e-05, + "loss": 0.7074990272521973, + "step": 1518 + }, + { + "epoch": 0.535330396475771, + "grad_norm": 1.3158842776684478, + "learning_rate": 1.7524797211866126e-05, + "loss": 0.7409921884536743, + "step": 1519 + }, + { + "epoch": 0.5356828193832599, + "grad_norm": 1.3752676962962187, + "learning_rate": 1.7520958580712394e-05, + "loss": 0.7889251708984375, + "step": 1520 + }, + { + "epoch": 0.536035242290749, + "grad_norm": 2.0871001645404776, + "learning_rate": 1.751711739642728e-05, + "loss": 0.8244975209236145, + "step": 1521 + }, + { + "epoch": 0.5363876651982379, + "grad_norm": 1.441127888748836, + "learning_rate": 1.7513273660314753e-05, + "loss": 0.6573888659477234, + "step": 1522 + }, + { + "epoch": 0.5367400881057268, + "grad_norm": 1.3793459185222714, + "learning_rate": 1.7509427373679643e-05, + "loss": 0.71816086769104, + "step": 1523 + }, + { + "epoch": 0.5370925110132159, + "grad_norm": 1.5200593368820163, + "learning_rate": 1.750557853782764e-05, + "loss": 0.7681000232696533, + "step": 1524 + }, + { + "epoch": 0.5374449339207048, + "grad_norm": 1.443442982592023, + "learning_rate": 1.7501727154065304e-05, + "loss": 0.6777461767196655, + "step": 1525 + }, + { + "epoch": 0.5377973568281939, + "grad_norm": 1.5538840121485165, + "learning_rate": 
1.7497873223700063e-05, + "loss": 0.714499831199646, + "step": 1526 + }, + { + "epoch": 0.5381497797356828, + "grad_norm": 1.6085901588908644, + "learning_rate": 1.7494016748040206e-05, + "loss": 0.6587036848068237, + "step": 1527 + }, + { + "epoch": 0.5385022026431718, + "grad_norm": 1.5748960305246453, + "learning_rate": 1.7490157728394887e-05, + "loss": 0.7256105542182922, + "step": 1528 + }, + { + "epoch": 0.5388546255506608, + "grad_norm": 1.7818844853131433, + "learning_rate": 1.7486296166074116e-05, + "loss": 0.6551185846328735, + "step": 1529 + }, + { + "epoch": 0.5392070484581498, + "grad_norm": 1.5961201900224617, + "learning_rate": 1.7482432062388782e-05, + "loss": 0.710479736328125, + "step": 1530 + }, + { + "epoch": 0.5395594713656388, + "grad_norm": 1.3063302832130508, + "learning_rate": 1.7478565418650625e-05, + "loss": 0.7882527709007263, + "step": 1531 + }, + { + "epoch": 0.5399118942731278, + "grad_norm": 1.4227949027781848, + "learning_rate": 1.7474696236172247e-05, + "loss": 0.6163671612739563, + "step": 1532 + }, + { + "epoch": 0.5402643171806167, + "grad_norm": 1.3516530648193832, + "learning_rate": 1.7470824516267125e-05, + "loss": 0.6923140287399292, + "step": 1533 + }, + { + "epoch": 0.5406167400881057, + "grad_norm": 1.259724627030556, + "learning_rate": 1.7466950260249573e-05, + "loss": 0.6473938226699829, + "step": 1534 + }, + { + "epoch": 0.5409691629955947, + "grad_norm": 1.7187178399062975, + "learning_rate": 1.7463073469434792e-05, + "loss": 0.631247878074646, + "step": 1535 + }, + { + "epoch": 0.5413215859030837, + "grad_norm": 1.8932273669088504, + "learning_rate": 1.745919414513883e-05, + "loss": 0.8113377690315247, + "step": 1536 + }, + { + "epoch": 0.5416740088105727, + "grad_norm": 1.4958838672098407, + "learning_rate": 1.7455312288678588e-05, + "loss": 0.7950010299682617, + "step": 1537 + }, + { + "epoch": 0.5420264317180616, + "grad_norm": 1.5066443226404773, + "learning_rate": 1.7451427901371843e-05, + "loss": 
0.7279125452041626, + "step": 1538 + }, + { + "epoch": 0.5423788546255507, + "grad_norm": 1.304013044677209, + "learning_rate": 1.7447540984537225e-05, + "loss": 0.6746084690093994, + "step": 1539 + }, + { + "epoch": 0.5427312775330396, + "grad_norm": 1.2714028280363416, + "learning_rate": 1.744365153949422e-05, + "loss": 0.5818569660186768, + "step": 1540 + }, + { + "epoch": 0.5430837004405287, + "grad_norm": 1.3192138998270364, + "learning_rate": 1.743975956756317e-05, + "loss": 0.7408111095428467, + "step": 1541 + }, + { + "epoch": 0.5434361233480176, + "grad_norm": 1.4913068245941434, + "learning_rate": 1.7435865070065282e-05, + "loss": 0.6842402815818787, + "step": 1542 + }, + { + "epoch": 0.5437885462555067, + "grad_norm": 1.3316441616353596, + "learning_rate": 1.7431968048322617e-05, + "loss": 0.6179615259170532, + "step": 1543 + }, + { + "epoch": 0.5441409691629956, + "grad_norm": 1.3347896582759051, + "learning_rate": 1.7428068503658085e-05, + "loss": 0.5943140983581543, + "step": 1544 + }, + { + "epoch": 0.5444933920704845, + "grad_norm": 1.797026236227536, + "learning_rate": 1.742416643739547e-05, + "loss": 0.7901419401168823, + "step": 1545 + }, + { + "epoch": 0.5448458149779736, + "grad_norm": 1.4636864955433957, + "learning_rate": 1.74202618508594e-05, + "loss": 0.7630521655082703, + "step": 1546 + }, + { + "epoch": 0.5451982378854625, + "grad_norm": 1.5322711938826543, + "learning_rate": 1.7416354745375355e-05, + "loss": 0.7662566900253296, + "step": 1547 + }, + { + "epoch": 0.5455506607929516, + "grad_norm": 1.52556111103991, + "learning_rate": 1.7412445122269683e-05, + "loss": 0.5758601427078247, + "step": 1548 + }, + { + "epoch": 0.5459030837004405, + "grad_norm": 1.3681670353760105, + "learning_rate": 1.7408532982869573e-05, + "loss": 0.753425121307373, + "step": 1549 + }, + { + "epoch": 0.5462555066079295, + "grad_norm": 1.7147625296386437, + "learning_rate": 1.7404618328503082e-05, + "loss": 0.6954981088638306, + "step": 1550 + }, + { + 
"epoch": 0.5466079295154185, + "grad_norm": 1.7209819459128521, + "learning_rate": 1.7400701160499107e-05, + "loss": 0.7608321905136108, + "step": 1551 + }, + { + "epoch": 0.5469603524229075, + "grad_norm": 1.3289181862839086, + "learning_rate": 1.7396781480187403e-05, + "loss": 0.679731011390686, + "step": 1552 + }, + { + "epoch": 0.5473127753303965, + "grad_norm": 1.547015119464835, + "learning_rate": 1.7392859288898586e-05, + "loss": 0.7101309299468994, + "step": 1553 + }, + { + "epoch": 0.5476651982378855, + "grad_norm": 1.3483315531721025, + "learning_rate": 1.7388934587964114e-05, + "loss": 0.7975757122039795, + "step": 1554 + }, + { + "epoch": 0.5480176211453744, + "grad_norm": 1.612241763199232, + "learning_rate": 1.73850073787163e-05, + "loss": 0.9132372140884399, + "step": 1555 + }, + { + "epoch": 0.5483700440528634, + "grad_norm": 1.2910456692590726, + "learning_rate": 1.7381077662488313e-05, + "loss": 0.7375202178955078, + "step": 1556 + }, + { + "epoch": 0.5487224669603524, + "grad_norm": 1.387618503105513, + "learning_rate": 1.7377145440614165e-05, + "loss": 0.7066434025764465, + "step": 1557 + }, + { + "epoch": 0.5490748898678414, + "grad_norm": 1.3715154650071018, + "learning_rate": 1.737321071442873e-05, + "loss": 0.8217945098876953, + "step": 1558 + }, + { + "epoch": 0.5494273127753304, + "grad_norm": 1.629646959297134, + "learning_rate": 1.7369273485267716e-05, + "loss": 0.6946529150009155, + "step": 1559 + }, + { + "epoch": 0.5497797356828193, + "grad_norm": 1.525535566210846, + "learning_rate": 1.7365333754467694e-05, + "loss": 0.7671442627906799, + "step": 1560 + }, + { + "epoch": 0.5501321585903084, + "grad_norm": 1.3988607690634036, + "learning_rate": 1.736139152336608e-05, + "loss": 0.7044692039489746, + "step": 1561 + }, + { + "epoch": 0.5504845814977973, + "grad_norm": 1.368099127753493, + "learning_rate": 1.735744679330114e-05, + "loss": 0.6654937267303467, + "step": 1562 + }, + { + "epoch": 0.5508370044052864, + "grad_norm": 
1.2951614076743367, + "learning_rate": 1.7353499565611986e-05, + "loss": 0.6683400869369507, + "step": 1563 + }, + { + "epoch": 0.5511894273127753, + "grad_norm": 1.236687311626723, + "learning_rate": 1.734954984163858e-05, + "loss": 0.6360758543014526, + "step": 1564 + }, + { + "epoch": 0.5515418502202644, + "grad_norm": 1.3363599776517268, + "learning_rate": 1.7345597622721727e-05, + "loss": 0.6982365846633911, + "step": 1565 + }, + { + "epoch": 0.5518942731277533, + "grad_norm": 1.559537817461735, + "learning_rate": 1.7341642910203087e-05, + "loss": 0.8011882305145264, + "step": 1566 + }, + { + "epoch": 0.5522466960352423, + "grad_norm": 2.285492930360211, + "learning_rate": 1.7337685705425156e-05, + "loss": 0.8203347325325012, + "step": 1567 + }, + { + "epoch": 0.5525991189427313, + "grad_norm": 1.2535380811715755, + "learning_rate": 1.7333726009731288e-05, + "loss": 0.653145432472229, + "step": 1568 + }, + { + "epoch": 0.5529515418502202, + "grad_norm": 1.4482870728586805, + "learning_rate": 1.7329763824465676e-05, + "loss": 0.6527417302131653, + "step": 1569 + }, + { + "epoch": 0.5533039647577093, + "grad_norm": 1.44377376134513, + "learning_rate": 1.7325799150973353e-05, + "loss": 0.6965492963790894, + "step": 1570 + }, + { + "epoch": 0.5536563876651982, + "grad_norm": 1.64534067222521, + "learning_rate": 1.7321831990600206e-05, + "loss": 0.6779811382293701, + "step": 1571 + }, + { + "epoch": 0.5540088105726872, + "grad_norm": 1.4062562776851213, + "learning_rate": 1.731786234469296e-05, + "loss": 0.733130931854248, + "step": 1572 + }, + { + "epoch": 0.5543612334801762, + "grad_norm": 1.3711228848627866, + "learning_rate": 1.7313890214599195e-05, + "loss": 0.6482118964195251, + "step": 1573 + }, + { + "epoch": 0.5547136563876652, + "grad_norm": 1.300564860417972, + "learning_rate": 1.7309915601667312e-05, + "loss": 0.5167185068130493, + "step": 1574 + }, + { + "epoch": 0.5550660792951542, + "grad_norm": 1.5636119075248611, + "learning_rate": 
1.730593850724658e-05, + "loss": 0.7172712087631226, + "step": 1575 + }, + { + "epoch": 0.5554185022026432, + "grad_norm": 1.3031139662778384, + "learning_rate": 1.730195893268709e-05, + "loss": 0.6786075830459595, + "step": 1576 + }, + { + "epoch": 0.5557709251101322, + "grad_norm": 1.272621756820605, + "learning_rate": 1.7297976879339787e-05, + "loss": 0.6823022365570068, + "step": 1577 + }, + { + "epoch": 0.5561233480176212, + "grad_norm": 1.5517603954080275, + "learning_rate": 1.7293992348556462e-05, + "loss": 0.6882521510124207, + "step": 1578 + }, + { + "epoch": 0.5564757709251101, + "grad_norm": 1.3633741314626187, + "learning_rate": 1.7290005341689726e-05, + "loss": 0.6028990745544434, + "step": 1579 + }, + { + "epoch": 0.5568281938325991, + "grad_norm": 1.6493192591020644, + "learning_rate": 1.728601586009305e-05, + "loss": 0.7759981155395508, + "step": 1580 + }, + { + "epoch": 0.5571806167400881, + "grad_norm": 1.4356044858306343, + "learning_rate": 1.7282023905120743e-05, + "loss": 0.7067322134971619, + "step": 1581 + }, + { + "epoch": 0.5575330396475771, + "grad_norm": 1.6158791701222606, + "learning_rate": 1.727802947812794e-05, + "loss": 0.7972309589385986, + "step": 1582 + }, + { + "epoch": 0.5578854625550661, + "grad_norm": 1.7662811513100274, + "learning_rate": 1.7274032580470634e-05, + "loss": 0.780463457107544, + "step": 1583 + }, + { + "epoch": 0.558237885462555, + "grad_norm": 1.4053617141185595, + "learning_rate": 1.7270033213505638e-05, + "loss": 0.647217869758606, + "step": 1584 + }, + { + "epoch": 0.5585903083700441, + "grad_norm": 1.3125952525291176, + "learning_rate": 1.7266031378590624e-05, + "loss": 0.6253752112388611, + "step": 1585 + }, + { + "epoch": 0.558942731277533, + "grad_norm": 11.7060219187992, + "learning_rate": 1.7262027077084083e-05, + "loss": 0.8427211046218872, + "step": 1586 + }, + { + "epoch": 0.5592951541850221, + "grad_norm": 1.344046568539196, + "learning_rate": 1.7258020310345348e-05, + "loss": 0.6763455867767334, + 
"step": 1587 + }, + { + "epoch": 0.559647577092511, + "grad_norm": 1.593422172771999, + "learning_rate": 1.72540110797346e-05, + "loss": 0.7333850264549255, + "step": 1588 + }, + { + "epoch": 0.56, + "grad_norm": 1.6040079500892586, + "learning_rate": 1.7249999386612844e-05, + "loss": 0.8572328090667725, + "step": 1589 + }, + { + "epoch": 0.560352422907489, + "grad_norm": 1.5035390542036942, + "learning_rate": 1.7245985232341923e-05, + "loss": 0.7960183620452881, + "step": 1590 + }, + { + "epoch": 0.5607048458149779, + "grad_norm": 1.484772075429922, + "learning_rate": 1.7241968618284518e-05, + "loss": 0.6750795841217041, + "step": 1591 + }, + { + "epoch": 0.561057268722467, + "grad_norm": 1.9239116239416003, + "learning_rate": 1.7237949545804145e-05, + "loss": 0.7828525304794312, + "step": 1592 + }, + { + "epoch": 0.5614096916299559, + "grad_norm": 1.4415449299886975, + "learning_rate": 1.7233928016265158e-05, + "loss": 0.7414604425430298, + "step": 1593 + }, + { + "epoch": 0.561762114537445, + "grad_norm": 1.4483242479736562, + "learning_rate": 1.7229904031032736e-05, + "loss": 0.6853663921356201, + "step": 1594 + }, + { + "epoch": 0.5621145374449339, + "grad_norm": 1.9067676423331832, + "learning_rate": 1.72258775914729e-05, + "loss": 0.7923493385314941, + "step": 1595 + }, + { + "epoch": 0.5624669603524229, + "grad_norm": 1.6239202976244251, + "learning_rate": 1.7221848698952496e-05, + "loss": 0.6776527166366577, + "step": 1596 + }, + { + "epoch": 0.5628193832599119, + "grad_norm": 1.4721879083766742, + "learning_rate": 1.721781735483921e-05, + "loss": 0.6036615371704102, + "step": 1597 + }, + { + "epoch": 0.5631718061674009, + "grad_norm": 1.271294238053108, + "learning_rate": 1.7213783560501564e-05, + "loss": 0.7175784111022949, + "step": 1598 + }, + { + "epoch": 0.5635242290748899, + "grad_norm": 1.609537856897954, + "learning_rate": 1.7209747317308897e-05, + "loss": 0.790808379650116, + "step": 1599 + }, + { + "epoch": 0.5638766519823789, + "grad_norm": 
1.211639696248482, + "learning_rate": 1.7205708626631392e-05, + "loss": 0.6230301856994629, + "step": 1600 + }, + { + "epoch": 0.5642290748898678, + "grad_norm": 1.120326299832536, + "learning_rate": 1.720166748984006e-05, + "loss": 0.712124228477478, + "step": 1601 + }, + { + "epoch": 0.5645814977973568, + "grad_norm": 1.1185092917911836, + "learning_rate": 1.719762390830674e-05, + "loss": 0.543883740901947, + "step": 1602 + }, + { + "epoch": 0.5649339207048458, + "grad_norm": 1.3866183721479424, + "learning_rate": 1.71935778834041e-05, + "loss": 0.7619644999504089, + "step": 1603 + }, + { + "epoch": 0.5652863436123348, + "grad_norm": 1.3869247346305908, + "learning_rate": 1.718952941650564e-05, + "loss": 0.6447019577026367, + "step": 1604 + }, + { + "epoch": 0.5656387665198238, + "grad_norm": 1.4175373147115695, + "learning_rate": 1.718547850898569e-05, + "loss": 0.7254266738891602, + "step": 1605 + }, + { + "epoch": 0.5659911894273127, + "grad_norm": 1.3621762521360266, + "learning_rate": 1.7181425162219406e-05, + "loss": 0.632878839969635, + "step": 1606 + }, + { + "epoch": 0.5663436123348018, + "grad_norm": 1.3921274088807207, + "learning_rate": 1.7177369377582776e-05, + "loss": 0.7711806893348694, + "step": 1607 + }, + { + "epoch": 0.5666960352422907, + "grad_norm": 1.1613347832568823, + "learning_rate": 1.7173311156452607e-05, + "loss": 0.6639282703399658, + "step": 1608 + }, + { + "epoch": 0.5670484581497798, + "grad_norm": 1.4423463303361395, + "learning_rate": 1.7169250500206544e-05, + "loss": 0.6918407082557678, + "step": 1609 + }, + { + "epoch": 0.5674008810572687, + "grad_norm": 2.283192950596924, + "learning_rate": 1.716518741022305e-05, + "loss": 0.6602861881256104, + "step": 1610 + }, + { + "epoch": 0.5677533039647578, + "grad_norm": 1.401616641880741, + "learning_rate": 1.7161121887881424e-05, + "loss": 0.5853942632675171, + "step": 1611 + }, + { + "epoch": 0.5681057268722467, + "grad_norm": 1.4206445071697613, + "learning_rate": 
1.7157053934561775e-05, + "loss": 0.6793895959854126, + "step": 1612 + }, + { + "epoch": 0.5684581497797356, + "grad_norm": 1.43055320760408, + "learning_rate": 1.7152983551645054e-05, + "loss": 0.7882634401321411, + "step": 1613 + }, + { + "epoch": 0.5688105726872247, + "grad_norm": 1.4602086959676452, + "learning_rate": 1.7148910740513023e-05, + "loss": 0.6530553698539734, + "step": 1614 + }, + { + "epoch": 0.5691629955947136, + "grad_norm": 1.2905537135464573, + "learning_rate": 1.714483550254828e-05, + "loss": 0.6405597925186157, + "step": 1615 + }, + { + "epoch": 0.5695154185022027, + "grad_norm": 1.4236330365126968, + "learning_rate": 1.714075783913424e-05, + "loss": 0.7356796860694885, + "step": 1616 + }, + { + "epoch": 0.5698678414096916, + "grad_norm": 1.3877607090316109, + "learning_rate": 1.7136677751655142e-05, + "loss": 0.7393465042114258, + "step": 1617 + }, + { + "epoch": 0.5702202643171806, + "grad_norm": 1.6092126006316967, + "learning_rate": 1.7132595241496045e-05, + "loss": 0.7205296158790588, + "step": 1618 + }, + { + "epoch": 0.5705726872246696, + "grad_norm": 1.291376266983401, + "learning_rate": 1.7128510310042842e-05, + "loss": 0.7359808683395386, + "step": 1619 + }, + { + "epoch": 0.5709251101321586, + "grad_norm": 1.3759135749970453, + "learning_rate": 1.712442295868224e-05, + "loss": 0.7097065448760986, + "step": 1620 + }, + { + "epoch": 0.5712775330396476, + "grad_norm": 1.3905917375530226, + "learning_rate": 1.7120333188801756e-05, + "loss": 0.66839599609375, + "step": 1621 + }, + { + "epoch": 0.5716299559471366, + "grad_norm": 1.7035593754714837, + "learning_rate": 1.7116241001789753e-05, + "loss": 0.8373857736587524, + "step": 1622 + }, + { + "epoch": 0.5719823788546256, + "grad_norm": 1.4514044348034505, + "learning_rate": 1.7112146399035393e-05, + "loss": 0.6405144333839417, + "step": 1623 + }, + { + "epoch": 0.5723348017621145, + "grad_norm": 1.3537498495813336, + "learning_rate": 1.710804938192867e-05, + "loss": 0.622218906879425, 
+ "step": 1624 + }, + { + "epoch": 0.5726872246696035, + "grad_norm": 1.3235233015291856, + "learning_rate": 1.710394995186039e-05, + "loss": 0.6728596687316895, + "step": 1625 + }, + { + "epoch": 0.5730396475770925, + "grad_norm": 1.457353775792826, + "learning_rate": 1.7099848110222188e-05, + "loss": 0.7749369144439697, + "step": 1626 + }, + { + "epoch": 0.5733920704845815, + "grad_norm": 1.5414707611626788, + "learning_rate": 1.7095743858406506e-05, + "loss": 0.7230759859085083, + "step": 1627 + }, + { + "epoch": 0.5737444933920705, + "grad_norm": 1.540981219180448, + "learning_rate": 1.7091637197806614e-05, + "loss": 0.8243547677993774, + "step": 1628 + }, + { + "epoch": 0.5740969162995595, + "grad_norm": 1.38043003521811, + "learning_rate": 1.708752812981659e-05, + "loss": 0.5860315561294556, + "step": 1629 + }, + { + "epoch": 0.5744493392070484, + "grad_norm": 1.6273061636094053, + "learning_rate": 1.708341665583134e-05, + "loss": 0.6623368859291077, + "step": 1630 + }, + { + "epoch": 0.5748017621145375, + "grad_norm": 1.8119651381751527, + "learning_rate": 1.7079302777246577e-05, + "loss": 0.6467370986938477, + "step": 1631 + }, + { + "epoch": 0.5751541850220264, + "grad_norm": 1.5119118761679917, + "learning_rate": 1.707518649545884e-05, + "loss": 0.6443271636962891, + "step": 1632 + }, + { + "epoch": 0.5755066079295155, + "grad_norm": 1.3128080413830525, + "learning_rate": 1.7071067811865477e-05, + "loss": 0.6995208263397217, + "step": 1633 + }, + { + "epoch": 0.5758590308370044, + "grad_norm": 1.4660315838841709, + "learning_rate": 1.706694672786465e-05, + "loss": 0.698627233505249, + "step": 1634 + }, + { + "epoch": 0.5762114537444933, + "grad_norm": 1.3788458614759633, + "learning_rate": 1.706282324485534e-05, + "loss": 0.713565468788147, + "step": 1635 + }, + { + "epoch": 0.5765638766519824, + "grad_norm": 1.4050651409728825, + "learning_rate": 1.7058697364237342e-05, + "loss": 0.7978894710540771, + "step": 1636 + }, + { + "epoch": 0.5769162995594713, 
+ "grad_norm": 1.374012134646938, + "learning_rate": 1.7054569087411262e-05, + "loss": 0.7361177206039429, + "step": 1637 + }, + { + "epoch": 0.5772687224669604, + "grad_norm": 1.3640656150089683, + "learning_rate": 1.705043841577853e-05, + "loss": 0.5904364585876465, + "step": 1638 + }, + { + "epoch": 0.5776211453744493, + "grad_norm": 1.4706525609098695, + "learning_rate": 1.7046305350741365e-05, + "loss": 0.7122133374214172, + "step": 1639 + }, + { + "epoch": 0.5779735682819384, + "grad_norm": 1.5208627357939872, + "learning_rate": 1.7042169893702826e-05, + "loss": 0.6350806951522827, + "step": 1640 + }, + { + "epoch": 0.5783259911894273, + "grad_norm": 1.4511692718944456, + "learning_rate": 1.7038032046066767e-05, + "loss": 0.6332669258117676, + "step": 1641 + }, + { + "epoch": 0.5786784140969163, + "grad_norm": 1.415207402865657, + "learning_rate": 1.7033891809237865e-05, + "loss": 0.6645903587341309, + "step": 1642 + }, + { + "epoch": 0.5790308370044053, + "grad_norm": 1.6697269215763402, + "learning_rate": 1.7029749184621593e-05, + "loss": 0.8156411051750183, + "step": 1643 + }, + { + "epoch": 0.5793832599118943, + "grad_norm": 1.3789808786486863, + "learning_rate": 1.7025604173624247e-05, + "loss": 0.6778720617294312, + "step": 1644 + }, + { + "epoch": 0.5797356828193833, + "grad_norm": 1.5882994058774447, + "learning_rate": 1.702145677765293e-05, + "loss": 0.6774875521659851, + "step": 1645 + }, + { + "epoch": 0.5800881057268722, + "grad_norm": 1.7790432286964633, + "learning_rate": 1.701730699811555e-05, + "loss": 0.9239652156829834, + "step": 1646 + }, + { + "epoch": 0.5804405286343612, + "grad_norm": 1.3647594896468807, + "learning_rate": 1.701315483642083e-05, + "loss": 0.6841437816619873, + "step": 1647 + }, + { + "epoch": 0.5807929515418502, + "grad_norm": 1.7199469103031315, + "learning_rate": 1.7009000293978308e-05, + "loss": 0.7540775537490845, + "step": 1648 + }, + { + "epoch": 0.5811453744493392, + "grad_norm": 1.0742597088843755, + 
"learning_rate": 1.7004843372198306e-05, + "loss": 0.5534735321998596, + "step": 1649 + }, + { + "epoch": 0.5814977973568282, + "grad_norm": 1.326312979627632, + "learning_rate": 1.7000684072491984e-05, + "loss": 0.5398745536804199, + "step": 1650 + }, + { + "epoch": 0.5818502202643172, + "grad_norm": 1.583833147288038, + "learning_rate": 1.6996522396271285e-05, + "loss": 0.7249305248260498, + "step": 1651 + }, + { + "epoch": 0.5822026431718061, + "grad_norm": 2.3893378173132973, + "learning_rate": 1.6992358344948976e-05, + "loss": 0.819263219833374, + "step": 1652 + }, + { + "epoch": 0.5825550660792952, + "grad_norm": 1.4489156713328724, + "learning_rate": 1.6988191919938618e-05, + "loss": 0.7421448826789856, + "step": 1653 + }, + { + "epoch": 0.5829074889867841, + "grad_norm": 1.832209725536692, + "learning_rate": 1.6984023122654584e-05, + "loss": 0.7665672302246094, + "step": 1654 + }, + { + "epoch": 0.5832599118942732, + "grad_norm": 1.390589552129084, + "learning_rate": 1.697985195451205e-05, + "loss": 0.7226558327674866, + "step": 1655 + }, + { + "epoch": 0.5836123348017621, + "grad_norm": 1.5091001050977364, + "learning_rate": 1.6975678416926995e-05, + "loss": 0.6702080965042114, + "step": 1656 + }, + { + "epoch": 0.583964757709251, + "grad_norm": 1.460442381139403, + "learning_rate": 1.697150251131621e-05, + "loss": 0.5843878984451294, + "step": 1657 + }, + { + "epoch": 0.5843171806167401, + "grad_norm": 1.37517469234843, + "learning_rate": 1.6967324239097287e-05, + "loss": 0.707448422908783, + "step": 1658 + }, + { + "epoch": 0.584669603524229, + "grad_norm": 1.8436282149841139, + "learning_rate": 1.6963143601688615e-05, + "loss": 0.7619093060493469, + "step": 1659 + }, + { + "epoch": 0.5850220264317181, + "grad_norm": 1.5399166464925174, + "learning_rate": 1.695896060050939e-05, + "loss": 0.6550310850143433, + "step": 1660 + }, + { + "epoch": 0.585374449339207, + "grad_norm": 1.6689625417691945, + "learning_rate": 1.6954775236979616e-05, + "loss": 
0.7202504277229309, + "step": 1661 + }, + { + "epoch": 0.5857268722466961, + "grad_norm": 1.4936106294591966, + "learning_rate": 1.6950587512520085e-05, + "loss": 0.7941907644271851, + "step": 1662 + }, + { + "epoch": 0.586079295154185, + "grad_norm": 1.3939181305394832, + "learning_rate": 1.6946397428552406e-05, + "loss": 0.6349755525588989, + "step": 1663 + }, + { + "epoch": 0.586431718061674, + "grad_norm": 1.4663377684980818, + "learning_rate": 1.6942204986498978e-05, + "loss": 0.6220123171806335, + "step": 1664 + }, + { + "epoch": 0.586784140969163, + "grad_norm": 1.3729457618271874, + "learning_rate": 1.693801018778301e-05, + "loss": 0.6617282629013062, + "step": 1665 + }, + { + "epoch": 0.587136563876652, + "grad_norm": 1.6745607368825612, + "learning_rate": 1.6933813033828496e-05, + "loss": 0.7424415349960327, + "step": 1666 + }, + { + "epoch": 0.587488986784141, + "grad_norm": 1.4332695932293307, + "learning_rate": 1.6929613526060254e-05, + "loss": 0.7245291471481323, + "step": 1667 + }, + { + "epoch": 0.5878414096916299, + "grad_norm": 1.7631957554533126, + "learning_rate": 1.692541166590387e-05, + "loss": 0.7037352323532104, + "step": 1668 + }, + { + "epoch": 0.588193832599119, + "grad_norm": 1.563153866597813, + "learning_rate": 1.6921207454785754e-05, + "loss": 0.7452583312988281, + "step": 1669 + }, + { + "epoch": 0.5885462555066079, + "grad_norm": 1.8223456889525438, + "learning_rate": 1.6917000894133106e-05, + "loss": 0.7773720026016235, + "step": 1670 + }, + { + "epoch": 0.5888986784140969, + "grad_norm": 1.6663522681826546, + "learning_rate": 1.6912791985373916e-05, + "loss": 0.5820617079734802, + "step": 1671 + }, + { + "epoch": 0.5892511013215859, + "grad_norm": 1.4638050818442514, + "learning_rate": 1.6908580729936983e-05, + "loss": 0.7513154745101929, + "step": 1672 + }, + { + "epoch": 0.5896035242290749, + "grad_norm": 1.4916906386520274, + "learning_rate": 1.6904367129251898e-05, + "loss": 0.6741763949394226, + "step": 1673 + }, + { + 
"epoch": 0.5899559471365639, + "grad_norm": 1.4430048165358413, + "learning_rate": 1.690015118474904e-05, + "loss": 0.7290149331092834, + "step": 1674 + }, + { + "epoch": 0.5903083700440529, + "grad_norm": 1.431209358109114, + "learning_rate": 1.6895932897859596e-05, + "loss": 0.651113748550415, + "step": 1675 + }, + { + "epoch": 0.5906607929515418, + "grad_norm": 1.5475090754915908, + "learning_rate": 1.6891712270015546e-05, + "loss": 0.8062121272087097, + "step": 1676 + }, + { + "epoch": 0.5910132158590309, + "grad_norm": 1.6532405105419041, + "learning_rate": 1.6887489302649657e-05, + "loss": 0.7168683409690857, + "step": 1677 + }, + { + "epoch": 0.5913656387665198, + "grad_norm": 1.5137609810465338, + "learning_rate": 1.6883263997195497e-05, + "loss": 0.6751970052719116, + "step": 1678 + }, + { + "epoch": 0.5917180616740088, + "grad_norm": 1.3244566227591112, + "learning_rate": 1.687903635508742e-05, + "loss": 0.5176222324371338, + "step": 1679 + }, + { + "epoch": 0.5920704845814978, + "grad_norm": 1.532290203616517, + "learning_rate": 1.6874806377760587e-05, + "loss": 0.605686366558075, + "step": 1680 + }, + { + "epoch": 0.5924229074889867, + "grad_norm": 1.55000273332987, + "learning_rate": 1.6870574066650945e-05, + "loss": 0.6927961111068726, + "step": 1681 + }, + { + "epoch": 0.5927753303964758, + "grad_norm": 1.7129667821490024, + "learning_rate": 1.6866339423195223e-05, + "loss": 0.7434122562408447, + "step": 1682 + }, + { + "epoch": 0.5931277533039647, + "grad_norm": 1.6508583062240207, + "learning_rate": 1.6862102448830956e-05, + "loss": 0.5646539926528931, + "step": 1683 + }, + { + "epoch": 0.5934801762114538, + "grad_norm": 1.6845514517525704, + "learning_rate": 1.6857863144996464e-05, + "loss": 0.6666921377182007, + "step": 1684 + }, + { + "epoch": 0.5938325991189427, + "grad_norm": 1.7487162446625693, + "learning_rate": 1.6853621513130857e-05, + "loss": 0.6630325317382812, + "step": 1685 + }, + { + "epoch": 0.5941850220264318, + "grad_norm": 
1.6744610818707069, + "learning_rate": 1.6849377554674042e-05, + "loss": 0.6519981622695923, + "step": 1686 + }, + { + "epoch": 0.5945374449339207, + "grad_norm": 1.523793082989738, + "learning_rate": 1.6845131271066705e-05, + "loss": 0.7958102822303772, + "step": 1687 + }, + { + "epoch": 0.5948898678414097, + "grad_norm": 1.6258620864429363, + "learning_rate": 1.6840882663750333e-05, + "loss": 0.6136632561683655, + "step": 1688 + }, + { + "epoch": 0.5952422907488987, + "grad_norm": 1.5696515726783535, + "learning_rate": 1.683663173416719e-05, + "loss": 0.6177657842636108, + "step": 1689 + }, + { + "epoch": 0.5955947136563877, + "grad_norm": 1.3990187120155009, + "learning_rate": 1.683237848376034e-05, + "loss": 0.7489751577377319, + "step": 1690 + }, + { + "epoch": 0.5959471365638767, + "grad_norm": 1.7037734397554838, + "learning_rate": 1.6828122913973625e-05, + "loss": 0.6749632954597473, + "step": 1691 + }, + { + "epoch": 0.5962995594713656, + "grad_norm": 1.7564038851615957, + "learning_rate": 1.682386502625168e-05, + "loss": 0.6340545415878296, + "step": 1692 + }, + { + "epoch": 0.5966519823788546, + "grad_norm": 1.2684465272191359, + "learning_rate": 1.6819604822039924e-05, + "loss": 0.6141117811203003, + "step": 1693 + }, + { + "epoch": 0.5970044052863436, + "grad_norm": 1.6665435860950566, + "learning_rate": 1.681534230278457e-05, + "loss": 0.7937319874763489, + "step": 1694 + }, + { + "epoch": 0.5973568281938326, + "grad_norm": 1.376760638279742, + "learning_rate": 1.68110774699326e-05, + "loss": 0.6196104288101196, + "step": 1695 + }, + { + "epoch": 0.5977092511013216, + "grad_norm": 1.755256295612453, + "learning_rate": 1.68068103249318e-05, + "loss": 0.6856463551521301, + "step": 1696 + }, + { + "epoch": 0.5980616740088106, + "grad_norm": 1.423055172614558, + "learning_rate": 1.680254086923073e-05, + "loss": 0.754359245300293, + "step": 1697 + }, + { + "epoch": 0.5984140969162995, + "grad_norm": 1.5540819723583295, + "learning_rate": 
1.6798269104278738e-05, + "loss": 0.6663862466812134, + "step": 1698 + }, + { + "epoch": 0.5987665198237886, + "grad_norm": 1.8192134096199304, + "learning_rate": 1.6793995031525955e-05, + "loss": 0.7072615027427673, + "step": 1699 + }, + { + "epoch": 0.5991189427312775, + "grad_norm": 1.3664015344189913, + "learning_rate": 1.678971865242329e-05, + "loss": 0.5722007751464844, + "step": 1700 + }, + { + "epoch": 0.5994713656387666, + "grad_norm": 1.5146739460913152, + "learning_rate": 1.6785439968422456e-05, + "loss": 0.8254455327987671, + "step": 1701 + }, + { + "epoch": 0.5998237885462555, + "grad_norm": 1.6221807995806083, + "learning_rate": 1.678115898097592e-05, + "loss": 0.5726041793823242, + "step": 1702 + }, + { + "epoch": 0.6001762114537444, + "grad_norm": 1.7173506198717712, + "learning_rate": 1.6776875691536946e-05, + "loss": 0.6480926275253296, + "step": 1703 + }, + { + "epoch": 0.6005286343612335, + "grad_norm": 1.801703791100917, + "learning_rate": 1.677259010155958e-05, + "loss": 0.6469742059707642, + "step": 1704 + }, + { + "epoch": 0.6008810572687224, + "grad_norm": 1.6534691770392222, + "learning_rate": 1.6768302212498647e-05, + "loss": 0.814565896987915, + "step": 1705 + }, + { + "epoch": 0.6012334801762115, + "grad_norm": 1.6212192399903926, + "learning_rate": 1.6764012025809745e-05, + "loss": 0.7063060402870178, + "step": 1706 + }, + { + "epoch": 0.6015859030837004, + "grad_norm": 1.2993416112883407, + "learning_rate": 1.6759719542949268e-05, + "loss": 0.6523685455322266, + "step": 1707 + }, + { + "epoch": 0.6019383259911895, + "grad_norm": 1.7291371377992661, + "learning_rate": 1.6755424765374378e-05, + "loss": 0.7361165285110474, + "step": 1708 + }, + { + "epoch": 0.6022907488986784, + "grad_norm": 1.1307969866596985, + "learning_rate": 1.6751127694543012e-05, + "loss": 0.45241934061050415, + "step": 1709 + }, + { + "epoch": 0.6026431718061674, + "grad_norm": 1.3734078208692269, + "learning_rate": 1.6746828331913903e-05, + "loss": 
0.6610431671142578, + "step": 1710 + }, + { + "epoch": 0.6029955947136564, + "grad_norm": 1.6659887779271019, + "learning_rate": 1.674252667894654e-05, + "loss": 0.7572601437568665, + "step": 1711 + }, + { + "epoch": 0.6033480176211454, + "grad_norm": 1.3828996049540105, + "learning_rate": 1.6738222737101205e-05, + "loss": 0.7021572589874268, + "step": 1712 + }, + { + "epoch": 0.6037004405286344, + "grad_norm": 1.5581462402658262, + "learning_rate": 1.6733916507838952e-05, + "loss": 0.7742347121238708, + "step": 1713 + }, + { + "epoch": 0.6040528634361233, + "grad_norm": 1.5666267075277038, + "learning_rate": 1.6729607992621613e-05, + "loss": 0.6453407406806946, + "step": 1714 + }, + { + "epoch": 0.6044052863436123, + "grad_norm": 1.279025328652212, + "learning_rate": 1.6725297192911793e-05, + "loss": 0.7004555463790894, + "step": 1715 + }, + { + "epoch": 0.6047577092511013, + "grad_norm": 1.3482721305547676, + "learning_rate": 1.6720984110172875e-05, + "loss": 0.6979051232337952, + "step": 1716 + }, + { + "epoch": 0.6051101321585903, + "grad_norm": 1.5059245296578512, + "learning_rate": 1.671666874586902e-05, + "loss": 0.6387851238250732, + "step": 1717 + }, + { + "epoch": 0.6054625550660793, + "grad_norm": 1.5397561778856637, + "learning_rate": 1.671235110146515e-05, + "loss": 0.9083811044692993, + "step": 1718 + }, + { + "epoch": 0.6058149779735683, + "grad_norm": 1.637790853716126, + "learning_rate": 1.6708031178426984e-05, + "loss": 0.747002363204956, + "step": 1719 + }, + { + "epoch": 0.6061674008810573, + "grad_norm": 1.6617583077406621, + "learning_rate": 1.6703708978220986e-05, + "loss": 0.7553372383117676, + "step": 1720 + }, + { + "epoch": 0.6065198237885463, + "grad_norm": 1.72002611544435, + "learning_rate": 1.669938450231442e-05, + "loss": 0.762795090675354, + "step": 1721 + }, + { + "epoch": 0.6068722466960352, + "grad_norm": 1.3894206198813077, + "learning_rate": 1.669505775217531e-05, + "loss": 0.739936113357544, + "step": 1722 + }, + { + "epoch": 
0.6072246696035243, + "grad_norm": 1.625344781935558, + "learning_rate": 1.6690728729272456e-05, + "loss": 0.8439112305641174, + "step": 1723 + }, + { + "epoch": 0.6075770925110132, + "grad_norm": 1.5345011506472854, + "learning_rate": 1.6686397435075416e-05, + "loss": 0.6144756078720093, + "step": 1724 + }, + { + "epoch": 0.6079295154185022, + "grad_norm": 1.3674442510472364, + "learning_rate": 1.6682063871054534e-05, + "loss": 0.569161057472229, + "step": 1725 + }, + { + "epoch": 0.6082819383259912, + "grad_norm": 1.6372827589624075, + "learning_rate": 1.6677728038680926e-05, + "loss": 0.7523979544639587, + "step": 1726 + }, + { + "epoch": 0.6086343612334801, + "grad_norm": 1.453986649514636, + "learning_rate": 1.6673389939426463e-05, + "loss": 0.6394520401954651, + "step": 1727 + }, + { + "epoch": 0.6089867841409692, + "grad_norm": 1.358198647287584, + "learning_rate": 1.66690495747638e-05, + "loss": 0.5975633859634399, + "step": 1728 + }, + { + "epoch": 0.6093392070484581, + "grad_norm": 1.6192297143942058, + "learning_rate": 1.666470694616636e-05, + "loss": 0.736790657043457, + "step": 1729 + }, + { + "epoch": 0.6096916299559472, + "grad_norm": 1.4234241508654442, + "learning_rate": 1.6660362055108316e-05, + "loss": 0.7693831920623779, + "step": 1730 + }, + { + "epoch": 0.6100440528634361, + "grad_norm": 1.8032471376275176, + "learning_rate": 1.665601490306464e-05, + "loss": 0.7322608232498169, + "step": 1731 + }, + { + "epoch": 0.6103964757709252, + "grad_norm": 1.3709677099617412, + "learning_rate": 1.6651665491511043e-05, + "loss": 0.6478679180145264, + "step": 1732 + }, + { + "epoch": 0.6107488986784141, + "grad_norm": 1.8838571148858527, + "learning_rate": 1.6647313821924022e-05, + "loss": 0.7125877141952515, + "step": 1733 + }, + { + "epoch": 0.6111013215859031, + "grad_norm": 1.5594770538222507, + "learning_rate": 1.664295989578083e-05, + "loss": 0.8999321460723877, + "step": 1734 + }, + { + "epoch": 0.6114537444933921, + "grad_norm": 1.618421596120734, 
+ "learning_rate": 1.663860371455949e-05, + "loss": 0.6908334493637085, + "step": 1735 + }, + { + "epoch": 0.611806167400881, + "grad_norm": 1.5552403174407248, + "learning_rate": 1.663424527973879e-05, + "loss": 0.6708767414093018, + "step": 1736 + }, + { + "epoch": 0.61215859030837, + "grad_norm": 1.4907630752773764, + "learning_rate": 1.6629884592798283e-05, + "loss": 0.6991565823554993, + "step": 1737 + }, + { + "epoch": 0.612511013215859, + "grad_norm": 1.430459100414143, + "learning_rate": 1.6625521655218287e-05, + "loss": 0.6224193572998047, + "step": 1738 + }, + { + "epoch": 0.612863436123348, + "grad_norm": 1.6355889531807317, + "learning_rate": 1.662115646847988e-05, + "loss": 0.701459527015686, + "step": 1739 + }, + { + "epoch": 0.613215859030837, + "grad_norm": 1.508424771304017, + "learning_rate": 1.6616789034064914e-05, + "loss": 0.784063458442688, + "step": 1740 + }, + { + "epoch": 0.613568281938326, + "grad_norm": 1.4868333492675876, + "learning_rate": 1.661241935345599e-05, + "loss": 0.7604146003723145, + "step": 1741 + }, + { + "epoch": 0.613920704845815, + "grad_norm": 1.7090188741959023, + "learning_rate": 1.6608047428136482e-05, + "loss": 0.6347941160202026, + "step": 1742 + }, + { + "epoch": 0.614273127753304, + "grad_norm": 1.6487656059998825, + "learning_rate": 1.6603673259590524e-05, + "loss": 0.7559434175491333, + "step": 1743 + }, + { + "epoch": 0.6146255506607929, + "grad_norm": 1.5969979245345363, + "learning_rate": 1.6599296849303007e-05, + "loss": 0.742524266242981, + "step": 1744 + }, + { + "epoch": 0.614977973568282, + "grad_norm": 1.2238633556789393, + "learning_rate": 1.6594918198759586e-05, + "loss": 0.697594165802002, + "step": 1745 + }, + { + "epoch": 0.6153303964757709, + "grad_norm": 1.4536023257551807, + "learning_rate": 1.659053730944668e-05, + "loss": 0.7876765131950378, + "step": 1746 + }, + { + "epoch": 0.6156828193832599, + "grad_norm": 1.489887595585156, + "learning_rate": 1.658615418285146e-05, + "loss": 
0.7514386177062988, + "step": 1747 + }, + { + "epoch": 0.6160352422907489, + "grad_norm": 1.6935500501856253, + "learning_rate": 1.658176882046187e-05, + "loss": 0.6220899820327759, + "step": 1748 + }, + { + "epoch": 0.6163876651982378, + "grad_norm": 1.9395284146525182, + "learning_rate": 1.6577381223766592e-05, + "loss": 0.7376539707183838, + "step": 1749 + }, + { + "epoch": 0.6167400881057269, + "grad_norm": 1.6373866531670291, + "learning_rate": 1.6572991394255084e-05, + "loss": 0.8296281099319458, + "step": 1750 + }, + { + "epoch": 0.6170925110132158, + "grad_norm": 1.545978766740828, + "learning_rate": 1.656859933341756e-05, + "loss": 0.7316757440567017, + "step": 1751 + }, + { + "epoch": 0.6174449339207049, + "grad_norm": 1.5280854263636194, + "learning_rate": 1.6564205042744986e-05, + "loss": 0.6933871507644653, + "step": 1752 + }, + { + "epoch": 0.6177973568281938, + "grad_norm": 1.890269396017501, + "learning_rate": 1.655980852372908e-05, + "loss": 0.6835601329803467, + "step": 1753 + }, + { + "epoch": 0.6181497797356829, + "grad_norm": 1.3967466693425752, + "learning_rate": 1.655540977786233e-05, + "loss": 0.6752027869224548, + "step": 1754 + }, + { + "epoch": 0.6185022026431718, + "grad_norm": 1.4944496246124994, + "learning_rate": 1.6551008806637976e-05, + "loss": 0.6092851758003235, + "step": 1755 + }, + { + "epoch": 0.6188546255506608, + "grad_norm": 1.3266652259646856, + "learning_rate": 1.6546605611550008e-05, + "loss": 0.682563066482544, + "step": 1756 + }, + { + "epoch": 0.6192070484581498, + "grad_norm": 1.5302981352911342, + "learning_rate": 1.654220019409317e-05, + "loss": 0.8674311637878418, + "step": 1757 + }, + { + "epoch": 0.6195594713656387, + "grad_norm": 1.4437314589210788, + "learning_rate": 1.6537792555762966e-05, + "loss": 0.7209165096282959, + "step": 1758 + }, + { + "epoch": 0.6199118942731278, + "grad_norm": 1.5958855115050472, + "learning_rate": 1.6533382698055655e-05, + "loss": 0.7795991897583008, + "step": 1759 + }, + { + 
"epoch": 0.6202643171806167, + "grad_norm": 1.6392261912532398, + "learning_rate": 1.6528970622468245e-05, + "loss": 0.6749448776245117, + "step": 1760 + }, + { + "epoch": 0.6206167400881057, + "grad_norm": 1.5291165267411688, + "learning_rate": 1.6524556330498494e-05, + "loss": 0.9127920866012573, + "step": 1761 + }, + { + "epoch": 0.6209691629955947, + "grad_norm": 1.5402491362904795, + "learning_rate": 1.6520139823644922e-05, + "loss": 0.6224071979522705, + "step": 1762 + }, + { + "epoch": 0.6213215859030837, + "grad_norm": 1.426673111398807, + "learning_rate": 1.6515721103406798e-05, + "loss": 0.6955251693725586, + "step": 1763 + }, + { + "epoch": 0.6216740088105727, + "grad_norm": 1.7187740007003602, + "learning_rate": 1.6511300171284132e-05, + "loss": 0.676613986492157, + "step": 1764 + }, + { + "epoch": 0.6220264317180617, + "grad_norm": 1.4024924612217573, + "learning_rate": 1.65068770287777e-05, + "loss": 0.7482033967971802, + "step": 1765 + }, + { + "epoch": 0.6223788546255506, + "grad_norm": 1.4659804586317469, + "learning_rate": 1.6502451677389015e-05, + "loss": 0.6019684076309204, + "step": 1766 + }, + { + "epoch": 0.6227312775330397, + "grad_norm": 1.419796458872072, + "learning_rate": 1.649802411862035e-05, + "loss": 0.6796068549156189, + "step": 1767 + }, + { + "epoch": 0.6230837004405286, + "grad_norm": 2.234008541241949, + "learning_rate": 1.6493594353974724e-05, + "loss": 0.6351302862167358, + "step": 1768 + }, + { + "epoch": 0.6234361233480176, + "grad_norm": 1.4257561009443, + "learning_rate": 1.6489162384955906e-05, + "loss": 0.6093732714653015, + "step": 1769 + }, + { + "epoch": 0.6237885462555066, + "grad_norm": 1.842168854503522, + "learning_rate": 1.6484728213068405e-05, + "loss": 0.8181271553039551, + "step": 1770 + }, + { + "epoch": 0.6241409691629956, + "grad_norm": 1.821206401126196, + "learning_rate": 1.6480291839817488e-05, + "loss": 0.7093993425369263, + "step": 1771 + }, + { + "epoch": 0.6244933920704846, + "grad_norm": 
1.416340976430299, + "learning_rate": 1.6475853266709165e-05, + "loss": 0.6895081996917725, + "step": 1772 + }, + { + "epoch": 0.6248458149779735, + "grad_norm": 1.5970315552720198, + "learning_rate": 1.6471412495250195e-05, + "loss": 0.6706013679504395, + "step": 1773 + }, + { + "epoch": 0.6251982378854626, + "grad_norm": 1.5170788749866242, + "learning_rate": 1.6466969526948082e-05, + "loss": 0.6700015664100647, + "step": 1774 + }, + { + "epoch": 0.6255506607929515, + "grad_norm": 1.5173815641058028, + "learning_rate": 1.6462524363311072e-05, + "loss": 0.6591087579727173, + "step": 1775 + }, + { + "epoch": 0.6259030837004406, + "grad_norm": 1.6219345446237772, + "learning_rate": 1.6458077005848164e-05, + "loss": 0.7775006294250488, + "step": 1776 + }, + { + "epoch": 0.6262555066079295, + "grad_norm": 1.6260525304572828, + "learning_rate": 1.6453627456069093e-05, + "loss": 0.8459682464599609, + "step": 1777 + }, + { + "epoch": 0.6266079295154185, + "grad_norm": 1.4031571304990242, + "learning_rate": 1.6449175715484346e-05, + "loss": 0.6536898612976074, + "step": 1778 + }, + { + "epoch": 0.6269603524229075, + "grad_norm": 1.5129603585000657, + "learning_rate": 1.6444721785605148e-05, + "loss": 0.7543610334396362, + "step": 1779 + }, + { + "epoch": 0.6273127753303964, + "grad_norm": 1.6228520645077271, + "learning_rate": 1.6440265667943474e-05, + "loss": 0.7416362762451172, + "step": 1780 + }, + { + "epoch": 0.6276651982378855, + "grad_norm": 1.4583654660578542, + "learning_rate": 1.6435807364012035e-05, + "loss": 0.5505499839782715, + "step": 1781 + }, + { + "epoch": 0.6280176211453744, + "grad_norm": 1.5252426453600672, + "learning_rate": 1.6431346875324284e-05, + "loss": 0.792723536491394, + "step": 1782 + }, + { + "epoch": 0.6283700440528635, + "grad_norm": 1.3655475423968058, + "learning_rate": 1.6426884203394416e-05, + "loss": 0.6313158273696899, + "step": 1783 + }, + { + "epoch": 0.6287224669603524, + "grad_norm": 1.6057168635576118, + "learning_rate": 
1.642241934973738e-05, + "loss": 0.6168874502182007, + "step": 1784 + }, + { + "epoch": 0.6290748898678414, + "grad_norm": 1.78997265433784, + "learning_rate": 1.6417952315868845e-05, + "loss": 0.6995766162872314, + "step": 1785 + }, + { + "epoch": 0.6294273127753304, + "grad_norm": 1.4835625331683349, + "learning_rate": 1.641348310330523e-05, + "loss": 0.8046826124191284, + "step": 1786 + }, + { + "epoch": 0.6297797356828194, + "grad_norm": 1.4892920408023869, + "learning_rate": 1.6409011713563697e-05, + "loss": 0.7227291464805603, + "step": 1787 + }, + { + "epoch": 0.6301321585903084, + "grad_norm": 1.4682105257113767, + "learning_rate": 1.6404538148162145e-05, + "loss": 0.6463631391525269, + "step": 1788 + }, + { + "epoch": 0.6304845814977974, + "grad_norm": 2.4977643907634, + "learning_rate": 1.640006240861921e-05, + "loss": 0.7473348379135132, + "step": 1789 + }, + { + "epoch": 0.6308370044052863, + "grad_norm": 1.4291329366827183, + "learning_rate": 1.6395584496454263e-05, + "loss": 0.7311505079269409, + "step": 1790 + }, + { + "epoch": 0.6311894273127753, + "grad_norm": 1.5618530036111458, + "learning_rate": 1.639110441318742e-05, + "loss": 0.7259535789489746, + "step": 1791 + }, + { + "epoch": 0.6315418502202643, + "grad_norm": 1.515515721890048, + "learning_rate": 1.6386622160339522e-05, + "loss": 0.5777252912521362, + "step": 1792 + }, + { + "epoch": 0.6318942731277533, + "grad_norm": 1.3190322559386176, + "learning_rate": 1.638213773943216e-05, + "loss": 0.5510598421096802, + "step": 1793 + }, + { + "epoch": 0.6322466960352423, + "grad_norm": 1.5085803548323364, + "learning_rate": 1.637765115198766e-05, + "loss": 0.6448229551315308, + "step": 1794 + }, + { + "epoch": 0.6325991189427312, + "grad_norm": 1.5827276696724286, + "learning_rate": 1.6373162399529067e-05, + "loss": 0.7359289526939392, + "step": 1795 + }, + { + "epoch": 0.6329515418502203, + "grad_norm": 1.5346140091491929, + "learning_rate": 1.6368671483580185e-05, + "loss": 0.616656482219696, + 
"step": 1796 + }, + { + "epoch": 0.6333039647577092, + "grad_norm": 1.4291822350961465, + "learning_rate": 1.6364178405665534e-05, + "loss": 0.5966289043426514, + "step": 1797 + }, + { + "epoch": 0.6336563876651983, + "grad_norm": 1.8727626569458464, + "learning_rate": 1.6359683167310375e-05, + "loss": 0.7475985288619995, + "step": 1798 + }, + { + "epoch": 0.6340088105726872, + "grad_norm": 1.4494645750595028, + "learning_rate": 1.63551857700407e-05, + "loss": 0.6030765771865845, + "step": 1799 + }, + { + "epoch": 0.6343612334801763, + "grad_norm": 1.6637248682130477, + "learning_rate": 1.6350686215383237e-05, + "loss": 0.6193016171455383, + "step": 1800 + }, + { + "epoch": 0.6347136563876652, + "grad_norm": 1.3604775956740969, + "learning_rate": 1.6346184504865442e-05, + "loss": 0.6404513120651245, + "step": 1801 + }, + { + "epoch": 0.6350660792951542, + "grad_norm": 1.5539318450371893, + "learning_rate": 1.6341680640015515e-05, + "loss": 0.8453506231307983, + "step": 1802 + }, + { + "epoch": 0.6354185022026432, + "grad_norm": 1.3642622033336096, + "learning_rate": 1.6337174622362366e-05, + "loss": 0.6094445586204529, + "step": 1803 + }, + { + "epoch": 0.6357709251101321, + "grad_norm": 1.5112522647253264, + "learning_rate": 1.6332666453435653e-05, + "loss": 0.7352159023284912, + "step": 1804 + }, + { + "epoch": 0.6361233480176212, + "grad_norm": 1.4529963307650198, + "learning_rate": 1.632815613476576e-05, + "loss": 0.7395339608192444, + "step": 1805 + }, + { + "epoch": 0.6364757709251101, + "grad_norm": 1.4350925789909401, + "learning_rate": 1.63236436678838e-05, + "loss": 0.7246927618980408, + "step": 1806 + }, + { + "epoch": 0.6368281938325991, + "grad_norm": 1.3653208723694477, + "learning_rate": 1.6319129054321616e-05, + "loss": 0.6913329362869263, + "step": 1807 + }, + { + "epoch": 0.6371806167400881, + "grad_norm": 1.9893098285493216, + "learning_rate": 1.6314612295611772e-05, + "loss": 0.6410515308380127, + "step": 1808 + }, + { + "epoch": 
0.6375330396475771, + "grad_norm": 2.6583918764324665, + "learning_rate": 1.6310093393287574e-05, + "loss": 0.690910816192627, + "step": 1809 + }, + { + "epoch": 0.6378854625550661, + "grad_norm": 1.4623649413484192, + "learning_rate": 1.6305572348883044e-05, + "loss": 0.6520562171936035, + "step": 1810 + }, + { + "epoch": 0.6382378854625551, + "grad_norm": 1.6850706181935027, + "learning_rate": 1.630104916393294e-05, + "loss": 0.6966608166694641, + "step": 1811 + }, + { + "epoch": 0.638590308370044, + "grad_norm": 1.7161033790648312, + "learning_rate": 1.6296523839972743e-05, + "loss": 0.826806902885437, + "step": 1812 + }, + { + "epoch": 0.6389427312775331, + "grad_norm": 1.431569634617566, + "learning_rate": 1.6291996378538653e-05, + "loss": 0.6695773601531982, + "step": 1813 + }, + { + "epoch": 0.639295154185022, + "grad_norm": 1.4264708644101765, + "learning_rate": 1.6287466781167607e-05, + "loss": 0.5725491046905518, + "step": 1814 + }, + { + "epoch": 0.639647577092511, + "grad_norm": 1.2779233324378096, + "learning_rate": 1.628293504939727e-05, + "loss": 0.5543544292449951, + "step": 1815 + }, + { + "epoch": 0.64, + "grad_norm": 3.2997728941963564, + "learning_rate": 1.6278401184766007e-05, + "loss": 0.6964641809463501, + "step": 1816 + }, + { + "epoch": 0.640352422907489, + "grad_norm": 1.3065245679172277, + "learning_rate": 1.6273865188812935e-05, + "loss": 0.675407886505127, + "step": 1817 + }, + { + "epoch": 0.640704845814978, + "grad_norm": 1.4883059032141013, + "learning_rate": 1.626932706307788e-05, + "loss": 0.6304433345794678, + "step": 1818 + }, + { + "epoch": 0.6410572687224669, + "grad_norm": 1.5529882690454875, + "learning_rate": 1.62647868091014e-05, + "loss": 0.7432112693786621, + "step": 1819 + }, + { + "epoch": 0.641409691629956, + "grad_norm": 1.5761551228008874, + "learning_rate": 1.6260244428424763e-05, + "loss": 0.730377197265625, + "step": 1820 + }, + { + "epoch": 0.6417621145374449, + "grad_norm": 1.7239403694554825, + "learning_rate": 
1.6255699922589968e-05, + "loss": 0.694229006767273, + "step": 1821 + }, + { + "epoch": 0.642114537444934, + "grad_norm": 1.5664915948077012, + "learning_rate": 1.6251153293139735e-05, + "loss": 0.7284739017486572, + "step": 1822 + }, + { + "epoch": 0.6424669603524229, + "grad_norm": 1.4047714992661522, + "learning_rate": 1.6246604541617507e-05, + "loss": 0.6028950214385986, + "step": 1823 + }, + { + "epoch": 0.642819383259912, + "grad_norm": 1.65079248713073, + "learning_rate": 1.6242053669567432e-05, + "loss": 0.6776808500289917, + "step": 1824 + }, + { + "epoch": 0.6431718061674009, + "grad_norm": 1.7695857292474644, + "learning_rate": 1.6237500678534396e-05, + "loss": 0.7743366956710815, + "step": 1825 + }, + { + "epoch": 0.6435242290748898, + "grad_norm": 1.594351471613888, + "learning_rate": 1.6232945570064e-05, + "loss": 0.6356723308563232, + "step": 1826 + }, + { + "epoch": 0.6438766519823789, + "grad_norm": 1.4846113103688028, + "learning_rate": 1.622838834570256e-05, + "loss": 0.7356402277946472, + "step": 1827 + }, + { + "epoch": 0.6442290748898678, + "grad_norm": 1.455165750941624, + "learning_rate": 1.622382900699711e-05, + "loss": 0.7639342546463013, + "step": 1828 + }, + { + "epoch": 0.6445814977973569, + "grad_norm": 2.0823946019481987, + "learning_rate": 1.6219267555495407e-05, + "loss": 0.6969513297080994, + "step": 1829 + }, + { + "epoch": 0.6449339207048458, + "grad_norm": 1.418146430885783, + "learning_rate": 1.621470399274592e-05, + "loss": 0.7532765865325928, + "step": 1830 + }, + { + "epoch": 0.6452863436123348, + "grad_norm": 1.3893974330709622, + "learning_rate": 1.6210138320297835e-05, + "loss": 0.5801100730895996, + "step": 1831 + }, + { + "epoch": 0.6456387665198238, + "grad_norm": 1.5780391931120195, + "learning_rate": 1.6205570539701056e-05, + "loss": 0.8006102442741394, + "step": 1832 + }, + { + "epoch": 0.6459911894273128, + "grad_norm": 1.4094927188728377, + "learning_rate": 1.6201000652506203e-05, + "loss": 0.6507089138031006, + 
"step": 1833 + }, + { + "epoch": 0.6463436123348018, + "grad_norm": 1.9684758989320281, + "learning_rate": 1.619642866026461e-05, + "loss": 0.7407999634742737, + "step": 1834 + }, + { + "epoch": 0.6466960352422908, + "grad_norm": 1.4160609898798358, + "learning_rate": 1.619185456452833e-05, + "loss": 0.6964670419692993, + "step": 1835 + }, + { + "epoch": 0.6470484581497797, + "grad_norm": 1.6614634508995256, + "learning_rate": 1.6187278366850122e-05, + "loss": 0.7095489501953125, + "step": 1836 + }, + { + "epoch": 0.6474008810572687, + "grad_norm": 2.0391949894277017, + "learning_rate": 1.6182700068783463e-05, + "loss": 0.6968166828155518, + "step": 1837 + }, + { + "epoch": 0.6477533039647577, + "grad_norm": 1.3206477384834772, + "learning_rate": 1.617811967188254e-05, + "loss": 0.7745821475982666, + "step": 1838 + }, + { + "epoch": 0.6481057268722467, + "grad_norm": 1.4803456865319338, + "learning_rate": 1.6173537177702266e-05, + "loss": 0.7071934938430786, + "step": 1839 + }, + { + "epoch": 0.6484581497797357, + "grad_norm": 1.7225763324537737, + "learning_rate": 1.6168952587798242e-05, + "loss": 0.6481701135635376, + "step": 1840 + }, + { + "epoch": 0.6488105726872246, + "grad_norm": 1.4447543914645467, + "learning_rate": 1.6164365903726805e-05, + "loss": 0.6349890232086182, + "step": 1841 + }, + { + "epoch": 0.6491629955947137, + "grad_norm": 1.3913908457554178, + "learning_rate": 1.6159777127044982e-05, + "loss": 0.6067368388175964, + "step": 1842 + }, + { + "epoch": 0.6495154185022026, + "grad_norm": 1.3943413375617566, + "learning_rate": 1.6155186259310523e-05, + "loss": 0.7170778512954712, + "step": 1843 + }, + { + "epoch": 0.6498678414096917, + "grad_norm": 1.4309397568408155, + "learning_rate": 1.6150593302081888e-05, + "loss": 0.5623376965522766, + "step": 1844 + }, + { + "epoch": 0.6502202643171806, + "grad_norm": 1.442096873601557, + "learning_rate": 1.6145998256918238e-05, + "loss": 0.7295233607292175, + "step": 1845 + }, + { + "epoch": 
0.6505726872246697, + "grad_norm": 1.513681766461532, + "learning_rate": 1.6141401125379454e-05, + "loss": 0.6991151571273804, + "step": 1846 + }, + { + "epoch": 0.6509251101321586, + "grad_norm": 1.568060173563952, + "learning_rate": 1.6136801909026113e-05, + "loss": 0.7553545236587524, + "step": 1847 + }, + { + "epoch": 0.6512775330396475, + "grad_norm": 1.560177534517688, + "learning_rate": 1.613220060941951e-05, + "loss": 0.8280071020126343, + "step": 1848 + }, + { + "epoch": 0.6516299559471366, + "grad_norm": 1.3846780543862842, + "learning_rate": 1.6127597228121636e-05, + "loss": 0.662299633026123, + "step": 1849 + }, + { + "epoch": 0.6519823788546255, + "grad_norm": 1.519733781984336, + "learning_rate": 1.6122991766695206e-05, + "loss": 0.6493197679519653, + "step": 1850 + }, + { + "epoch": 0.6523348017621146, + "grad_norm": 1.5074834442694671, + "learning_rate": 1.6118384226703623e-05, + "loss": 0.5910629034042358, + "step": 1851 + }, + { + "epoch": 0.6526872246696035, + "grad_norm": 1.5082942143966174, + "learning_rate": 1.611377460971101e-05, + "loss": 0.7124426364898682, + "step": 1852 + }, + { + "epoch": 0.6530396475770925, + "grad_norm": 1.6734021483912949, + "learning_rate": 1.610916291728218e-05, + "loss": 0.6081063747406006, + "step": 1853 + }, + { + "epoch": 0.6533920704845815, + "grad_norm": 1.5485445677219123, + "learning_rate": 1.6104549150982666e-05, + "loss": 0.7536673545837402, + "step": 1854 + }, + { + "epoch": 0.6537444933920705, + "grad_norm": 1.5239612944966212, + "learning_rate": 1.6099933312378695e-05, + "loss": 0.6514976024627686, + "step": 1855 + }, + { + "epoch": 0.6540969162995595, + "grad_norm": 1.3951117738157057, + "learning_rate": 1.6095315403037205e-05, + "loss": 0.6595193147659302, + "step": 1856 + }, + { + "epoch": 0.6544493392070485, + "grad_norm": 1.5562205804379312, + "learning_rate": 1.6090695424525826e-05, + "loss": 0.666920006275177, + "step": 1857 + }, + { + "epoch": 0.6548017621145374, + "grad_norm": 
1.5350434119319913, + "learning_rate": 1.6086073378412902e-05, + "loss": 0.5984979271888733, + "step": 1858 + }, + { + "epoch": 0.6551541850220264, + "grad_norm": 1.8541188470544154, + "learning_rate": 1.608144926626747e-05, + "loss": 0.8021191358566284, + "step": 1859 + }, + { + "epoch": 0.6555066079295154, + "grad_norm": 1.5029675710659876, + "learning_rate": 1.6076823089659272e-05, + "loss": 0.7368075847625732, + "step": 1860 + }, + { + "epoch": 0.6558590308370044, + "grad_norm": 1.596711606351331, + "learning_rate": 1.6072194850158755e-05, + "loss": 0.7923766374588013, + "step": 1861 + }, + { + "epoch": 0.6562114537444934, + "grad_norm": 1.6332800469997777, + "learning_rate": 1.606756454933706e-05, + "loss": 0.6907824873924255, + "step": 1862 + }, + { + "epoch": 0.6565638766519823, + "grad_norm": 1.5674543537069574, + "learning_rate": 1.606293218876603e-05, + "loss": 0.7366634607315063, + "step": 1863 + }, + { + "epoch": 0.6569162995594714, + "grad_norm": 1.7550517656533429, + "learning_rate": 1.6058297770018208e-05, + "loss": 0.7166022658348083, + "step": 1864 + }, + { + "epoch": 0.6572687224669603, + "grad_norm": 1.5153527205809505, + "learning_rate": 1.6053661294666833e-05, + "loss": 0.6969404220581055, + "step": 1865 + }, + { + "epoch": 0.6576211453744494, + "grad_norm": 1.5681332930444218, + "learning_rate": 1.6049022764285846e-05, + "loss": 0.7182974815368652, + "step": 1866 + }, + { + "epoch": 0.6579735682819383, + "grad_norm": 2.620263422686914, + "learning_rate": 1.6044382180449886e-05, + "loss": 0.7469301819801331, + "step": 1867 + }, + { + "epoch": 0.6583259911894274, + "grad_norm": 1.458082221775431, + "learning_rate": 1.603973954473428e-05, + "loss": 0.7097122073173523, + "step": 1868 + }, + { + "epoch": 0.6586784140969163, + "grad_norm": 1.3404337000381439, + "learning_rate": 1.6035094858715065e-05, + "loss": 0.6907291412353516, + "step": 1869 + }, + { + "epoch": 0.6590308370044052, + "grad_norm": 1.5576579616406543, + "learning_rate": 
1.6030448123968963e-05, + "loss": 0.6259130239486694, + "step": 1870 + }, + { + "epoch": 0.6593832599118943, + "grad_norm": 1.6431810286043311, + "learning_rate": 1.6025799342073397e-05, + "loss": 0.6948051452636719, + "step": 1871 + }, + { + "epoch": 0.6597356828193832, + "grad_norm": 1.3540961323396474, + "learning_rate": 1.602114851460648e-05, + "loss": 0.7037572264671326, + "step": 1872 + }, + { + "epoch": 0.6600881057268723, + "grad_norm": 1.565352238933419, + "learning_rate": 1.6016495643147036e-05, + "loss": 0.7728864550590515, + "step": 1873 + }, + { + "epoch": 0.6604405286343612, + "grad_norm": 1.4345290675539004, + "learning_rate": 1.601184072927456e-05, + "loss": 0.7782067060470581, + "step": 1874 + }, + { + "epoch": 0.6607929515418502, + "grad_norm": 1.4505913839056241, + "learning_rate": 1.6007183774569246e-05, + "loss": 0.6168591976165771, + "step": 1875 + }, + { + "epoch": 0.6611453744493392, + "grad_norm": 1.6465062301007323, + "learning_rate": 1.6002524780611995e-05, + "loss": 0.702346920967102, + "step": 1876 + }, + { + "epoch": 0.6614977973568282, + "grad_norm": 1.6478258582343996, + "learning_rate": 1.5997863748984384e-05, + "loss": 0.6084239482879639, + "step": 1877 + }, + { + "epoch": 0.6618502202643172, + "grad_norm": 1.5841429013244157, + "learning_rate": 1.5993200681268696e-05, + "loss": 0.8307315707206726, + "step": 1878 + }, + { + "epoch": 0.6622026431718062, + "grad_norm": 1.8073980879357947, + "learning_rate": 1.5988535579047888e-05, + "loss": 0.6465811729431152, + "step": 1879 + }, + { + "epoch": 0.6625550660792952, + "grad_norm": 1.5593829827457022, + "learning_rate": 1.598386844390562e-05, + "loss": 0.71415114402771, + "step": 1880 + }, + { + "epoch": 0.6629074889867841, + "grad_norm": 6.602062472303997, + "learning_rate": 1.5979199277426243e-05, + "loss": 0.7135012149810791, + "step": 1881 + }, + { + "epoch": 0.6632599118942731, + "grad_norm": 1.584805815321856, + "learning_rate": 1.597452808119479e-05, + "loss": 0.840306282043457, 
+ "step": 1882 + }, + { + "epoch": 0.6636123348017621, + "grad_norm": 1.454651140369818, + "learning_rate": 1.596985485679699e-05, + "loss": 0.622429609298706, + "step": 1883 + }, + { + "epoch": 0.6639647577092511, + "grad_norm": 1.5798478269154124, + "learning_rate": 1.5965179605819248e-05, + "loss": 0.6505612134933472, + "step": 1884 + }, + { + "epoch": 0.66431718061674, + "grad_norm": 1.4292089389404006, + "learning_rate": 1.5960502329848683e-05, + "loss": 0.7665247917175293, + "step": 1885 + }, + { + "epoch": 0.6646696035242291, + "grad_norm": 1.614107737492675, + "learning_rate": 1.5955823030473068e-05, + "loss": 0.7780051231384277, + "step": 1886 + }, + { + "epoch": 0.665022026431718, + "grad_norm": 1.4074097920809756, + "learning_rate": 1.5951141709280886e-05, + "loss": 0.6311650276184082, + "step": 1887 + }, + { + "epoch": 0.6653744493392071, + "grad_norm": 1.287734360896639, + "learning_rate": 1.5946458367861302e-05, + "loss": 0.7126712799072266, + "step": 1888 + }, + { + "epoch": 0.665726872246696, + "grad_norm": 1.3823278268773909, + "learning_rate": 1.5941773007804165e-05, + "loss": 0.6979397535324097, + "step": 1889 + }, + { + "epoch": 0.6660792951541851, + "grad_norm": 1.5067230035216896, + "learning_rate": 1.5937085630700003e-05, + "loss": 0.7065495252609253, + "step": 1890 + }, + { + "epoch": 0.666431718061674, + "grad_norm": 1.373677820269664, + "learning_rate": 1.593239623814004e-05, + "loss": 0.6157221794128418, + "step": 1891 + }, + { + "epoch": 0.6667841409691629, + "grad_norm": 1.6157271272896285, + "learning_rate": 1.5927704831716177e-05, + "loss": 0.6835625171661377, + "step": 1892 + }, + { + "epoch": 0.667136563876652, + "grad_norm": 1.5002309814069255, + "learning_rate": 1.5923011413021e-05, + "loss": 0.6416822671890259, + "step": 1893 + }, + { + "epoch": 0.6674889867841409, + "grad_norm": 1.4507514621746327, + "learning_rate": 1.5918315983647782e-05, + "loss": 0.7307168245315552, + "step": 1894 + }, + { + "epoch": 0.66784140969163, + 
"grad_norm": 1.3321086634513644, + "learning_rate": 1.5913618545190468e-05, + "loss": 0.5464824438095093, + "step": 1895 + }, + { + "epoch": 0.6681938325991189, + "grad_norm": 1.544912001907108, + "learning_rate": 1.5908919099243698e-05, + "loss": 0.6634502410888672, + "step": 1896 + }, + { + "epoch": 0.668546255506608, + "grad_norm": 1.2985703589965545, + "learning_rate": 1.5904217647402788e-05, + "loss": 0.719158411026001, + "step": 1897 + }, + { + "epoch": 0.6688986784140969, + "grad_norm": 1.5083721998375157, + "learning_rate": 1.5899514191263733e-05, + "loss": 0.7547527551651001, + "step": 1898 + }, + { + "epoch": 0.6692511013215859, + "grad_norm": 1.6226125781851348, + "learning_rate": 1.5894808732423207e-05, + "loss": 0.7549886703491211, + "step": 1899 + }, + { + "epoch": 0.6696035242290749, + "grad_norm": 1.5327056521201368, + "learning_rate": 1.589010127247857e-05, + "loss": 0.7107831239700317, + "step": 1900 + }, + { + "epoch": 0.6699559471365639, + "grad_norm": 1.5679371113552734, + "learning_rate": 1.588539181302786e-05, + "loss": 0.855078935623169, + "step": 1901 + }, + { + "epoch": 0.6703083700440529, + "grad_norm": 1.4970896726818788, + "learning_rate": 1.5880680355669792e-05, + "loss": 0.8235266208648682, + "step": 1902 + }, + { + "epoch": 0.6706607929515418, + "grad_norm": 1.339674008175079, + "learning_rate": 1.587596690200375e-05, + "loss": 0.6060166358947754, + "step": 1903 + }, + { + "epoch": 0.6710132158590308, + "grad_norm": 1.4603163291197105, + "learning_rate": 1.5871251453629817e-05, + "loss": 0.7325272560119629, + "step": 1904 + }, + { + "epoch": 0.6713656387665198, + "grad_norm": 1.5470128203990354, + "learning_rate": 1.586653401214873e-05, + "loss": 0.674901008605957, + "step": 1905 + }, + { + "epoch": 0.6717180616740088, + "grad_norm": 1.3515017914848853, + "learning_rate": 1.5861814579161928e-05, + "loss": 0.767164945602417, + "step": 1906 + }, + { + "epoch": 0.6720704845814978, + "grad_norm": 1.3633425183694836, + "learning_rate": 
1.5857093156271496e-05, + "loss": 0.5691556930541992, + "step": 1907 + }, + { + "epoch": 0.6724229074889868, + "grad_norm": 1.3106038540183678, + "learning_rate": 1.585236974508022e-05, + "loss": 0.6885931491851807, + "step": 1908 + }, + { + "epoch": 0.6727753303964757, + "grad_norm": 1.143239709830434, + "learning_rate": 1.5847644347191545e-05, + "loss": 0.6227391958236694, + "step": 1909 + }, + { + "epoch": 0.6731277533039648, + "grad_norm": 1.4883434470080177, + "learning_rate": 1.5842916964209602e-05, + "loss": 0.6084527969360352, + "step": 1910 + }, + { + "epoch": 0.6734801762114537, + "grad_norm": 1.7178691294348742, + "learning_rate": 1.583818759773919e-05, + "loss": 0.7001935243606567, + "step": 1911 + }, + { + "epoch": 0.6738325991189428, + "grad_norm": 1.684468384573203, + "learning_rate": 1.5833456249385774e-05, + "loss": 0.8263465166091919, + "step": 1912 + }, + { + "epoch": 0.6741850220264317, + "grad_norm": 1.6085564780466834, + "learning_rate": 1.582872292075551e-05, + "loss": 0.662792444229126, + "step": 1913 + }, + { + "epoch": 0.6745374449339208, + "grad_norm": 1.7464203558320361, + "learning_rate": 1.582398761345521e-05, + "loss": 0.7093051075935364, + "step": 1914 + }, + { + "epoch": 0.6748898678414097, + "grad_norm": 1.4885122105608484, + "learning_rate": 1.5819250329092364e-05, + "loss": 0.7264106273651123, + "step": 1915 + }, + { + "epoch": 0.6752422907488986, + "grad_norm": 1.5383309179609377, + "learning_rate": 1.581451106927513e-05, + "loss": 0.6561543345451355, + "step": 1916 + }, + { + "epoch": 0.6755947136563877, + "grad_norm": 1.634971670239321, + "learning_rate": 1.580976983561235e-05, + "loss": 0.6563262939453125, + "step": 1917 + }, + { + "epoch": 0.6759471365638766, + "grad_norm": 1.2931579342976025, + "learning_rate": 1.5805026629713512e-05, + "loss": 0.5224509239196777, + "step": 1918 + }, + { + "epoch": 0.6762995594713657, + "grad_norm": 1.4840746720603137, + "learning_rate": 1.5800281453188793e-05, + "loss": 0.6565898656845093, 
+ "step": 1919 + }, + { + "epoch": 0.6766519823788546, + "grad_norm": 1.4375600407888718, + "learning_rate": 1.5795534307649032e-05, + "loss": 0.7954028844833374, + "step": 1920 + }, + { + "epoch": 0.6770044052863436, + "grad_norm": 1.3454762773409146, + "learning_rate": 1.579078519470574e-05, + "loss": 0.6624404788017273, + "step": 1921 + }, + { + "epoch": 0.6773568281938326, + "grad_norm": 1.5514355338443828, + "learning_rate": 1.5786034115971083e-05, + "loss": 0.840311586856842, + "step": 1922 + }, + { + "epoch": 0.6777092511013216, + "grad_norm": 1.5163172495660509, + "learning_rate": 1.578128107305792e-05, + "loss": 0.6967859864234924, + "step": 1923 + }, + { + "epoch": 0.6780616740088106, + "grad_norm": 1.0735596232953704, + "learning_rate": 1.5776526067579746e-05, + "loss": 0.5295379161834717, + "step": 1924 + }, + { + "epoch": 0.6784140969162996, + "grad_norm": 1.8118747234451476, + "learning_rate": 1.5771769101150752e-05, + "loss": 0.6758475303649902, + "step": 1925 + }, + { + "epoch": 0.6787665198237885, + "grad_norm": 1.3510918406813899, + "learning_rate": 1.576701017538577e-05, + "loss": 0.6891785860061646, + "step": 1926 + }, + { + "epoch": 0.6791189427312775, + "grad_norm": 1.4115910497948105, + "learning_rate": 1.5762249291900304e-05, + "loss": 0.6507086157798767, + "step": 1927 + }, + { + "epoch": 0.6794713656387665, + "grad_norm": 1.4168935733459347, + "learning_rate": 1.5757486452310537e-05, + "loss": 0.6220029592514038, + "step": 1928 + }, + { + "epoch": 0.6798237885462555, + "grad_norm": 1.5134078284046213, + "learning_rate": 1.5752721658233294e-05, + "loss": 0.7742874622344971, + "step": 1929 + }, + { + "epoch": 0.6801762114537445, + "grad_norm": 1.513809055671425, + "learning_rate": 1.5747954911286085e-05, + "loss": 0.6895851492881775, + "step": 1930 + }, + { + "epoch": 0.6805286343612335, + "grad_norm": 1.6367265924041048, + "learning_rate": 1.5743186213087062e-05, + "loss": 0.71466064453125, + "step": 1931 + }, + { + "epoch": 
0.6808810572687225, + "grad_norm": 1.506916023064254, + "learning_rate": 1.5738415565255056e-05, + "loss": 0.6465627551078796, + "step": 1932 + }, + { + "epoch": 0.6812334801762114, + "grad_norm": 1.3796886447957644, + "learning_rate": 1.5733642969409553e-05, + "loss": 0.7592962980270386, + "step": 1933 + }, + { + "epoch": 0.6815859030837005, + "grad_norm": 1.662185742102518, + "learning_rate": 1.57288684271707e-05, + "loss": 0.7641816735267639, + "step": 1934 + }, + { + "epoch": 0.6819383259911894, + "grad_norm": 1.5600426648231815, + "learning_rate": 1.5724091940159306e-05, + "loss": 0.7015130519866943, + "step": 1935 + }, + { + "epoch": 0.6822907488986785, + "grad_norm": 1.5031237824980206, + "learning_rate": 1.5719313509996833e-05, + "loss": 0.7851461172103882, + "step": 1936 + }, + { + "epoch": 0.6826431718061674, + "grad_norm": 1.5670991097913773, + "learning_rate": 1.571453313830542e-05, + "loss": 0.7924813628196716, + "step": 1937 + }, + { + "epoch": 0.6829955947136563, + "grad_norm": 1.3030215719290177, + "learning_rate": 1.570975082670785e-05, + "loss": 0.6082741022109985, + "step": 1938 + }, + { + "epoch": 0.6833480176211454, + "grad_norm": 1.5878638287998994, + "learning_rate": 1.5704966576827563e-05, + "loss": 0.7307756543159485, + "step": 1939 + }, + { + "epoch": 0.6837004405286343, + "grad_norm": 1.421111197077357, + "learning_rate": 1.570018039028867e-05, + "loss": 0.6877273917198181, + "step": 1940 + }, + { + "epoch": 0.6840528634361234, + "grad_norm": 3.222041323215856, + "learning_rate": 1.5695392268715934e-05, + "loss": 0.7702943086624146, + "step": 1941 + }, + { + "epoch": 0.6844052863436123, + "grad_norm": 1.3182333231384877, + "learning_rate": 1.569060221373477e-05, + "loss": 0.6576820611953735, + "step": 1942 + }, + { + "epoch": 0.6847577092511014, + "grad_norm": 1.6178003008675335, + "learning_rate": 1.568581022697125e-05, + "loss": 0.6605322360992432, + "step": 1943 + }, + { + "epoch": 0.6851101321585903, + "grad_norm": 1.5479637201173908, 
+ "learning_rate": 1.568101631005211e-05, + "loss": 0.8065364360809326, + "step": 1944 + }, + { + "epoch": 0.6854625550660793, + "grad_norm": 1.5909483515555374, + "learning_rate": 1.5676220464604726e-05, + "loss": 0.8018748164176941, + "step": 1945 + }, + { + "epoch": 0.6858149779735683, + "grad_norm": 1.4496461628107289, + "learning_rate": 1.567142269225715e-05, + "loss": 0.6114683151245117, + "step": 1946 + }, + { + "epoch": 0.6861674008810573, + "grad_norm": 1.4567709922330223, + "learning_rate": 1.566662299463807e-05, + "loss": 0.8470789194107056, + "step": 1947 + }, + { + "epoch": 0.6865198237885463, + "grad_norm": 1.4716494157627575, + "learning_rate": 1.5661821373376837e-05, + "loss": 0.7133561372756958, + "step": 1948 + }, + { + "epoch": 0.6868722466960352, + "grad_norm": 1.6398709503866558, + "learning_rate": 1.5657017830103448e-05, + "loss": 0.9101625084877014, + "step": 1949 + }, + { + "epoch": 0.6872246696035242, + "grad_norm": 1.8312595153810016, + "learning_rate": 1.565221236644856e-05, + "loss": 0.7395101189613342, + "step": 1950 + }, + { + "epoch": 0.6875770925110132, + "grad_norm": 1.4532682115054107, + "learning_rate": 1.5647404984043474e-05, + "loss": 0.7421061992645264, + "step": 1951 + }, + { + "epoch": 0.6879295154185022, + "grad_norm": 1.4495130982943423, + "learning_rate": 1.5642595684520154e-05, + "loss": 0.8744432330131531, + "step": 1952 + }, + { + "epoch": 0.6882819383259912, + "grad_norm": 1.6475850419823541, + "learning_rate": 1.56377844695112e-05, + "loss": 0.8043868541717529, + "step": 1953 + }, + { + "epoch": 0.6886343612334802, + "grad_norm": 1.444538108927131, + "learning_rate": 1.5632971340649873e-05, + "loss": 0.6231396198272705, + "step": 1954 + }, + { + "epoch": 0.6889867841409691, + "grad_norm": 1.3765988847280666, + "learning_rate": 1.562815629957008e-05, + "loss": 0.7791434526443481, + "step": 1955 + }, + { + "epoch": 0.6893392070484582, + "grad_norm": 1.2135950275511538, + "learning_rate": 1.5623339347906383e-05, + 
"loss": 0.5652475357055664, + "step": 1956 + }, + { + "epoch": 0.6896916299559471, + "grad_norm": 1.4607959644694648, + "learning_rate": 1.561852048729398e-05, + "loss": 0.611067533493042, + "step": 1957 + }, + { + "epoch": 0.6900440528634362, + "grad_norm": 1.2569255893474116, + "learning_rate": 1.5613699719368724e-05, + "loss": 0.7580389976501465, + "step": 1958 + }, + { + "epoch": 0.6903964757709251, + "grad_norm": 1.516048041026883, + "learning_rate": 1.560887704576712e-05, + "loss": 0.6841205954551697, + "step": 1959 + }, + { + "epoch": 0.690748898678414, + "grad_norm": 1.7678860610521125, + "learning_rate": 1.5604052468126315e-05, + "loss": 0.7600575089454651, + "step": 1960 + }, + { + "epoch": 0.6911013215859031, + "grad_norm": 1.458096987341084, + "learning_rate": 1.55992259880841e-05, + "loss": 0.7547114491462708, + "step": 1961 + }, + { + "epoch": 0.691453744493392, + "grad_norm": 1.3490975617996133, + "learning_rate": 1.5594397607278912e-05, + "loss": 0.6917474865913391, + "step": 1962 + }, + { + "epoch": 0.6918061674008811, + "grad_norm": 1.378212312699651, + "learning_rate": 1.5589567327349845e-05, + "loss": 0.6820487976074219, + "step": 1963 + }, + { + "epoch": 0.69215859030837, + "grad_norm": 1.4687305992297937, + "learning_rate": 1.5584735149936628e-05, + "loss": 0.6513597965240479, + "step": 1964 + }, + { + "epoch": 0.6925110132158591, + "grad_norm": 1.4807223837447299, + "learning_rate": 1.5579901076679625e-05, + "loss": 0.668257474899292, + "step": 1965 + }, + { + "epoch": 0.692863436123348, + "grad_norm": 1.5130451892313703, + "learning_rate": 1.5575065109219864e-05, + "loss": 0.7600705623626709, + "step": 1966 + }, + { + "epoch": 0.693215859030837, + "grad_norm": 1.5218611988458295, + "learning_rate": 1.5570227249198993e-05, + "loss": 0.8140011429786682, + "step": 1967 + }, + { + "epoch": 0.693568281938326, + "grad_norm": 1.1438716908088957, + "learning_rate": 1.556538749825933e-05, + "loss": 0.610436201095581, + "step": 1968 + }, + { + 
"epoch": 0.693920704845815, + "grad_norm": 1.7706616264872619, + "learning_rate": 1.556054585804381e-05, + "loss": 0.7745693922042847, + "step": 1969 + }, + { + "epoch": 0.694273127753304, + "grad_norm": 1.4076568647110412, + "learning_rate": 1.5555702330196024e-05, + "loss": 0.5809592008590698, + "step": 1970 + }, + { + "epoch": 0.6946255506607929, + "grad_norm": 1.220751429593537, + "learning_rate": 1.5550856916360195e-05, + "loss": 0.6354515552520752, + "step": 1971 + }, + { + "epoch": 0.694977973568282, + "grad_norm": 1.4513364815061058, + "learning_rate": 1.5546009618181194e-05, + "loss": 0.8076149225234985, + "step": 1972 + }, + { + "epoch": 0.6953303964757709, + "grad_norm": 1.6702158357132753, + "learning_rate": 1.5541160437304524e-05, + "loss": 0.7553249597549438, + "step": 1973 + }, + { + "epoch": 0.6956828193832599, + "grad_norm": 1.4495619596653457, + "learning_rate": 1.5536309375376332e-05, + "loss": 0.6109169125556946, + "step": 1974 + }, + { + "epoch": 0.6960352422907489, + "grad_norm": 1.4052818449921982, + "learning_rate": 1.5531456434043404e-05, + "loss": 0.8184436559677124, + "step": 1975 + }, + { + "epoch": 0.6963876651982379, + "grad_norm": 1.3611746850672197, + "learning_rate": 1.5526601614953164e-05, + "loss": 0.6823909878730774, + "step": 1976 + }, + { + "epoch": 0.6967400881057269, + "grad_norm": 1.3254402340100906, + "learning_rate": 1.5521744919753668e-05, + "loss": 0.6669045090675354, + "step": 1977 + }, + { + "epoch": 0.6970925110132159, + "grad_norm": 1.7752129025350782, + "learning_rate": 1.5516886350093617e-05, + "loss": 0.8054187297821045, + "step": 1978 + }, + { + "epoch": 0.6974449339207048, + "grad_norm": 1.6379915816078137, + "learning_rate": 1.551202590762234e-05, + "loss": 0.7089184522628784, + "step": 1979 + }, + { + "epoch": 0.6977973568281939, + "grad_norm": 1.5207382048575195, + "learning_rate": 1.5507163593989804e-05, + "loss": 0.7908214330673218, + "step": 1980 + }, + { + "epoch": 0.6981497797356828, + "grad_norm": 
1.454323961299799, + "learning_rate": 1.5502299410846626e-05, + "loss": 0.8859039545059204, + "step": 1981 + }, + { + "epoch": 0.6985022026431718, + "grad_norm": 1.5085321450966587, + "learning_rate": 1.549743335984403e-05, + "loss": 0.7156866788864136, + "step": 1982 + }, + { + "epoch": 0.6988546255506608, + "grad_norm": 1.4496904801370623, + "learning_rate": 1.5492565442633894e-05, + "loss": 0.6158934831619263, + "step": 1983 + }, + { + "epoch": 0.6992070484581497, + "grad_norm": 1.5453977055484032, + "learning_rate": 1.548769566086873e-05, + "loss": 0.6689192056655884, + "step": 1984 + }, + { + "epoch": 0.6995594713656388, + "grad_norm": 1.4591630403591411, + "learning_rate": 1.548282401620167e-05, + "loss": 0.6695841550827026, + "step": 1985 + }, + { + "epoch": 0.6999118942731277, + "grad_norm": 1.6161480882103554, + "learning_rate": 1.5477950510286488e-05, + "loss": 0.7196098566055298, + "step": 1986 + }, + { + "epoch": 0.7002643171806168, + "grad_norm": 1.5261033448052712, + "learning_rate": 1.5473075144777586e-05, + "loss": 0.7811123132705688, + "step": 1987 + }, + { + "epoch": 0.7006167400881057, + "grad_norm": 1.3902237132074229, + "learning_rate": 1.5468197921330006e-05, + "loss": 0.6341326236724854, + "step": 1988 + }, + { + "epoch": 0.7009691629955948, + "grad_norm": 1.4052051959904983, + "learning_rate": 1.5463318841599408e-05, + "loss": 0.6344352960586548, + "step": 1989 + }, + { + "epoch": 0.7013215859030837, + "grad_norm": 1.5015659436227353, + "learning_rate": 1.5458437907242084e-05, + "loss": 0.6708072423934937, + "step": 1990 + }, + { + "epoch": 0.7016740088105727, + "grad_norm": 1.4551372124338164, + "learning_rate": 1.5453555119914963e-05, + "loss": 0.7018578052520752, + "step": 1991 + }, + { + "epoch": 0.7020264317180617, + "grad_norm": 1.4651591378979865, + "learning_rate": 1.5448670481275604e-05, + "loss": 0.6966190338134766, + "step": 1992 + }, + { + "epoch": 0.7023788546255506, + "grad_norm": 1.2815956936347872, + "learning_rate": 
1.5443783992982182e-05, + "loss": 0.6280171871185303, + "step": 1993 + }, + { + "epoch": 0.7027312775330397, + "grad_norm": 1.451492070117077, + "learning_rate": 1.5438895656693512e-05, + "loss": 0.6644559502601624, + "step": 1994 + }, + { + "epoch": 0.7030837004405286, + "grad_norm": 1.5030450433681415, + "learning_rate": 1.543400547406903e-05, + "loss": 0.776411771774292, + "step": 1995 + }, + { + "epoch": 0.7034361233480176, + "grad_norm": 1.428531901666428, + "learning_rate": 1.5429113446768805e-05, + "loss": 0.6353679895401001, + "step": 1996 + }, + { + "epoch": 0.7037885462555066, + "grad_norm": 1.468487936335314, + "learning_rate": 1.5424219576453526e-05, + "loss": 0.686774492263794, + "step": 1997 + }, + { + "epoch": 0.7041409691629956, + "grad_norm": 1.2525683766202464, + "learning_rate": 1.5419323864784508e-05, + "loss": 0.5296701192855835, + "step": 1998 + }, + { + "epoch": 0.7044933920704846, + "grad_norm": 1.264413948230812, + "learning_rate": 1.5414426313423692e-05, + "loss": 0.6246802806854248, + "step": 1999 + }, + { + "epoch": 0.7048458149779736, + "grad_norm": 1.44172793688486, + "learning_rate": 1.5409526924033646e-05, + "loss": 0.6633912920951843, + "step": 2000 + }, + { + "epoch": 0.7051982378854625, + "grad_norm": 1.720413855985522, + "learning_rate": 1.540462569827756e-05, + "loss": 0.7324577569961548, + "step": 2001 + }, + { + "epoch": 0.7055506607929516, + "grad_norm": 1.6372387419200998, + "learning_rate": 1.539972263781925e-05, + "loss": 0.7988085746765137, + "step": 2002 + }, + { + "epoch": 0.7059030837004405, + "grad_norm": 1.4528481393218415, + "learning_rate": 1.539481774432315e-05, + "loss": 0.6761256456375122, + "step": 2003 + }, + { + "epoch": 0.7062555066079295, + "grad_norm": 1.6101005409981786, + "learning_rate": 1.538991101945431e-05, + "loss": 0.6647740006446838, + "step": 2004 + }, + { + "epoch": 0.7066079295154185, + "grad_norm": 1.5047715708456952, + "learning_rate": 1.538500246487843e-05, + "loss": 0.7111536860466003, + 
"step": 2005 + }, + { + "epoch": 0.7069603524229074, + "grad_norm": 1.8533704165409681, + "learning_rate": 1.5380092082261797e-05, + "loss": 0.7395933270454407, + "step": 2006 + }, + { + "epoch": 0.7073127753303965, + "grad_norm": 1.4630720873509298, + "learning_rate": 1.5375179873271335e-05, + "loss": 0.6158996820449829, + "step": 2007 + }, + { + "epoch": 0.7076651982378854, + "grad_norm": 1.4746770670226905, + "learning_rate": 1.537026583957459e-05, + "loss": 0.7259848117828369, + "step": 2008 + }, + { + "epoch": 0.7080176211453745, + "grad_norm": 1.6674311554666914, + "learning_rate": 1.5365349982839723e-05, + "loss": 0.8370928764343262, + "step": 2009 + }, + { + "epoch": 0.7083700440528634, + "grad_norm": 1.3618230849109776, + "learning_rate": 1.536043230473551e-05, + "loss": 0.6041784882545471, + "step": 2010 + }, + { + "epoch": 0.7087224669603525, + "grad_norm": 1.4112680073946362, + "learning_rate": 1.535551280693135e-05, + "loss": 0.688548743724823, + "step": 2011 + }, + { + "epoch": 0.7090748898678414, + "grad_norm": 1.6056330275270763, + "learning_rate": 1.5350591491097265e-05, + "loss": 0.573681652545929, + "step": 2012 + }, + { + "epoch": 0.7094273127753304, + "grad_norm": 2.0956667904129636, + "learning_rate": 1.5345668358903886e-05, + "loss": 0.6919670104980469, + "step": 2013 + }, + { + "epoch": 0.7097797356828194, + "grad_norm": 1.6440284625605202, + "learning_rate": 1.534074341202246e-05, + "loss": 0.6693999767303467, + "step": 2014 + }, + { + "epoch": 0.7101321585903083, + "grad_norm": 1.5023686452775393, + "learning_rate": 1.533581665212486e-05, + "loss": 0.7204093337059021, + "step": 2015 + }, + { + "epoch": 0.7104845814977974, + "grad_norm": 1.7353596990699613, + "learning_rate": 1.5330888080883555e-05, + "loss": 0.6196314096450806, + "step": 2016 + }, + { + "epoch": 0.7108370044052863, + "grad_norm": 1.4190743094269347, + "learning_rate": 1.5325957699971657e-05, + "loss": 0.7292872071266174, + "step": 2017 + }, + { + "epoch": 
0.7111894273127753, + "grad_norm": 1.7578012075664924, + "learning_rate": 1.532102551106287e-05, + "loss": 0.7514410018920898, + "step": 2018 + }, + { + "epoch": 0.7115418502202643, + "grad_norm": 1.329552917806312, + "learning_rate": 1.531609151583152e-05, + "loss": 0.7683345079421997, + "step": 2019 + }, + { + "epoch": 0.7118942731277533, + "grad_norm": 1.8323846391695044, + "learning_rate": 1.5311155715952536e-05, + "loss": 0.6994156837463379, + "step": 2020 + }, + { + "epoch": 0.7122466960352423, + "grad_norm": 1.3407977210543047, + "learning_rate": 1.5306218113101482e-05, + "loss": 0.5530328750610352, + "step": 2021 + }, + { + "epoch": 0.7125991189427313, + "grad_norm": 1.6814720781682417, + "learning_rate": 1.530127870895451e-05, + "loss": 0.6126301884651184, + "step": 2022 + }, + { + "epoch": 0.7129515418502202, + "grad_norm": 1.9618212705640916, + "learning_rate": 1.5296337505188403e-05, + "loss": 0.7514982223510742, + "step": 2023 + }, + { + "epoch": 0.7133039647577093, + "grad_norm": 1.742411408925072, + "learning_rate": 1.529139450348054e-05, + "loss": 0.7087191939353943, + "step": 2024 + }, + { + "epoch": 0.7136563876651982, + "grad_norm": 1.3195305972662899, + "learning_rate": 1.5286449705508914e-05, + "loss": 0.5713562965393066, + "step": 2025 + }, + { + "epoch": 0.7140088105726872, + "grad_norm": 1.3621779724967453, + "learning_rate": 1.5281503112952136e-05, + "loss": 0.6796679496765137, + "step": 2026 + }, + { + "epoch": 0.7143612334801762, + "grad_norm": 1.8247081007192694, + "learning_rate": 1.5276554727489415e-05, + "loss": 0.7902421355247498, + "step": 2027 + }, + { + "epoch": 0.7147136563876652, + "grad_norm": 1.3608050254188053, + "learning_rate": 1.527160455080058e-05, + "loss": 0.6645491123199463, + "step": 2028 + }, + { + "epoch": 0.7150660792951542, + "grad_norm": 1.489658346292968, + "learning_rate": 1.5266652584566056e-05, + "loss": 0.6077255606651306, + "step": 2029 + }, + { + "epoch": 0.7154185022026431, + "grad_norm": 
1.412193602346091, + "learning_rate": 1.5261698830466888e-05, + "loss": 0.6219078302383423, + "step": 2030 + }, + { + "epoch": 0.7157709251101322, + "grad_norm": 1.280704281307457, + "learning_rate": 1.5256743290184713e-05, + "loss": 0.5895035266876221, + "step": 2031 + }, + { + "epoch": 0.7161233480176211, + "grad_norm": 1.497416305314063, + "learning_rate": 1.5251785965401786e-05, + "loss": 0.6735520958900452, + "step": 2032 + }, + { + "epoch": 0.7164757709251102, + "grad_norm": 1.353147232010895, + "learning_rate": 1.524682685780097e-05, + "loss": 0.6212488412857056, + "step": 2033 + }, + { + "epoch": 0.7168281938325991, + "grad_norm": 1.5786628078958613, + "learning_rate": 1.524186596906572e-05, + "loss": 0.7181172966957092, + "step": 2034 + }, + { + "epoch": 0.7171806167400882, + "grad_norm": 3.1301800941750906, + "learning_rate": 1.5236903300880107e-05, + "loss": 0.7156587839126587, + "step": 2035 + }, + { + "epoch": 0.7175330396475771, + "grad_norm": 1.513371130481219, + "learning_rate": 1.52319388549288e-05, + "loss": 0.6989034414291382, + "step": 2036 + }, + { + "epoch": 0.7178854625550661, + "grad_norm": 1.5183441818080943, + "learning_rate": 1.5226972632897079e-05, + "loss": 0.7224982976913452, + "step": 2037 + }, + { + "epoch": 0.7182378854625551, + "grad_norm": 1.5033480023563544, + "learning_rate": 1.522200463647082e-05, + "loss": 0.6871547698974609, + "step": 2038 + }, + { + "epoch": 0.718590308370044, + "grad_norm": 1.5898527901911406, + "learning_rate": 1.5217034867336498e-05, + "loss": 0.725049614906311, + "step": 2039 + }, + { + "epoch": 0.718942731277533, + "grad_norm": 2.079980258079047, + "learning_rate": 1.5212063327181197e-05, + "loss": 0.7105863094329834, + "step": 2040 + }, + { + "epoch": 0.719295154185022, + "grad_norm": 1.4720898042575539, + "learning_rate": 1.5207090017692605e-05, + "loss": 0.5823827981948853, + "step": 2041 + }, + { + "epoch": 0.719647577092511, + "grad_norm": 1.9166232714289464, + "learning_rate": 
1.5202114940559005e-05, + "loss": 0.7087944746017456, + "step": 2042 + }, + { + "epoch": 0.72, + "grad_norm": 1.40676061171607, + "learning_rate": 1.5197138097469275e-05, + "loss": 0.6678824424743652, + "step": 2043 + }, + { + "epoch": 0.720352422907489, + "grad_norm": 1.8181396920642288, + "learning_rate": 1.5192159490112904e-05, + "loss": 0.7318846583366394, + "step": 2044 + }, + { + "epoch": 0.720704845814978, + "grad_norm": 1.4972370605408583, + "learning_rate": 1.5187179120179969e-05, + "loss": 0.7245825529098511, + "step": 2045 + }, + { + "epoch": 0.721057268722467, + "grad_norm": 1.8554569851295908, + "learning_rate": 1.5182196989361155e-05, + "loss": 0.7691583633422852, + "step": 2046 + }, + { + "epoch": 0.7214096916299559, + "grad_norm": 1.8926959198228865, + "learning_rate": 1.517721309934774e-05, + "loss": 0.7961187362670898, + "step": 2047 + }, + { + "epoch": 0.721762114537445, + "grad_norm": 1.4465824812635413, + "learning_rate": 1.51722274518316e-05, + "loss": 0.7163759469985962, + "step": 2048 + }, + { + "epoch": 0.7221145374449339, + "grad_norm": 1.5931659235074929, + "learning_rate": 1.51672400485052e-05, + "loss": 0.6807754039764404, + "step": 2049 + }, + { + "epoch": 0.7224669603524229, + "grad_norm": 1.6629043788678177, + "learning_rate": 1.516225089106162e-05, + "loss": 0.7026433348655701, + "step": 2050 + }, + { + "epoch": 0.7228193832599119, + "grad_norm": 1.5979782761024863, + "learning_rate": 1.5157259981194514e-05, + "loss": 0.8230476379394531, + "step": 2051 + }, + { + "epoch": 0.7231718061674008, + "grad_norm": 1.7451468269512191, + "learning_rate": 1.5152267320598149e-05, + "loss": 0.6466805934906006, + "step": 2052 + }, + { + "epoch": 0.7235242290748899, + "grad_norm": 1.441654513994546, + "learning_rate": 1.5147272910967368e-05, + "loss": 0.7203368544578552, + "step": 2053 + }, + { + "epoch": 0.7238766519823788, + "grad_norm": 1.3552926542352444, + "learning_rate": 1.5142276753997627e-05, + "loss": 0.6455702781677246, + "step": 2054 + 
}, + { + "epoch": 0.7242290748898679, + "grad_norm": 1.4569594560235375, + "learning_rate": 1.5137278851384958e-05, + "loss": 0.609260082244873, + "step": 2055 + }, + { + "epoch": 0.7245814977973568, + "grad_norm": 1.8083723333355965, + "learning_rate": 1.5132279204826e-05, + "loss": 0.8320673704147339, + "step": 2056 + }, + { + "epoch": 0.7249339207048459, + "grad_norm": 1.5846751172626037, + "learning_rate": 1.512727781601797e-05, + "loss": 0.8497718572616577, + "step": 2057 + }, + { + "epoch": 0.7252863436123348, + "grad_norm": 1.3523103900088498, + "learning_rate": 1.5122274686658695e-05, + "loss": 0.6398370265960693, + "step": 2058 + }, + { + "epoch": 0.7256387665198238, + "grad_norm": 1.4475161405549521, + "learning_rate": 1.511726981844657e-05, + "loss": 0.7562476396560669, + "step": 2059 + }, + { + "epoch": 0.7259911894273128, + "grad_norm": 1.8369611551341436, + "learning_rate": 1.51122632130806e-05, + "loss": 0.7948570251464844, + "step": 2060 + }, + { + "epoch": 0.7263436123348017, + "grad_norm": 1.9057892039367437, + "learning_rate": 1.5107254872260366e-05, + "loss": 0.7062652111053467, + "step": 2061 + }, + { + "epoch": 0.7266960352422908, + "grad_norm": 1.666793884988277, + "learning_rate": 1.5102244797686049e-05, + "loss": 0.6290205717086792, + "step": 2062 + }, + { + "epoch": 0.7270484581497797, + "grad_norm": 1.7111515682842917, + "learning_rate": 1.5097232991058409e-05, + "loss": 0.727097749710083, + "step": 2063 + }, + { + "epoch": 0.7274008810572687, + "grad_norm": 1.6005396217530683, + "learning_rate": 1.5092219454078803e-05, + "loss": 0.783380389213562, + "step": 2064 + }, + { + "epoch": 0.7277533039647577, + "grad_norm": 1.4872748126751951, + "learning_rate": 1.5087204188449165e-05, + "loss": 0.6190629601478577, + "step": 2065 + }, + { + "epoch": 0.7281057268722467, + "grad_norm": 1.5426042958975894, + "learning_rate": 1.5082187195872026e-05, + "loss": 0.6749798059463501, + "step": 2066 + }, + { + "epoch": 0.7284581497797357, + "grad_norm": 
1.524694880675492, + "learning_rate": 1.5077168478050494e-05, + "loss": 0.6581153273582458, + "step": 2067 + }, + { + "epoch": 0.7288105726872247, + "grad_norm": 1.433767292714838, + "learning_rate": 1.5072148036688279e-05, + "loss": 0.6886252760887146, + "step": 2068 + }, + { + "epoch": 0.7291629955947136, + "grad_norm": 1.651630016781231, + "learning_rate": 1.506712587348965e-05, + "loss": 0.6893814206123352, + "step": 2069 + }, + { + "epoch": 0.7295154185022027, + "grad_norm": 1.7840073958291343, + "learning_rate": 1.5062101990159486e-05, + "loss": 0.8242654800415039, + "step": 2070 + }, + { + "epoch": 0.7298678414096916, + "grad_norm": 1.4785860236042563, + "learning_rate": 1.5057076388403229e-05, + "loss": 0.6331228017807007, + "step": 2071 + }, + { + "epoch": 0.7302202643171806, + "grad_norm": 1.999658994203056, + "learning_rate": 1.5052049069926927e-05, + "loss": 0.6440649032592773, + "step": 2072 + }, + { + "epoch": 0.7305726872246696, + "grad_norm": 1.4709264297577982, + "learning_rate": 1.5047020036437187e-05, + "loss": 0.7575498819351196, + "step": 2073 + }, + { + "epoch": 0.7309251101321586, + "grad_norm": 1.8032604054381702, + "learning_rate": 1.5041989289641215e-05, + "loss": 0.7530438899993896, + "step": 2074 + }, + { + "epoch": 0.7312775330396476, + "grad_norm": 1.5344556457224068, + "learning_rate": 1.5036956831246792e-05, + "loss": 0.6035616397857666, + "step": 2075 + }, + { + "epoch": 0.7316299559471365, + "grad_norm": 1.5603807233808964, + "learning_rate": 1.5031922662962279e-05, + "loss": 0.8199492692947388, + "step": 2076 + }, + { + "epoch": 0.7319823788546256, + "grad_norm": 1.4221584765379676, + "learning_rate": 1.5026886786496624e-05, + "loss": 0.7700716257095337, + "step": 2077 + }, + { + "epoch": 0.7323348017621145, + "grad_norm": 1.363028657258907, + "learning_rate": 1.5021849203559347e-05, + "loss": 0.6147816777229309, + "step": 2078 + }, + { + "epoch": 0.7326872246696036, + "grad_norm": 1.5628142146943151, + "learning_rate": 
1.5016809915860549e-05, + "loss": 0.6841654777526855, + "step": 2079 + }, + { + "epoch": 0.7330396475770925, + "grad_norm": 1.7910877668379601, + "learning_rate": 1.5011768925110915e-05, + "loss": 0.7212510108947754, + "step": 2080 + }, + { + "epoch": 0.7333920704845815, + "grad_norm": 1.5222211216380177, + "learning_rate": 1.5006726233021702e-05, + "loss": 0.6695969104766846, + "step": 2081 + }, + { + "epoch": 0.7337444933920705, + "grad_norm": 1.391558192885713, + "learning_rate": 1.500168184130475e-05, + "loss": 0.5991939306259155, + "step": 2082 + }, + { + "epoch": 0.7340969162995594, + "grad_norm": 1.4191544168706896, + "learning_rate": 1.4996635751672467e-05, + "loss": 0.7127671241760254, + "step": 2083 + }, + { + "epoch": 0.7344493392070485, + "grad_norm": 1.6905086418980109, + "learning_rate": 1.4991587965837853e-05, + "loss": 0.6874737739562988, + "step": 2084 + }, + { + "epoch": 0.7348017621145374, + "grad_norm": 1.3584519480933235, + "learning_rate": 1.4986538485514466e-05, + "loss": 0.6695086359977722, + "step": 2085 + }, + { + "epoch": 0.7351541850220265, + "grad_norm": 1.694264564137899, + "learning_rate": 1.4981487312416452e-05, + "loss": 0.8366880416870117, + "step": 2086 + }, + { + "epoch": 0.7355066079295154, + "grad_norm": 1.4589826786561007, + "learning_rate": 1.4976434448258519e-05, + "loss": 0.6448042988777161, + "step": 2087 + }, + { + "epoch": 0.7358590308370044, + "grad_norm": 1.8583566766216881, + "learning_rate": 1.4971379894755969e-05, + "loss": 0.7015181183815002, + "step": 2088 + }, + { + "epoch": 0.7362114537444934, + "grad_norm": 1.702091122213854, + "learning_rate": 1.4966323653624657e-05, + "loss": 0.6842815279960632, + "step": 2089 + }, + { + "epoch": 0.7365638766519824, + "grad_norm": 1.7134163669939546, + "learning_rate": 1.4961265726581025e-05, + "loss": 0.6866877675056458, + "step": 2090 + }, + { + "epoch": 0.7369162995594714, + "grad_norm": 1.537334961209543, + "learning_rate": 1.4956206115342076e-05, + "loss": 
0.5486865043640137, + "step": 2091 + }, + { + "epoch": 0.7372687224669604, + "grad_norm": 1.7196744065626985, + "learning_rate": 1.4951144821625396e-05, + "loss": 0.7241986989974976, + "step": 2092 + }, + { + "epoch": 0.7376211453744493, + "grad_norm": 1.647893211532232, + "learning_rate": 1.4946081847149134e-05, + "loss": 0.8400537967681885, + "step": 2093 + }, + { + "epoch": 0.7379735682819383, + "grad_norm": 2.2262132208657146, + "learning_rate": 1.4941017193632013e-05, + "loss": 0.6050147414207458, + "step": 2094 + }, + { + "epoch": 0.7383259911894273, + "grad_norm": 1.337421477916073, + "learning_rate": 1.4935950862793322e-05, + "loss": 0.6744229197502136, + "step": 2095 + }, + { + "epoch": 0.7386784140969163, + "grad_norm": 1.4345512538147223, + "learning_rate": 1.493088285635293e-05, + "loss": 0.6902294158935547, + "step": 2096 + }, + { + "epoch": 0.7390308370044053, + "grad_norm": 1.8712136012401615, + "learning_rate": 1.492581317603126e-05, + "loss": 0.6328809261322021, + "step": 2097 + }, + { + "epoch": 0.7393832599118942, + "grad_norm": 1.4287618993627116, + "learning_rate": 1.4920741823549316e-05, + "loss": 0.5740914344787598, + "step": 2098 + }, + { + "epoch": 0.7397356828193833, + "grad_norm": 2.181624869430245, + "learning_rate": 1.491566880062866e-05, + "loss": 0.676064133644104, + "step": 2099 + }, + { + "epoch": 0.7400881057268722, + "grad_norm": 1.5152586818427025, + "learning_rate": 1.4910594108991427e-05, + "loss": 0.655153751373291, + "step": 2100 + }, + { + "epoch": 0.7404405286343613, + "grad_norm": 1.7534591753196083, + "learning_rate": 1.4905517750360321e-05, + "loss": 0.7406177520751953, + "step": 2101 + }, + { + "epoch": 0.7407929515418502, + "grad_norm": 1.777307095945404, + "learning_rate": 1.4900439726458602e-05, + "loss": 0.6568606495857239, + "step": 2102 + }, + { + "epoch": 0.7411453744493393, + "grad_norm": 1.661203262476052, + "learning_rate": 1.4895360039010101e-05, + "loss": 0.8073545098304749, + "step": 2103 + }, + { + 
"epoch": 0.7414977973568282, + "grad_norm": 1.6727123321226325, + "learning_rate": 1.4890278689739219e-05, + "loss": 0.6350502967834473, + "step": 2104 + }, + { + "epoch": 0.7418502202643171, + "grad_norm": 1.475293376760879, + "learning_rate": 1.4885195680370915e-05, + "loss": 0.6419750452041626, + "step": 2105 + }, + { + "epoch": 0.7422026431718062, + "grad_norm": 1.5480091112446772, + "learning_rate": 1.4880111012630706e-05, + "loss": 0.72661292552948, + "step": 2106 + }, + { + "epoch": 0.7425550660792951, + "grad_norm": 1.5125479406066336, + "learning_rate": 1.4875024688244683e-05, + "loss": 0.6996778845787048, + "step": 2107 + }, + { + "epoch": 0.7429074889867842, + "grad_norm": 1.7343888178448454, + "learning_rate": 1.4869936708939497e-05, + "loss": 0.8383389711380005, + "step": 2108 + }, + { + "epoch": 0.7432599118942731, + "grad_norm": 1.6950461405964057, + "learning_rate": 1.4864847076442358e-05, + "loss": 0.6863676905632019, + "step": 2109 + }, + { + "epoch": 0.7436123348017621, + "grad_norm": 1.781136801701718, + "learning_rate": 1.4859755792481032e-05, + "loss": 0.8493780493736267, + "step": 2110 + }, + { + "epoch": 0.7439647577092511, + "grad_norm": 1.3754571175527768, + "learning_rate": 1.4854662858783857e-05, + "loss": 0.6172446012496948, + "step": 2111 + }, + { + "epoch": 0.7443171806167401, + "grad_norm": 6.860121931549926, + "learning_rate": 1.4849568277079724e-05, + "loss": 0.8390353918075562, + "step": 2112 + }, + { + "epoch": 0.7446696035242291, + "grad_norm": 1.8563178731324264, + "learning_rate": 1.4844472049098087e-05, + "loss": 0.7108968496322632, + "step": 2113 + }, + { + "epoch": 0.7450220264317181, + "grad_norm": 1.5680406370173388, + "learning_rate": 1.4839374176568956e-05, + "loss": 0.7322912812232971, + "step": 2114 + }, + { + "epoch": 0.745374449339207, + "grad_norm": 1.5999840343791083, + "learning_rate": 1.4834274661222896e-05, + "loss": 0.6371238231658936, + "step": 2115 + }, + { + "epoch": 0.745726872246696, + "grad_norm": 
1.6793360349519253, + "learning_rate": 1.4829173504791035e-05, + "loss": 0.8346511125564575, + "step": 2116 + }, + { + "epoch": 0.746079295154185, + "grad_norm": 1.5530745059154032, + "learning_rate": 1.4824070709005063e-05, + "loss": 0.5893645286560059, + "step": 2117 + }, + { + "epoch": 0.746431718061674, + "grad_norm": 1.298803943907695, + "learning_rate": 1.4818966275597213e-05, + "loss": 0.60541832447052, + "step": 2118 + }, + { + "epoch": 0.746784140969163, + "grad_norm": 2.0046684565684108, + "learning_rate": 1.4813860206300286e-05, + "loss": 0.5823955535888672, + "step": 2119 + }, + { + "epoch": 0.747136563876652, + "grad_norm": 1.8094924676670123, + "learning_rate": 1.480875250284763e-05, + "loss": 0.6751007437705994, + "step": 2120 + }, + { + "epoch": 0.747488986784141, + "grad_norm": 1.5760168475146599, + "learning_rate": 1.4803643166973155e-05, + "loss": 0.6878843307495117, + "step": 2121 + }, + { + "epoch": 0.7478414096916299, + "grad_norm": 1.4061876649605263, + "learning_rate": 1.4798532200411319e-05, + "loss": 0.6732173562049866, + "step": 2122 + }, + { + "epoch": 0.748193832599119, + "grad_norm": 1.558565097379613, + "learning_rate": 1.479341960489714e-05, + "loss": 0.6383658647537231, + "step": 2123 + }, + { + "epoch": 0.7485462555066079, + "grad_norm": 1.8120908321553708, + "learning_rate": 1.4788305382166174e-05, + "loss": 0.7444638013839722, + "step": 2124 + }, + { + "epoch": 0.748898678414097, + "grad_norm": 1.7437949253948153, + "learning_rate": 1.4783189533954555e-05, + "loss": 0.5492427349090576, + "step": 2125 + }, + { + "epoch": 0.7492511013215859, + "grad_norm": 1.60343309806789, + "learning_rate": 1.4778072061998944e-05, + "loss": 0.6193333864212036, + "step": 2126 + }, + { + "epoch": 0.7496035242290748, + "grad_norm": 2.019729643045431, + "learning_rate": 1.4772952968036572e-05, + "loss": 0.853213906288147, + "step": 2127 + }, + { + "epoch": 0.7499559471365639, + "grad_norm": 1.4306248677016198, + "learning_rate": 
1.4767832253805203e-05, + "loss": 0.6128672361373901, + "step": 2128 + }, + { + "epoch": 0.7503083700440528, + "grad_norm": 1.7550432779472305, + "learning_rate": 1.4762709921043166e-05, + "loss": 0.7298723459243774, + "step": 2129 + }, + { + "epoch": 0.7506607929515419, + "grad_norm": 1.3773404123246435, + "learning_rate": 1.475758597148933e-05, + "loss": 0.6578782796859741, + "step": 2130 + }, + { + "epoch": 0.7510132158590308, + "grad_norm": 1.6603784675007325, + "learning_rate": 1.4752460406883122e-05, + "loss": 0.6490681171417236, + "step": 2131 + }, + { + "epoch": 0.7513656387665198, + "grad_norm": 1.530112138397779, + "learning_rate": 1.4747333228964502e-05, + "loss": 0.657980740070343, + "step": 2132 + }, + { + "epoch": 0.7517180616740088, + "grad_norm": 1.9937499661396574, + "learning_rate": 1.4742204439473999e-05, + "loss": 0.8431578874588013, + "step": 2133 + }, + { + "epoch": 0.7520704845814978, + "grad_norm": 1.7351787739786175, + "learning_rate": 1.4737074040152667e-05, + "loss": 0.7217377424240112, + "step": 2134 + }, + { + "epoch": 0.7524229074889868, + "grad_norm": 2.232953474209366, + "learning_rate": 1.4731942032742127e-05, + "loss": 0.6299912333488464, + "step": 2135 + }, + { + "epoch": 0.7527753303964758, + "grad_norm": 1.6053563211063129, + "learning_rate": 1.4726808418984527e-05, + "loss": 0.6325603723526001, + "step": 2136 + }, + { + "epoch": 0.7531277533039648, + "grad_norm": 1.7427287871247603, + "learning_rate": 1.4721673200622572e-05, + "loss": 0.6785098314285278, + "step": 2137 + }, + { + "epoch": 0.7534801762114537, + "grad_norm": 2.5780020778792068, + "learning_rate": 1.471653637939951e-05, + "loss": 0.7311918139457703, + "step": 2138 + }, + { + "epoch": 0.7538325991189427, + "grad_norm": 1.498799685922224, + "learning_rate": 1.4711397957059132e-05, + "loss": 0.7117096781730652, + "step": 2139 + }, + { + "epoch": 0.7541850220264317, + "grad_norm": 1.4519847744536865, + "learning_rate": 1.4706257935345772e-05, + "loss": 
0.6709408760070801, + "step": 2140 + }, + { + "epoch": 0.7545374449339207, + "grad_norm": 1.9629689982019365, + "learning_rate": 1.4701116316004307e-05, + "loss": 0.6478008031845093, + "step": 2141 + }, + { + "epoch": 0.7548898678414097, + "grad_norm": 1.5362345610055923, + "learning_rate": 1.4695973100780154e-05, + "loss": 0.6414140462875366, + "step": 2142 + }, + { + "epoch": 0.7552422907488987, + "grad_norm": 1.7088547501964069, + "learning_rate": 1.4690828291419283e-05, + "loss": 0.6947815418243408, + "step": 2143 + }, + { + "epoch": 0.7555947136563876, + "grad_norm": 1.6244554419934112, + "learning_rate": 1.4685681889668187e-05, + "loss": 0.6614837646484375, + "step": 2144 + }, + { + "epoch": 0.7559471365638767, + "grad_norm": 1.87010430937903, + "learning_rate": 1.4680533897273913e-05, + "loss": 0.7803678512573242, + "step": 2145 + }, + { + "epoch": 0.7562995594713656, + "grad_norm": 1.975192105020327, + "learning_rate": 1.4675384315984045e-05, + "loss": 0.8411567211151123, + "step": 2146 + }, + { + "epoch": 0.7566519823788547, + "grad_norm": 2.4329758477488177, + "learning_rate": 1.4670233147546708e-05, + "loss": 0.8379243016242981, + "step": 2147 + }, + { + "epoch": 0.7570044052863436, + "grad_norm": 1.6153137773652926, + "learning_rate": 1.4665080393710558e-05, + "loss": 0.6419194936752319, + "step": 2148 + }, + { + "epoch": 0.7573568281938327, + "grad_norm": 1.8383077301350303, + "learning_rate": 1.4659926056224798e-05, + "loss": 0.7791979908943176, + "step": 2149 + }, + { + "epoch": 0.7577092511013216, + "grad_norm": 1.72203201226436, + "learning_rate": 1.465477013683916e-05, + "loss": 0.7237389087677002, + "step": 2150 + }, + { + "epoch": 0.7580616740088105, + "grad_norm": 1.5129431088418641, + "learning_rate": 1.464961263730393e-05, + "loss": 0.6750755906105042, + "step": 2151 + }, + { + "epoch": 0.7584140969162996, + "grad_norm": 1.3799525283393634, + "learning_rate": 1.4644453559369904e-05, + "loss": 0.5412150621414185, + "step": 2152 + }, + { + 
"epoch": 0.7587665198237885, + "grad_norm": 1.7752121571388841, + "learning_rate": 1.463929290478844e-05, + "loss": 0.7009850740432739, + "step": 2153 + }, + { + "epoch": 0.7591189427312776, + "grad_norm": 1.5166585489574307, + "learning_rate": 1.4634130675311411e-05, + "loss": 0.8678998351097107, + "step": 2154 + }, + { + "epoch": 0.7594713656387665, + "grad_norm": 2.0127463717616347, + "learning_rate": 1.4628966872691241e-05, + "loss": 0.7395705580711365, + "step": 2155 + }, + { + "epoch": 0.7598237885462555, + "grad_norm": 1.5739842401493016, + "learning_rate": 1.4623801498680875e-05, + "loss": 0.5950812101364136, + "step": 2156 + }, + { + "epoch": 0.7601762114537445, + "grad_norm": 1.6474041176538503, + "learning_rate": 1.46186345550338e-05, + "loss": 0.7133630514144897, + "step": 2157 + }, + { + "epoch": 0.7605286343612335, + "grad_norm": 1.4644647660974064, + "learning_rate": 1.4613466043504026e-05, + "loss": 0.7551965117454529, + "step": 2158 + }, + { + "epoch": 0.7608810572687225, + "grad_norm": 1.4284086636489846, + "learning_rate": 1.4608295965846111e-05, + "loss": 0.6654022932052612, + "step": 2159 + }, + { + "epoch": 0.7612334801762115, + "grad_norm": 3.5518990487711126, + "learning_rate": 1.460312432381513e-05, + "loss": 0.8081967830657959, + "step": 2160 + }, + { + "epoch": 0.7615859030837004, + "grad_norm": 1.8113760087057564, + "learning_rate": 1.4597951119166696e-05, + "loss": 0.7478348016738892, + "step": 2161 + }, + { + "epoch": 0.7619383259911894, + "grad_norm": 2.9384500423152833, + "learning_rate": 1.4592776353656948e-05, + "loss": 0.7866748571395874, + "step": 2162 + }, + { + "epoch": 0.7622907488986784, + "grad_norm": 1.4185631764668494, + "learning_rate": 1.4587600029042563e-05, + "loss": 0.6675869226455688, + "step": 2163 + }, + { + "epoch": 0.7626431718061674, + "grad_norm": 1.934904377243222, + "learning_rate": 1.4582422147080739e-05, + "loss": 0.6881103515625, + "step": 2164 + }, + { + "epoch": 0.7629955947136564, + "grad_norm": 
1.6886719056667128, + "learning_rate": 1.457724270952921e-05, + "loss": 0.7298593521118164, + "step": 2165 + }, + { + "epoch": 0.7633480176211453, + "grad_norm": 1.5123877451607526, + "learning_rate": 1.4572061718146224e-05, + "loss": 0.7102776765823364, + "step": 2166 + }, + { + "epoch": 0.7637004405286344, + "grad_norm": 1.6706836844885837, + "learning_rate": 1.4566879174690576e-05, + "loss": 0.7767213582992554, + "step": 2167 + }, + { + "epoch": 0.7640528634361233, + "grad_norm": 1.4702267439170456, + "learning_rate": 1.4561695080921573e-05, + "loss": 0.7480257749557495, + "step": 2168 + }, + { + "epoch": 0.7644052863436124, + "grad_norm": 1.4326376726611632, + "learning_rate": 1.4556509438599057e-05, + "loss": 0.7419564723968506, + "step": 2169 + }, + { + "epoch": 0.7647577092511013, + "grad_norm": 1.4787079836022163, + "learning_rate": 1.4551322249483388e-05, + "loss": 0.6820264458656311, + "step": 2170 + }, + { + "epoch": 0.7651101321585904, + "grad_norm": 1.3819947250134947, + "learning_rate": 1.4546133515335462e-05, + "loss": 0.5947732329368591, + "step": 2171 + }, + { + "epoch": 0.7654625550660793, + "grad_norm": 1.6478975280830812, + "learning_rate": 1.4540943237916685e-05, + "loss": 0.6772021055221558, + "step": 2172 + }, + { + "epoch": 0.7658149779735682, + "grad_norm": 1.7643629263201115, + "learning_rate": 1.4535751418989e-05, + "loss": 0.7822210192680359, + "step": 2173 + }, + { + "epoch": 0.7661674008810573, + "grad_norm": 1.6079996302057808, + "learning_rate": 1.4530558060314866e-05, + "loss": 0.6208021640777588, + "step": 2174 + }, + { + "epoch": 0.7665198237885462, + "grad_norm": 1.5681481752797541, + "learning_rate": 1.4525363163657264e-05, + "loss": 0.8017063140869141, + "step": 2175 + }, + { + "epoch": 0.7668722466960353, + "grad_norm": 1.4681783580715917, + "learning_rate": 1.4520166730779704e-05, + "loss": 0.738383948802948, + "step": 2176 + }, + { + "epoch": 0.7672246696035242, + "grad_norm": 1.742058488341915, + "learning_rate": 
1.4514968763446213e-05, + "loss": 0.7698314785957336, + "step": 2177 + }, + { + "epoch": 0.7675770925110132, + "grad_norm": 1.7037031257568012, + "learning_rate": 1.4509769263421337e-05, + "loss": 0.789836049079895, + "step": 2178 + }, + { + "epoch": 0.7679295154185022, + "grad_norm": 1.8506345351591968, + "learning_rate": 1.4504568232470145e-05, + "loss": 0.6437339782714844, + "step": 2179 + }, + { + "epoch": 0.7682819383259912, + "grad_norm": 2.04999468198658, + "learning_rate": 1.4499365672358226e-05, + "loss": 0.6684735417366028, + "step": 2180 + }, + { + "epoch": 0.7686343612334802, + "grad_norm": 1.5077038126146909, + "learning_rate": 1.4494161584851687e-05, + "loss": 0.6577454805374146, + "step": 2181 + }, + { + "epoch": 0.7689867841409692, + "grad_norm": 1.3277471323795764, + "learning_rate": 1.4488955971717154e-05, + "loss": 0.5975776314735413, + "step": 2182 + }, + { + "epoch": 0.7693392070484582, + "grad_norm": 1.8819815707164231, + "learning_rate": 1.4483748834721767e-05, + "loss": 0.6385577917098999, + "step": 2183 + }, + { + "epoch": 0.7696916299559471, + "grad_norm": 1.4452778349053288, + "learning_rate": 1.4478540175633193e-05, + "loss": 0.6295928955078125, + "step": 2184 + }, + { + "epoch": 0.7700440528634361, + "grad_norm": 1.5790897154124113, + "learning_rate": 1.4473329996219605e-05, + "loss": 0.6848496198654175, + "step": 2185 + }, + { + "epoch": 0.7703964757709251, + "grad_norm": 1.410283277756768, + "learning_rate": 1.44681182982497e-05, + "loss": 0.6476501226425171, + "step": 2186 + }, + { + "epoch": 0.7707488986784141, + "grad_norm": 1.5220085975801703, + "learning_rate": 1.4462905083492683e-05, + "loss": 0.750103235244751, + "step": 2187 + }, + { + "epoch": 0.771101321585903, + "grad_norm": 1.3838063845924222, + "learning_rate": 1.4457690353718285e-05, + "loss": 0.668454110622406, + "step": 2188 + }, + { + "epoch": 0.7714537444933921, + "grad_norm": 1.3695000422583874, + "learning_rate": 1.4452474110696738e-05, + "loss": 
0.6671048402786255, + "step": 2189 + }, + { + "epoch": 0.771806167400881, + "grad_norm": 1.404147919130693, + "learning_rate": 1.4447256356198797e-05, + "loss": 0.6261379718780518, + "step": 2190 + }, + { + "epoch": 0.7721585903083701, + "grad_norm": 1.6192228095415668, + "learning_rate": 1.4442037091995726e-05, + "loss": 0.6128308176994324, + "step": 2191 + }, + { + "epoch": 0.772511013215859, + "grad_norm": 1.629684954387357, + "learning_rate": 1.4436816319859306e-05, + "loss": 0.7709108591079712, + "step": 2192 + }, + { + "epoch": 0.7728634361233481, + "grad_norm": 1.7604991326643686, + "learning_rate": 1.4431594041561822e-05, + "loss": 0.6242028474807739, + "step": 2193 + }, + { + "epoch": 0.773215859030837, + "grad_norm": 1.7562103574700596, + "learning_rate": 1.4426370258876079e-05, + "loss": 0.8030718564987183, + "step": 2194 + }, + { + "epoch": 0.7735682819383259, + "grad_norm": 1.5182882363444798, + "learning_rate": 1.4421144973575386e-05, + "loss": 0.7785710692405701, + "step": 2195 + }, + { + "epoch": 0.773920704845815, + "grad_norm": 1.5453752656669346, + "learning_rate": 1.4415918187433564e-05, + "loss": 0.6846014857292175, + "step": 2196 + }, + { + "epoch": 0.7742731277533039, + "grad_norm": 1.6007643935951585, + "learning_rate": 1.4410689902224947e-05, + "loss": 0.7883827686309814, + "step": 2197 + }, + { + "epoch": 0.774625550660793, + "grad_norm": 2.0453745328196065, + "learning_rate": 1.4405460119724377e-05, + "loss": 0.8285650610923767, + "step": 2198 + }, + { + "epoch": 0.7749779735682819, + "grad_norm": 1.5026043059194256, + "learning_rate": 1.4400228841707193e-05, + "loss": 0.6101093292236328, + "step": 2199 + }, + { + "epoch": 0.775330396475771, + "grad_norm": 1.4888885445589903, + "learning_rate": 1.4394996069949262e-05, + "loss": 0.6627891063690186, + "step": 2200 + }, + { + "epoch": 0.7756828193832599, + "grad_norm": 1.4487650646569075, + "learning_rate": 1.4389761806226943e-05, + "loss": 0.6755822896957397, + "step": 2201 + }, + { + 
"epoch": 0.7760352422907489, + "grad_norm": 1.438634659048083, + "learning_rate": 1.4384526052317106e-05, + "loss": 0.6718465089797974, + "step": 2202 + }, + { + "epoch": 0.7763876651982379, + "grad_norm": 1.4171659147035778, + "learning_rate": 1.4379288809997121e-05, + "loss": 0.5857758522033691, + "step": 2203 + }, + { + "epoch": 0.7767400881057269, + "grad_norm": 1.1200186604200135, + "learning_rate": 1.4374050081044876e-05, + "loss": 0.5861783027648926, + "step": 2204 + }, + { + "epoch": 0.7770925110132159, + "grad_norm": 1.442532656158601, + "learning_rate": 1.4368809867238754e-05, + "loss": 0.6862374544143677, + "step": 2205 + }, + { + "epoch": 0.7774449339207048, + "grad_norm": 1.6455201954220524, + "learning_rate": 1.4363568170357646e-05, + "loss": 0.6787701845169067, + "step": 2206 + }, + { + "epoch": 0.7777973568281938, + "grad_norm": 1.4101038203667695, + "learning_rate": 1.435832499218094e-05, + "loss": 0.5671687126159668, + "step": 2207 + }, + { + "epoch": 0.7781497797356828, + "grad_norm": 1.5479554264257531, + "learning_rate": 1.435308033448854e-05, + "loss": 0.8243429064750671, + "step": 2208 + }, + { + "epoch": 0.7785022026431718, + "grad_norm": 1.3676716888852272, + "learning_rate": 1.4347834199060835e-05, + "loss": 0.5880655646324158, + "step": 2209 + }, + { + "epoch": 0.7788546255506608, + "grad_norm": 2.451624357800272, + "learning_rate": 1.4342586587678734e-05, + "loss": 0.7085679769515991, + "step": 2210 + }, + { + "epoch": 0.7792070484581498, + "grad_norm": 1.546990179750224, + "learning_rate": 1.4337337502123627e-05, + "loss": 0.7011853456497192, + "step": 2211 + }, + { + "epoch": 0.7795594713656387, + "grad_norm": 1.6003260447933962, + "learning_rate": 1.4332086944177426e-05, + "loss": 0.755327582359314, + "step": 2212 + }, + { + "epoch": 0.7799118942731278, + "grad_norm": 1.3917359947430683, + "learning_rate": 1.4326834915622522e-05, + "loss": 0.7152736186981201, + "step": 2213 + }, + { + "epoch": 0.7802643171806167, + "grad_norm": 
1.3821995576878587, + "learning_rate": 1.4321581418241825e-05, + "loss": 0.6744083166122437, + "step": 2214 + }, + { + "epoch": 0.7806167400881058, + "grad_norm": 1.5294456027931242, + "learning_rate": 1.4316326453818728e-05, + "loss": 0.6112288236618042, + "step": 2215 + }, + { + "epoch": 0.7809691629955947, + "grad_norm": 1.2620758120071194, + "learning_rate": 1.4311070024137128e-05, + "loss": 0.5569246411323547, + "step": 2216 + }, + { + "epoch": 0.7813215859030836, + "grad_norm": 1.474883531826743, + "learning_rate": 1.4305812130981418e-05, + "loss": 0.6214494705200195, + "step": 2217 + }, + { + "epoch": 0.7816740088105727, + "grad_norm": 1.4094788075709526, + "learning_rate": 1.4300552776136497e-05, + "loss": 0.5401003956794739, + "step": 2218 + }, + { + "epoch": 0.7820264317180616, + "grad_norm": 1.433294268920241, + "learning_rate": 1.4295291961387742e-05, + "loss": 0.5128720998764038, + "step": 2219 + }, + { + "epoch": 0.7823788546255507, + "grad_norm": 1.352265751544302, + "learning_rate": 1.4290029688521043e-05, + "loss": 0.5495916604995728, + "step": 2220 + }, + { + "epoch": 0.7827312775330396, + "grad_norm": 1.6131865642068703, + "learning_rate": 1.4284765959322772e-05, + "loss": 0.628544807434082, + "step": 2221 + }, + { + "epoch": 0.7830837004405287, + "grad_norm": 1.443784571277232, + "learning_rate": 1.427950077557981e-05, + "loss": 0.7171294689178467, + "step": 2222 + }, + { + "epoch": 0.7834361233480176, + "grad_norm": 1.3723589201513293, + "learning_rate": 1.4274234139079513e-05, + "loss": 0.7436389327049255, + "step": 2223 + }, + { + "epoch": 0.7837885462555066, + "grad_norm": 1.5295286402885273, + "learning_rate": 1.426896605160975e-05, + "loss": 0.7154244780540466, + "step": 2224 + }, + { + "epoch": 0.7841409691629956, + "grad_norm": 1.4385555847293963, + "learning_rate": 1.426369651495886e-05, + "loss": 0.6433268189430237, + "step": 2225 + }, + { + "epoch": 0.7844933920704846, + "grad_norm": 1.4177681718218336, + "learning_rate": 
1.4258425530915703e-05, + "loss": 0.6612321734428406, + "step": 2226 + }, + { + "epoch": 0.7848458149779736, + "grad_norm": 1.962010974229914, + "learning_rate": 1.42531531012696e-05, + "loss": 0.6384811401367188, + "step": 2227 + }, + { + "epoch": 0.7851982378854625, + "grad_norm": 1.4927220821701634, + "learning_rate": 1.4247879227810384e-05, + "loss": 0.5592762231826782, + "step": 2228 + }, + { + "epoch": 0.7855506607929515, + "grad_norm": 1.6376570609433725, + "learning_rate": 1.4242603912328367e-05, + "loss": 0.6904512643814087, + "step": 2229 + }, + { + "epoch": 0.7859030837004405, + "grad_norm": 1.7784965930873091, + "learning_rate": 1.4237327156614358e-05, + "loss": 0.7165266871452332, + "step": 2230 + }, + { + "epoch": 0.7862555066079295, + "grad_norm": 1.6275397333714936, + "learning_rate": 1.423204896245965e-05, + "loss": 0.8567172288894653, + "step": 2231 + }, + { + "epoch": 0.7866079295154185, + "grad_norm": 1.6554990252792119, + "learning_rate": 1.4226769331656028e-05, + "loss": 0.6595934629440308, + "step": 2232 + }, + { + "epoch": 0.7869603524229075, + "grad_norm": 1.8034278962736743, + "learning_rate": 1.4221488265995755e-05, + "loss": 0.750861644744873, + "step": 2233 + }, + { + "epoch": 0.7873127753303965, + "grad_norm": 1.3674194021669617, + "learning_rate": 1.4216205767271597e-05, + "loss": 0.7146387696266174, + "step": 2234 + }, + { + "epoch": 0.7876651982378855, + "grad_norm": 1.9347692502503655, + "learning_rate": 1.4210921837276792e-05, + "loss": 0.58647221326828, + "step": 2235 + }, + { + "epoch": 0.7880176211453744, + "grad_norm": 1.4888974250205094, + "learning_rate": 1.4205636477805072e-05, + "loss": 0.6893318891525269, + "step": 2236 + }, + { + "epoch": 0.7883700440528635, + "grad_norm": 1.1833417050311776, + "learning_rate": 1.4200349690650654e-05, + "loss": 0.5545464158058167, + "step": 2237 + }, + { + "epoch": 0.7887224669603524, + "grad_norm": 1.6014523598259138, + "learning_rate": 1.4195061477608234e-05, + "loss": 
0.6088600158691406, + "step": 2238 + }, + { + "epoch": 0.7890748898678414, + "grad_norm": 1.3513904877886467, + "learning_rate": 1.4189771840472997e-05, + "loss": 0.6330769658088684, + "step": 2239 + }, + { + "epoch": 0.7894273127753304, + "grad_norm": 1.4283770062393895, + "learning_rate": 1.4184480781040613e-05, + "loss": 0.678654670715332, + "step": 2240 + }, + { + "epoch": 0.7897797356828193, + "grad_norm": 1.445633946040222, + "learning_rate": 1.417918830110723e-05, + "loss": 0.6259177923202515, + "step": 2241 + }, + { + "epoch": 0.7901321585903084, + "grad_norm": 1.408151849302333, + "learning_rate": 1.4173894402469477e-05, + "loss": 0.634982168674469, + "step": 2242 + }, + { + "epoch": 0.7904845814977973, + "grad_norm": 1.37778450193705, + "learning_rate": 1.4168599086924473e-05, + "loss": 0.6610612869262695, + "step": 2243 + }, + { + "epoch": 0.7908370044052864, + "grad_norm": 1.386127288755765, + "learning_rate": 1.416330235626981e-05, + "loss": 0.6952961683273315, + "step": 2244 + }, + { + "epoch": 0.7911894273127753, + "grad_norm": 1.6165363001234343, + "learning_rate": 1.4158004212303565e-05, + "loss": 0.5055881142616272, + "step": 2245 + }, + { + "epoch": 0.7915418502202644, + "grad_norm": 1.4841191669035856, + "learning_rate": 1.4152704656824288e-05, + "loss": 0.7284455299377441, + "step": 2246 + }, + { + "epoch": 0.7918942731277533, + "grad_norm": 1.3583334859782668, + "learning_rate": 1.414740369163102e-05, + "loss": 0.6985108852386475, + "step": 2247 + }, + { + "epoch": 0.7922466960352423, + "grad_norm": 1.3664811170856164, + "learning_rate": 1.4142101318523271e-05, + "loss": 0.5967550277709961, + "step": 2248 + }, + { + "epoch": 0.7925991189427313, + "grad_norm": 1.5695298710984633, + "learning_rate": 1.4136797539301033e-05, + "loss": 0.7696695327758789, + "step": 2249 + }, + { + "epoch": 0.7929515418502202, + "grad_norm": 1.3234775564665824, + "learning_rate": 1.413149235576477e-05, + "loss": 0.8131378293037415, + "step": 2250 + }, + { + "epoch": 
0.7933039647577093, + "grad_norm": 1.8429663529686, + "learning_rate": 1.4126185769715428e-05, + "loss": 0.8029932975769043, + "step": 2251 + }, + { + "epoch": 0.7936563876651982, + "grad_norm": 1.720051288151631, + "learning_rate": 1.412087778295443e-05, + "loss": 0.7408573031425476, + "step": 2252 + }, + { + "epoch": 0.7940088105726872, + "grad_norm": 1.8037723298533723, + "learning_rate": 1.411556839728367e-05, + "loss": 0.8624325394630432, + "step": 2253 + }, + { + "epoch": 0.7943612334801762, + "grad_norm": 1.5291561523904078, + "learning_rate": 1.411025761450552e-05, + "loss": 0.7635384798049927, + "step": 2254 + }, + { + "epoch": 0.7947136563876652, + "grad_norm": 1.5012301776005823, + "learning_rate": 1.4104945436422832e-05, + "loss": 0.5612920522689819, + "step": 2255 + }, + { + "epoch": 0.7950660792951542, + "grad_norm": 1.5891725973137842, + "learning_rate": 1.4099631864838912e-05, + "loss": 0.5792248845100403, + "step": 2256 + }, + { + "epoch": 0.7954185022026432, + "grad_norm": 1.427703140365858, + "learning_rate": 1.4094316901557563e-05, + "loss": 0.7405142188072205, + "step": 2257 + }, + { + "epoch": 0.7957709251101321, + "grad_norm": 1.5302016454534209, + "learning_rate": 1.4089000548383044e-05, + "loss": 0.630780816078186, + "step": 2258 + }, + { + "epoch": 0.7961233480176212, + "grad_norm": 1.5690685088460359, + "learning_rate": 1.4083682807120092e-05, + "loss": 0.6737201809883118, + "step": 2259 + }, + { + "epoch": 0.7964757709251101, + "grad_norm": 4.158789316506426, + "learning_rate": 1.4078363679573918e-05, + "loss": 0.6469985842704773, + "step": 2260 + }, + { + "epoch": 0.7968281938325992, + "grad_norm": 1.4774582614404035, + "learning_rate": 1.4073043167550198e-05, + "loss": 0.6315224170684814, + "step": 2261 + }, + { + "epoch": 0.7971806167400881, + "grad_norm": 1.1766652256758812, + "learning_rate": 1.4067721272855079e-05, + "loss": 0.6785402297973633, + "step": 2262 + }, + { + "epoch": 0.797533039647577, + "grad_norm": 1.4677269844033833, 
+ "learning_rate": 1.406239799729518e-05, + "loss": 0.7131394147872925, + "step": 2263 + }, + { + "epoch": 0.7978854625550661, + "grad_norm": 1.5575833651180606, + "learning_rate": 1.405707334267759e-05, + "loss": 0.6921142339706421, + "step": 2264 + }, + { + "epoch": 0.798237885462555, + "grad_norm": 1.375694666198905, + "learning_rate": 1.4051747310809863e-05, + "loss": 0.695213794708252, + "step": 2265 + }, + { + "epoch": 0.7985903083700441, + "grad_norm": 1.8529986724322307, + "learning_rate": 1.4046419903500013e-05, + "loss": 0.7081988453865051, + "step": 2266 + }, + { + "epoch": 0.798942731277533, + "grad_norm": 1.4461573292928833, + "learning_rate": 1.4041091122556539e-05, + "loss": 0.6404637098312378, + "step": 2267 + }, + { + "epoch": 0.7992951541850221, + "grad_norm": 1.3566691109367863, + "learning_rate": 1.403576096978839e-05, + "loss": 0.6404134631156921, + "step": 2268 + }, + { + "epoch": 0.799647577092511, + "grad_norm": 1.5118859398886633, + "learning_rate": 1.4030429447004992e-05, + "loss": 0.7963751554489136, + "step": 2269 + }, + { + "epoch": 0.8, + "grad_norm": 1.632997404115334, + "learning_rate": 1.4025096556016224e-05, + "loss": 0.6648174524307251, + "step": 2270 + }, + { + "epoch": 0.800352422907489, + "grad_norm": 1.4103532345019565, + "learning_rate": 1.4019762298632445e-05, + "loss": 0.6661815047264099, + "step": 2271 + }, + { + "epoch": 0.800704845814978, + "grad_norm": 1.7237738440956045, + "learning_rate": 1.4014426676664462e-05, + "loss": 0.6194477081298828, + "step": 2272 + }, + { + "epoch": 0.801057268722467, + "grad_norm": 1.8457235726726873, + "learning_rate": 1.400908969192356e-05, + "loss": 0.6869276762008667, + "step": 2273 + }, + { + "epoch": 0.8014096916299559, + "grad_norm": 1.7545140114513338, + "learning_rate": 1.4003751346221472e-05, + "loss": 0.7352420091629028, + "step": 2274 + }, + { + "epoch": 0.801762114537445, + "grad_norm": 1.5994812918128933, + "learning_rate": 1.3998411641370405e-05, + "loss": 0.8212440609931946, 
+ "step": 2275 + }, + { + "epoch": 0.8021145374449339, + "grad_norm": 1.5868623288152288, + "learning_rate": 1.3993070579183021e-05, + "loss": 0.6897045969963074, + "step": 2276 + }, + { + "epoch": 0.8024669603524229, + "grad_norm": 1.716974382638037, + "learning_rate": 1.3987728161472442e-05, + "loss": 0.8406906127929688, + "step": 2277 + }, + { + "epoch": 0.8028193832599119, + "grad_norm": 1.6664794009014727, + "learning_rate": 1.3982384390052257e-05, + "loss": 0.6236976385116577, + "step": 2278 + }, + { + "epoch": 0.8031718061674009, + "grad_norm": 1.7056031446043847, + "learning_rate": 1.3977039266736508e-05, + "loss": 0.8110965490341187, + "step": 2279 + }, + { + "epoch": 0.8035242290748899, + "grad_norm": 1.6273998334271178, + "learning_rate": 1.3971692793339697e-05, + "loss": 0.635534405708313, + "step": 2280 + }, + { + "epoch": 0.8038766519823789, + "grad_norm": 1.5382566365445476, + "learning_rate": 1.3966344971676789e-05, + "loss": 0.7806028127670288, + "step": 2281 + }, + { + "epoch": 0.8042290748898678, + "grad_norm": 1.7131487498074927, + "learning_rate": 1.3960995803563195e-05, + "loss": 0.6635935306549072, + "step": 2282 + }, + { + "epoch": 0.8045814977973569, + "grad_norm": 1.6068551029738092, + "learning_rate": 1.39556452908148e-05, + "loss": 0.6064634323120117, + "step": 2283 + }, + { + "epoch": 0.8049339207048458, + "grad_norm": 1.7686604234656398, + "learning_rate": 1.3950293435247933e-05, + "loss": 0.760187029838562, + "step": 2284 + }, + { + "epoch": 0.8052863436123348, + "grad_norm": 1.5333245954906318, + "learning_rate": 1.3944940238679384e-05, + "loss": 0.7004644274711609, + "step": 2285 + }, + { + "epoch": 0.8056387665198238, + "grad_norm": 1.9274194313344672, + "learning_rate": 1.393958570292639e-05, + "loss": 0.7662780284881592, + "step": 2286 + }, + { + "epoch": 0.8059911894273127, + "grad_norm": 1.3943181397787612, + "learning_rate": 1.393422982980666e-05, + "loss": 0.7939090132713318, + "step": 2287 + }, + { + "epoch": 
0.8063436123348018, + "grad_norm": 1.377559765071464, + "learning_rate": 1.3928872621138337e-05, + "loss": 0.7461861371994019, + "step": 2288 + }, + { + "epoch": 0.8066960352422907, + "grad_norm": 1.4875661773009663, + "learning_rate": 1.3923514078740032e-05, + "loss": 0.5997019410133362, + "step": 2289 + }, + { + "epoch": 0.8070484581497798, + "grad_norm": 1.5379009713311227, + "learning_rate": 1.3918154204430801e-05, + "loss": 0.5437384843826294, + "step": 2290 + }, + { + "epoch": 0.8074008810572687, + "grad_norm": 1.8168415447512607, + "learning_rate": 1.3912793000030154e-05, + "loss": 0.7387127876281738, + "step": 2291 + }, + { + "epoch": 0.8077533039647578, + "grad_norm": 1.305308107523337, + "learning_rate": 1.3907430467358054e-05, + "loss": 0.483035147190094, + "step": 2292 + }, + { + "epoch": 0.8081057268722467, + "grad_norm": 1.3669144351401303, + "learning_rate": 1.3902066608234919e-05, + "loss": 0.6208503842353821, + "step": 2293 + }, + { + "epoch": 0.8084581497797357, + "grad_norm": 1.7196168695476914, + "learning_rate": 1.3896701424481603e-05, + "loss": 0.6691559553146362, + "step": 2294 + }, + { + "epoch": 0.8088105726872247, + "grad_norm": 1.6945751274550964, + "learning_rate": 1.3891334917919422e-05, + "loss": 0.8960802555084229, + "step": 2295 + }, + { + "epoch": 0.8091629955947136, + "grad_norm": 1.7625732291329363, + "learning_rate": 1.388596709037014e-05, + "loss": 0.669715404510498, + "step": 2296 + }, + { + "epoch": 0.8095154185022027, + "grad_norm": 1.4235891674683654, + "learning_rate": 1.3880597943655972e-05, + "loss": 0.7356190085411072, + "step": 2297 + }, + { + "epoch": 0.8098678414096916, + "grad_norm": 1.6403595773987272, + "learning_rate": 1.3875227479599565e-05, + "loss": 0.9158750176429749, + "step": 2298 + }, + { + "epoch": 0.8102202643171806, + "grad_norm": 1.718215094287951, + "learning_rate": 1.3869855700024031e-05, + "loss": 0.7395786643028259, + "step": 2299 + }, + { + "epoch": 0.8105726872246696, + "grad_norm": 
1.6360185397225708, + "learning_rate": 1.3864482606752922e-05, + "loss": 0.594106912612915, + "step": 2300 + }, + { + "epoch": 0.8109251101321586, + "grad_norm": 1.6395747499474045, + "learning_rate": 1.3859108201610236e-05, + "loss": 0.7853089570999146, + "step": 2301 + }, + { + "epoch": 0.8112775330396476, + "grad_norm": 1.6313227134249062, + "learning_rate": 1.3853732486420413e-05, + "loss": 0.8346991539001465, + "step": 2302 + }, + { + "epoch": 0.8116299559471366, + "grad_norm": 1.6254363131857819, + "learning_rate": 1.3848355463008344e-05, + "loss": 0.5493819117546082, + "step": 2303 + }, + { + "epoch": 0.8119823788546255, + "grad_norm": 1.566621350016491, + "learning_rate": 1.3842977133199363e-05, + "loss": 0.7474828958511353, + "step": 2304 + }, + { + "epoch": 0.8123348017621146, + "grad_norm": 1.6648296076023164, + "learning_rate": 1.3837597498819242e-05, + "loss": 0.6599621772766113, + "step": 2305 + }, + { + "epoch": 0.8126872246696035, + "grad_norm": 1.5217466732352583, + "learning_rate": 1.38322165616942e-05, + "loss": 0.6751214861869812, + "step": 2306 + }, + { + "epoch": 0.8130396475770925, + "grad_norm": 1.720054765999457, + "learning_rate": 1.3826834323650899e-05, + "loss": 0.7450453042984009, + "step": 2307 + }, + { + "epoch": 0.8133920704845815, + "grad_norm": 1.4739637914592345, + "learning_rate": 1.382145078651644e-05, + "loss": 0.7015345692634583, + "step": 2308 + }, + { + "epoch": 0.8137444933920704, + "grad_norm": 1.4921910425897076, + "learning_rate": 1.3816065952118368e-05, + "loss": 0.7161329984664917, + "step": 2309 + }, + { + "epoch": 0.8140969162995595, + "grad_norm": 1.576440929020717, + "learning_rate": 1.3810679822284665e-05, + "loss": 0.771783709526062, + "step": 2310 + }, + { + "epoch": 0.8144493392070484, + "grad_norm": 1.461165164266228, + "learning_rate": 1.3805292398843755e-05, + "loss": 0.6710794568061829, + "step": 2311 + }, + { + "epoch": 0.8148017621145375, + "grad_norm": 1.6256312715940777, + "learning_rate": 
1.3799903683624503e-05, + "loss": 0.6614924669265747, + "step": 2312 + }, + { + "epoch": 0.8151541850220264, + "grad_norm": 1.429649360127197, + "learning_rate": 1.3794513678456203e-05, + "loss": 0.6432225704193115, + "step": 2313 + }, + { + "epoch": 0.8155066079295155, + "grad_norm": 1.233784916709085, + "learning_rate": 1.3789122385168604e-05, + "loss": 0.6228311061859131, + "step": 2314 + }, + { + "epoch": 0.8158590308370044, + "grad_norm": 1.5182036065920572, + "learning_rate": 1.3783729805591875e-05, + "loss": 0.5597498416900635, + "step": 2315 + }, + { + "epoch": 0.8162114537444934, + "grad_norm": 1.954667780900904, + "learning_rate": 1.3778335941556629e-05, + "loss": 0.7651177048683167, + "step": 2316 + }, + { + "epoch": 0.8165638766519824, + "grad_norm": 1.3053642347729657, + "learning_rate": 1.3772940794893916e-05, + "loss": 0.5482406616210938, + "step": 2317 + }, + { + "epoch": 0.8169162995594713, + "grad_norm": 1.4432389735878668, + "learning_rate": 1.3767544367435229e-05, + "loss": 0.767236590385437, + "step": 2318 + }, + { + "epoch": 0.8172687224669604, + "grad_norm": 1.7071036751428772, + "learning_rate": 1.3762146661012471e-05, + "loss": 0.705253541469574, + "step": 2319 + }, + { + "epoch": 0.8176211453744493, + "grad_norm": 1.4969645559129943, + "learning_rate": 1.3756747677458008e-05, + "loss": 0.7800463438034058, + "step": 2320 + }, + { + "epoch": 0.8179735682819383, + "grad_norm": 1.6172262621918039, + "learning_rate": 1.3751347418604623e-05, + "loss": 0.7615088224411011, + "step": 2321 + }, + { + "epoch": 0.8183259911894273, + "grad_norm": 1.6932314886464006, + "learning_rate": 1.3745945886285536e-05, + "loss": 0.8004297614097595, + "step": 2322 + }, + { + "epoch": 0.8186784140969163, + "grad_norm": 1.605867375121777, + "learning_rate": 1.3740543082334399e-05, + "loss": 0.6428912281990051, + "step": 2323 + }, + { + "epoch": 0.8190308370044053, + "grad_norm": 1.4147620040703779, + "learning_rate": 1.3735139008585294e-05, + "loss": 
0.6702802777290344, + "step": 2324 + }, + { + "epoch": 0.8193832599118943, + "grad_norm": 1.3127203907182126, + "learning_rate": 1.3729733666872736e-05, + "loss": 0.6003440022468567, + "step": 2325 + }, + { + "epoch": 0.8197356828193832, + "grad_norm": 2.04633486984075, + "learning_rate": 1.3724327059031677e-05, + "loss": 0.8264240622520447, + "step": 2326 + }, + { + "epoch": 0.8200881057268723, + "grad_norm": 1.4037319277657845, + "learning_rate": 1.3718919186897481e-05, + "loss": 0.6974462866783142, + "step": 2327 + }, + { + "epoch": 0.8204405286343612, + "grad_norm": 1.7081986923623933, + "learning_rate": 1.3713510052305962e-05, + "loss": 0.8273947238922119, + "step": 2328 + }, + { + "epoch": 0.8207929515418502, + "grad_norm": 1.5000401588722418, + "learning_rate": 1.3708099657093348e-05, + "loss": 0.6230529546737671, + "step": 2329 + }, + { + "epoch": 0.8211453744493392, + "grad_norm": 1.6377312790274685, + "learning_rate": 1.37026880030963e-05, + "loss": 0.6997084021568298, + "step": 2330 + }, + { + "epoch": 0.8214977973568282, + "grad_norm": 1.582616740422673, + "learning_rate": 1.3697275092151908e-05, + "loss": 0.7212036848068237, + "step": 2331 + }, + { + "epoch": 0.8218502202643172, + "grad_norm": 1.5449017822829925, + "learning_rate": 1.3691860926097685e-05, + "loss": 0.7758737206459045, + "step": 2332 + }, + { + "epoch": 0.8222026431718061, + "grad_norm": 1.7784238395856364, + "learning_rate": 1.368644550677157e-05, + "loss": 0.62369704246521, + "step": 2333 + }, + { + "epoch": 0.8225550660792952, + "grad_norm": 1.6110908974677367, + "learning_rate": 1.3681028836011935e-05, + "loss": 0.8051841855049133, + "step": 2334 + }, + { + "epoch": 0.8229074889867841, + "grad_norm": 1.3626761635443752, + "learning_rate": 1.3675610915657568e-05, + "loss": 0.6087243556976318, + "step": 2335 + }, + { + "epoch": 0.8232599118942732, + "grad_norm": 1.9382202981470131, + "learning_rate": 1.3670191747547685e-05, + "loss": 0.6949581503868103, + "step": 2336 + }, + { + 
"epoch": 0.8236123348017621, + "grad_norm": 1.5451121537596906, + "learning_rate": 1.3664771333521922e-05, + "loss": 0.5621528029441833, + "step": 2337 + }, + { + "epoch": 0.8239647577092511, + "grad_norm": 1.622327701652298, + "learning_rate": 1.3659349675420346e-05, + "loss": 0.8731498718261719, + "step": 2338 + }, + { + "epoch": 0.8243171806167401, + "grad_norm": 1.5570249925953572, + "learning_rate": 1.3653926775083437e-05, + "loss": 0.6997240781784058, + "step": 2339 + }, + { + "epoch": 0.824669603524229, + "grad_norm": 1.6562463291138314, + "learning_rate": 1.3648502634352104e-05, + "loss": 0.8061426877975464, + "step": 2340 + }, + { + "epoch": 0.8250220264317181, + "grad_norm": 1.7061312576253802, + "learning_rate": 1.3643077255067667e-05, + "loss": 0.6186845302581787, + "step": 2341 + }, + { + "epoch": 0.825374449339207, + "grad_norm": 1.6605971928200247, + "learning_rate": 1.3637650639071884e-05, + "loss": 0.8098937273025513, + "step": 2342 + }, + { + "epoch": 0.825726872246696, + "grad_norm": 1.6091516027269386, + "learning_rate": 1.3632222788206916e-05, + "loss": 0.5810271501541138, + "step": 2343 + }, + { + "epoch": 0.826079295154185, + "grad_norm": 1.4965459276387059, + "learning_rate": 1.3626793704315348e-05, + "loss": 0.48309600353240967, + "step": 2344 + }, + { + "epoch": 0.826431718061674, + "grad_norm": 1.4326274242229946, + "learning_rate": 1.3621363389240188e-05, + "loss": 0.7366980314254761, + "step": 2345 + }, + { + "epoch": 0.826784140969163, + "grad_norm": 1.571199172280502, + "learning_rate": 1.3615931844824859e-05, + "loss": 0.6572252511978149, + "step": 2346 + }, + { + "epoch": 0.827136563876652, + "grad_norm": 1.3078300281358257, + "learning_rate": 1.3610499072913204e-05, + "loss": 0.6776653528213501, + "step": 2347 + }, + { + "epoch": 0.827488986784141, + "grad_norm": 1.772641440888185, + "learning_rate": 1.3605065075349473e-05, + "loss": 0.6536053419113159, + "step": 2348 + }, + { + "epoch": 0.82784140969163, + "grad_norm": 
1.600184025362065, + "learning_rate": 1.3599629853978342e-05, + "loss": 0.7000117301940918, + "step": 2349 + }, + { + "epoch": 0.8281938325991189, + "grad_norm": 1.5533713409132957, + "learning_rate": 1.3594193410644902e-05, + "loss": 0.6480045318603516, + "step": 2350 + }, + { + "epoch": 0.8285462555066079, + "grad_norm": 1.5474076871693587, + "learning_rate": 1.3588755747194656e-05, + "loss": 0.6428179740905762, + "step": 2351 + }, + { + "epoch": 0.8288986784140969, + "grad_norm": 1.3886734182652174, + "learning_rate": 1.3583316865473517e-05, + "loss": 0.618633508682251, + "step": 2352 + }, + { + "epoch": 0.8292511013215859, + "grad_norm": 1.5946423674864716, + "learning_rate": 1.357787676732782e-05, + "loss": 0.7289671897888184, + "step": 2353 + }, + { + "epoch": 0.8296035242290749, + "grad_norm": 1.687058159970245, + "learning_rate": 1.3572435454604307e-05, + "loss": 0.6969538927078247, + "step": 2354 + }, + { + "epoch": 0.8299559471365638, + "grad_norm": 1.565248379514886, + "learning_rate": 1.3566992929150137e-05, + "loss": 0.8490859270095825, + "step": 2355 + }, + { + "epoch": 0.8303083700440529, + "grad_norm": 1.532906793366292, + "learning_rate": 1.3561549192812877e-05, + "loss": 0.6883271336555481, + "step": 2356 + }, + { + "epoch": 0.8306607929515418, + "grad_norm": 1.3151000902691472, + "learning_rate": 1.3556104247440504e-05, + "loss": 0.68092280626297, + "step": 2357 + }, + { + "epoch": 0.8310132158590309, + "grad_norm": 1.2591886658215548, + "learning_rate": 1.3550658094881413e-05, + "loss": 0.7077454924583435, + "step": 2358 + }, + { + "epoch": 0.8313656387665198, + "grad_norm": 1.5452673483096302, + "learning_rate": 1.3545210736984393e-05, + "loss": 0.7364591360092163, + "step": 2359 + }, + { + "epoch": 0.8317180616740089, + "grad_norm": 1.4999509926023873, + "learning_rate": 1.3539762175598666e-05, + "loss": 0.8047930002212524, + "step": 2360 + }, + { + "epoch": 0.8320704845814978, + "grad_norm": 1.4862380654794773, + "learning_rate": 
1.3534312412573836e-05, + "loss": 0.7717781066894531, + "step": 2361 + }, + { + "epoch": 0.8324229074889867, + "grad_norm": 1.7032828917925678, + "learning_rate": 1.3528861449759938e-05, + "loss": 0.7228613495826721, + "step": 2362 + }, + { + "epoch": 0.8327753303964758, + "grad_norm": 1.5752771060390574, + "learning_rate": 1.3523409289007399e-05, + "loss": 0.8025436401367188, + "step": 2363 + }, + { + "epoch": 0.8331277533039647, + "grad_norm": 1.5214524176303228, + "learning_rate": 1.3517955932167057e-05, + "loss": 0.6653664112091064, + "step": 2364 + }, + { + "epoch": 0.8334801762114538, + "grad_norm": 1.4409217046848606, + "learning_rate": 1.3512501381090158e-05, + "loss": 0.709527313709259, + "step": 2365 + }, + { + "epoch": 0.8338325991189427, + "grad_norm": 1.4678807653581447, + "learning_rate": 1.3507045637628355e-05, + "loss": 0.7317520380020142, + "step": 2366 + }, + { + "epoch": 0.8341850220264317, + "grad_norm": 1.4520344718636113, + "learning_rate": 1.3501588703633703e-05, + "loss": 0.734069287776947, + "step": 2367 + }, + { + "epoch": 0.8345374449339207, + "grad_norm": 1.355050784601881, + "learning_rate": 1.349613058095866e-05, + "loss": 0.5950552225112915, + "step": 2368 + }, + { + "epoch": 0.8348898678414097, + "grad_norm": 1.3916802158941735, + "learning_rate": 1.3490671271456084e-05, + "loss": 0.5958857536315918, + "step": 2369 + }, + { + "epoch": 0.8352422907488987, + "grad_norm": 1.319860830071963, + "learning_rate": 1.348521077697925e-05, + "loss": 0.7094449996948242, + "step": 2370 + }, + { + "epoch": 0.8355947136563877, + "grad_norm": 1.283824481194398, + "learning_rate": 1.3479749099381818e-05, + "loss": 0.6260385513305664, + "step": 2371 + }, + { + "epoch": 0.8359471365638766, + "grad_norm": 1.3546760632082742, + "learning_rate": 1.3474286240517862e-05, + "loss": 0.65608811378479, + "step": 2372 + }, + { + "epoch": 0.8362995594713656, + "grad_norm": 1.5902013950729095, + "learning_rate": 1.346882220224185e-05, + "loss": 0.6942586898803711, 
+ "step": 2373 + }, + { + "epoch": 0.8366519823788546, + "grad_norm": 1.5432700710308092, + "learning_rate": 1.3463356986408653e-05, + "loss": 0.6831374168395996, + "step": 2374 + }, + { + "epoch": 0.8370044052863436, + "grad_norm": 1.2453712902306997, + "learning_rate": 1.3457890594873546e-05, + "loss": 0.6363790035247803, + "step": 2375 + }, + { + "epoch": 0.8373568281938326, + "grad_norm": 1.4407831477600082, + "learning_rate": 1.3452423029492194e-05, + "loss": 0.698935866355896, + "step": 2376 + }, + { + "epoch": 0.8377092511013216, + "grad_norm": 1.6516160077651472, + "learning_rate": 1.3446954292120667e-05, + "loss": 0.8569005727767944, + "step": 2377 + }, + { + "epoch": 0.8380616740088106, + "grad_norm": 1.4963554673760426, + "learning_rate": 1.3441484384615428e-05, + "loss": 0.8461613655090332, + "step": 2378 + }, + { + "epoch": 0.8384140969162995, + "grad_norm": 1.635336062215313, + "learning_rate": 1.343601330883335e-05, + "loss": 0.7481078505516052, + "step": 2379 + }, + { + "epoch": 0.8387665198237886, + "grad_norm": 1.1164155853725835, + "learning_rate": 1.343054106663168e-05, + "loss": 0.5632544755935669, + "step": 2380 + }, + { + "epoch": 0.8391189427312775, + "grad_norm": 1.2387886339726162, + "learning_rate": 1.3425067659868084e-05, + "loss": 0.528980016708374, + "step": 2381 + }, + { + "epoch": 0.8394713656387666, + "grad_norm": 1.2987181937645196, + "learning_rate": 1.341959309040061e-05, + "loss": 0.5520849227905273, + "step": 2382 + }, + { + "epoch": 0.8398237885462555, + "grad_norm": 1.1709661282123542, + "learning_rate": 1.34141173600877e-05, + "loss": 0.569744348526001, + "step": 2383 + }, + { + "epoch": 0.8401762114537445, + "grad_norm": 1.1526596958180186, + "learning_rate": 1.3408640470788202e-05, + "loss": 0.595065712928772, + "step": 2384 + }, + { + "epoch": 0.8405286343612335, + "grad_norm": 1.716530250506247, + "learning_rate": 1.3403162424361342e-05, + "loss": 0.6993277072906494, + "step": 2385 + }, + { + "epoch": 0.8408810572687224, 
+ "grad_norm": 1.467497517918387, + "learning_rate": 1.3397683222666748e-05, + "loss": 0.6183342933654785, + "step": 2386 + }, + { + "epoch": 0.8412334801762115, + "grad_norm": 1.5660447986557493, + "learning_rate": 1.339220286756444e-05, + "loss": 0.7280797362327576, + "step": 2387 + }, + { + "epoch": 0.8415859030837004, + "grad_norm": 1.5538390945999534, + "learning_rate": 1.3386721360914829e-05, + "loss": 0.7377837896347046, + "step": 2388 + }, + { + "epoch": 0.8419383259911895, + "grad_norm": 1.3658202604001934, + "learning_rate": 1.3381238704578718e-05, + "loss": 0.7202758193016052, + "step": 2389 + }, + { + "epoch": 0.8422907488986784, + "grad_norm": 1.4864419338323784, + "learning_rate": 1.3375754900417291e-05, + "loss": 0.5899994969367981, + "step": 2390 + }, + { + "epoch": 0.8426431718061674, + "grad_norm": 1.6545749228929092, + "learning_rate": 1.3370269950292133e-05, + "loss": 0.8128558993339539, + "step": 2391 + }, + { + "epoch": 0.8429955947136564, + "grad_norm": 1.4863580222240895, + "learning_rate": 1.3364783856065213e-05, + "loss": 0.8222962617874146, + "step": 2392 + }, + { + "epoch": 0.8433480176211454, + "grad_norm": 1.5392010225603865, + "learning_rate": 1.3359296619598894e-05, + "loss": 0.7898896932601929, + "step": 2393 + }, + { + "epoch": 0.8437004405286344, + "grad_norm": 1.59106154269148, + "learning_rate": 1.3353808242755912e-05, + "loss": 0.6596726179122925, + "step": 2394 + }, + { + "epoch": 0.8440528634361234, + "grad_norm": 1.6652244607977948, + "learning_rate": 1.3348318727399411e-05, + "loss": 0.8073080778121948, + "step": 2395 + }, + { + "epoch": 0.8444052863436123, + "grad_norm": 1.582055504815832, + "learning_rate": 1.3342828075392902e-05, + "loss": 0.6640043258666992, + "step": 2396 + }, + { + "epoch": 0.8447577092511013, + "grad_norm": 1.415789065826391, + "learning_rate": 1.3337336288600297e-05, + "loss": 0.6067632436752319, + "step": 2397 + }, + { + "epoch": 0.8451101321585903, + "grad_norm": 1.308177796408265, + 
"learning_rate": 1.3331843368885882e-05, + "loss": 0.6891398429870605, + "step": 2398 + }, + { + "epoch": 0.8454625550660793, + "grad_norm": 1.276250238749864, + "learning_rate": 1.3326349318114335e-05, + "loss": 0.6007423996925354, + "step": 2399 + }, + { + "epoch": 0.8458149779735683, + "grad_norm": 1.6159836309404996, + "learning_rate": 1.3320854138150712e-05, + "loss": 0.7314017415046692, + "step": 2400 + }, + { + "epoch": 0.8461674008810572, + "grad_norm": 1.5060027308979995, + "learning_rate": 1.3315357830860461e-05, + "loss": 0.7352335453033447, + "step": 2401 + }, + { + "epoch": 0.8465198237885463, + "grad_norm": 1.3629774951204896, + "learning_rate": 1.3309860398109402e-05, + "loss": 0.6546785831451416, + "step": 2402 + }, + { + "epoch": 0.8468722466960352, + "grad_norm": 1.4629106252693242, + "learning_rate": 1.3304361841763746e-05, + "loss": 0.590252697467804, + "step": 2403 + }, + { + "epoch": 0.8472246696035243, + "grad_norm": 1.5501476697602834, + "learning_rate": 1.3298862163690078e-05, + "loss": 0.6864089369773865, + "step": 2404 + }, + { + "epoch": 0.8475770925110132, + "grad_norm": 1.452376737172979, + "learning_rate": 1.3293361365755373e-05, + "loss": 0.7818390130996704, + "step": 2405 + }, + { + "epoch": 0.8479295154185023, + "grad_norm": 1.9084475381981967, + "learning_rate": 1.3287859449826977e-05, + "loss": 0.7461166381835938, + "step": 2406 + }, + { + "epoch": 0.8482819383259912, + "grad_norm": 1.7337796671611372, + "learning_rate": 1.3282356417772618e-05, + "loss": 0.7519750595092773, + "step": 2407 + }, + { + "epoch": 0.8486343612334801, + "grad_norm": 1.445619912428175, + "learning_rate": 1.3276852271460406e-05, + "loss": 0.7041791081428528, + "step": 2408 + }, + { + "epoch": 0.8489867841409692, + "grad_norm": 1.3131157575910486, + "learning_rate": 1.327134701275883e-05, + "loss": 0.5649428367614746, + "step": 2409 + }, + { + "epoch": 0.8493392070484581, + "grad_norm": 1.838398891045019, + "learning_rate": 1.3265840643536746e-05, + 
"loss": 0.6607545614242554, + "step": 2410 + }, + { + "epoch": 0.8496916299559472, + "grad_norm": 1.590568626194504, + "learning_rate": 1.3260333165663406e-05, + "loss": 0.7393547892570496, + "step": 2411 + }, + { + "epoch": 0.8500440528634361, + "grad_norm": 1.660269046740627, + "learning_rate": 1.325482458100842e-05, + "loss": 0.6550742387771606, + "step": 2412 + }, + { + "epoch": 0.8503964757709251, + "grad_norm": 1.3409806360783354, + "learning_rate": 1.324931489144178e-05, + "loss": 0.5104576349258423, + "step": 2413 + }, + { + "epoch": 0.8507488986784141, + "grad_norm": 1.7056036938051933, + "learning_rate": 1.3243804098833859e-05, + "loss": 0.7679733037948608, + "step": 2414 + }, + { + "epoch": 0.8511013215859031, + "grad_norm": 1.3058704920771766, + "learning_rate": 1.3238292205055397e-05, + "loss": 0.6516377925872803, + "step": 2415 + }, + { + "epoch": 0.8514537444933921, + "grad_norm": 1.4749751578789572, + "learning_rate": 1.3232779211977509e-05, + "loss": 0.8509281277656555, + "step": 2416 + }, + { + "epoch": 0.8518061674008811, + "grad_norm": 1.6532741255389543, + "learning_rate": 1.3227265121471691e-05, + "loss": 0.5643317103385925, + "step": 2417 + }, + { + "epoch": 0.85215859030837, + "grad_norm": 1.4681710603298503, + "learning_rate": 1.3221749935409798e-05, + "loss": 0.5294302105903625, + "step": 2418 + }, + { + "epoch": 0.852511013215859, + "grad_norm": 1.4914498870655002, + "learning_rate": 1.3216233655664067e-05, + "loss": 0.6301594972610474, + "step": 2419 + }, + { + "epoch": 0.852863436123348, + "grad_norm": 1.399957922496421, + "learning_rate": 1.32107162841071e-05, + "loss": 0.6930294036865234, + "step": 2420 + }, + { + "epoch": 0.853215859030837, + "grad_norm": 1.4069779391578274, + "learning_rate": 1.3205197822611876e-05, + "loss": 0.6266883611679077, + "step": 2421 + }, + { + "epoch": 0.853568281938326, + "grad_norm": 1.7817063662748283, + "learning_rate": 1.3199678273051743e-05, + "loss": 0.7789868116378784, + "step": 2422 + }, + { + 
"epoch": 0.853920704845815, + "grad_norm": 1.3387299141459739, + "learning_rate": 1.3194157637300416e-05, + "loss": 0.7148274779319763, + "step": 2423 + }, + { + "epoch": 0.854273127753304, + "grad_norm": 1.4757263125304436, + "learning_rate": 1.3188635917231972e-05, + "loss": 0.550403356552124, + "step": 2424 + }, + { + "epoch": 0.8546255506607929, + "grad_norm": 1.563076871593329, + "learning_rate": 1.3183113114720872e-05, + "loss": 0.6650338768959045, + "step": 2425 + }, + { + "epoch": 0.854977973568282, + "grad_norm": 1.569123753374588, + "learning_rate": 1.317758923164193e-05, + "loss": 0.7774436473846436, + "step": 2426 + }, + { + "epoch": 0.8553303964757709, + "grad_norm": 1.407079429107656, + "learning_rate": 1.3172064269870335e-05, + "loss": 0.6192025542259216, + "step": 2427 + }, + { + "epoch": 0.85568281938326, + "grad_norm": 1.6230407627498116, + "learning_rate": 1.3166538231281635e-05, + "loss": 0.6758309602737427, + "step": 2428 + }, + { + "epoch": 0.8560352422907489, + "grad_norm": 1.6026256588862147, + "learning_rate": 1.3161011117751756e-05, + "loss": 0.7311116456985474, + "step": 2429 + }, + { + "epoch": 0.8563876651982378, + "grad_norm": 1.797024553793142, + "learning_rate": 1.3155482931156977e-05, + "loss": 0.7525666952133179, + "step": 2430 + }, + { + "epoch": 0.8567400881057269, + "grad_norm": 1.7067244433524313, + "learning_rate": 1.3149953673373945e-05, + "loss": 0.6903671026229858, + "step": 2431 + }, + { + "epoch": 0.8570925110132158, + "grad_norm": 1.2833360218942749, + "learning_rate": 1.314442334627967e-05, + "loss": 0.6036638021469116, + "step": 2432 + }, + { + "epoch": 0.8574449339207049, + "grad_norm": 1.6354054518430503, + "learning_rate": 1.3138891951751526e-05, + "loss": 0.6490209698677063, + "step": 2433 + }, + { + "epoch": 0.8577973568281938, + "grad_norm": 1.6970156912379664, + "learning_rate": 1.3133359491667252e-05, + "loss": 0.692024290561676, + "step": 2434 + }, + { + "epoch": 0.8581497797356828, + "grad_norm": 
1.4031255607051936, + "learning_rate": 1.3127825967904944e-05, + "loss": 0.6977943181991577, + "step": 2435 + }, + { + "epoch": 0.8585022026431718, + "grad_norm": 1.3842045822286646, + "learning_rate": 1.312229138234306e-05, + "loss": 0.625649094581604, + "step": 2436 + }, + { + "epoch": 0.8588546255506608, + "grad_norm": 1.5910466082409926, + "learning_rate": 1.3116755736860422e-05, + "loss": 0.671939492225647, + "step": 2437 + }, + { + "epoch": 0.8592070484581498, + "grad_norm": 1.3856883940296008, + "learning_rate": 1.3111219033336211e-05, + "loss": 0.700029194355011, + "step": 2438 + }, + { + "epoch": 0.8595594713656388, + "grad_norm": 1.3907118477619378, + "learning_rate": 1.3105681273649959e-05, + "loss": 0.6339718699455261, + "step": 2439 + }, + { + "epoch": 0.8599118942731278, + "grad_norm": 1.306943148235595, + "learning_rate": 1.3100142459681569e-05, + "loss": 0.7105488777160645, + "step": 2440 + }, + { + "epoch": 0.8602643171806167, + "grad_norm": 1.4503861250177865, + "learning_rate": 1.3094602593311294e-05, + "loss": 0.616797924041748, + "step": 2441 + }, + { + "epoch": 0.8606167400881057, + "grad_norm": 1.5110286813274958, + "learning_rate": 1.3089061676419746e-05, + "loss": 0.7167524099349976, + "step": 2442 + }, + { + "epoch": 0.8609691629955947, + "grad_norm": 1.5215961993133658, + "learning_rate": 1.3083519710887895e-05, + "loss": 0.5499090552330017, + "step": 2443 + }, + { + "epoch": 0.8613215859030837, + "grad_norm": 1.4623789546240658, + "learning_rate": 1.3077976698597064e-05, + "loss": 0.5764151811599731, + "step": 2444 + }, + { + "epoch": 0.8616740088105727, + "grad_norm": 1.438510619597336, + "learning_rate": 1.3072432641428931e-05, + "loss": 0.7171419858932495, + "step": 2445 + }, + { + "epoch": 0.8620264317180617, + "grad_norm": 1.3023250448197168, + "learning_rate": 1.3066887541265539e-05, + "loss": 0.7546026706695557, + "step": 2446 + }, + { + "epoch": 0.8623788546255506, + "grad_norm": 1.2250371592811133, + "learning_rate": 
1.306134139998927e-05, + "loss": 0.5884296298027039, + "step": 2447 + }, + { + "epoch": 0.8627312775330397, + "grad_norm": 1.3135127283076564, + "learning_rate": 1.3055794219482867e-05, + "loss": 0.6877926588058472, + "step": 2448 + }, + { + "epoch": 0.8630837004405286, + "grad_norm": 1.5935068741769265, + "learning_rate": 1.3050246001629425e-05, + "loss": 0.598037838935852, + "step": 2449 + }, + { + "epoch": 0.8634361233480177, + "grad_norm": 1.4128431939298278, + "learning_rate": 1.3044696748312395e-05, + "loss": 0.6560795307159424, + "step": 2450 + }, + { + "epoch": 0.8637885462555066, + "grad_norm": 1.5856094022002207, + "learning_rate": 1.3039146461415575e-05, + "loss": 0.7130829691886902, + "step": 2451 + }, + { + "epoch": 0.8641409691629955, + "grad_norm": 1.9167144031452974, + "learning_rate": 1.303359514282311e-05, + "loss": 0.7402251958847046, + "step": 2452 + }, + { + "epoch": 0.8644933920704846, + "grad_norm": 1.4143817039312587, + "learning_rate": 1.3028042794419502e-05, + "loss": 0.6610683798789978, + "step": 2453 + }, + { + "epoch": 0.8648458149779735, + "grad_norm": 1.6544654323663863, + "learning_rate": 1.3022489418089606e-05, + "loss": 0.84892737865448, + "step": 2454 + }, + { + "epoch": 0.8651982378854626, + "grad_norm": 1.689285386487206, + "learning_rate": 1.3016935015718612e-05, + "loss": 0.7285948991775513, + "step": 2455 + }, + { + "epoch": 0.8655506607929515, + "grad_norm": 1.46262615014944, + "learning_rate": 1.3011379589192074e-05, + "loss": 0.6800004839897156, + "step": 2456 + }, + { + "epoch": 0.8659030837004406, + "grad_norm": 1.492659523558787, + "learning_rate": 1.3005823140395878e-05, + "loss": 0.618618369102478, + "step": 2457 + }, + { + "epoch": 0.8662555066079295, + "grad_norm": 1.8084387802865425, + "learning_rate": 1.3000265671216278e-05, + "loss": 0.7657757997512817, + "step": 2458 + }, + { + "epoch": 0.8666079295154185, + "grad_norm": 1.5490708834885107, + "learning_rate": 1.2994707183539848e-05, + "loss": 0.7814151644706726, 
+ "step": 2459 + }, + { + "epoch": 0.8669603524229075, + "grad_norm": 1.2899412950022648, + "learning_rate": 1.2989147679253531e-05, + "loss": 0.6494930982589722, + "step": 2460 + }, + { + "epoch": 0.8673127753303965, + "grad_norm": 1.5543724658760723, + "learning_rate": 1.2983587160244602e-05, + "loss": 0.6498425006866455, + "step": 2461 + }, + { + "epoch": 0.8676651982378855, + "grad_norm": 1.5210228165977844, + "learning_rate": 1.2978025628400684e-05, + "loss": 0.635313093662262, + "step": 2462 + }, + { + "epoch": 0.8680176211453744, + "grad_norm": 1.500755936886382, + "learning_rate": 1.2972463085609744e-05, + "loss": 0.6892971992492676, + "step": 2463 + }, + { + "epoch": 0.8683700440528634, + "grad_norm": 1.3872566957567176, + "learning_rate": 1.2966899533760095e-05, + "loss": 0.691922128200531, + "step": 2464 + }, + { + "epoch": 0.8687224669603524, + "grad_norm": 1.773327696286038, + "learning_rate": 1.2961334974740386e-05, + "loss": 0.5764378309249878, + "step": 2465 + }, + { + "epoch": 0.8690748898678414, + "grad_norm": 1.6231464224655543, + "learning_rate": 1.2955769410439616e-05, + "loss": 0.8193005919456482, + "step": 2466 + }, + { + "epoch": 0.8694273127753304, + "grad_norm": 1.4243504226778951, + "learning_rate": 1.2950202842747115e-05, + "loss": 0.6141501665115356, + "step": 2467 + }, + { + "epoch": 0.8697797356828194, + "grad_norm": 1.5061592811010869, + "learning_rate": 1.2944635273552565e-05, + "loss": 0.7464454174041748, + "step": 2468 + }, + { + "epoch": 0.8701321585903083, + "grad_norm": 1.3349759192393535, + "learning_rate": 1.293906670474598e-05, + "loss": 0.5970025062561035, + "step": 2469 + }, + { + "epoch": 0.8704845814977974, + "grad_norm": 1.6022434524431073, + "learning_rate": 1.2933497138217714e-05, + "loss": 0.7247673273086548, + "step": 2470 + }, + { + "epoch": 0.8708370044052863, + "grad_norm": 1.535051650641408, + "learning_rate": 1.2927926575858463e-05, + "loss": 0.746272087097168, + "step": 2471 + }, + { + "epoch": 
0.8711894273127754, + "grad_norm": 1.5072596947359789, + "learning_rate": 1.2922355019559265e-05, + "loss": 0.6918776035308838, + "step": 2472 + }, + { + "epoch": 0.8715418502202643, + "grad_norm": 1.553343209452483, + "learning_rate": 1.2916782471211478e-05, + "loss": 0.6056039929389954, + "step": 2473 + }, + { + "epoch": 0.8718942731277532, + "grad_norm": 1.3670048649799473, + "learning_rate": 1.2911208932706821e-05, + "loss": 0.6699481010437012, + "step": 2474 + }, + { + "epoch": 0.8722466960352423, + "grad_norm": 1.4719810242076543, + "learning_rate": 1.2905634405937327e-05, + "loss": 0.5141814947128296, + "step": 2475 + }, + { + "epoch": 0.8725991189427312, + "grad_norm": 1.5819338229003952, + "learning_rate": 1.2900058892795383e-05, + "loss": 0.7521284818649292, + "step": 2476 + }, + { + "epoch": 0.8729515418502203, + "grad_norm": 2.2082732494247916, + "learning_rate": 1.2894482395173695e-05, + "loss": 0.6878937482833862, + "step": 2477 + }, + { + "epoch": 0.8733039647577092, + "grad_norm": 1.3942904192465777, + "learning_rate": 1.2888904914965317e-05, + "loss": 0.5963379144668579, + "step": 2478 + }, + { + "epoch": 0.8736563876651983, + "grad_norm": 1.7634340153188761, + "learning_rate": 1.2883326454063623e-05, + "loss": 0.7572320103645325, + "step": 2479 + }, + { + "epoch": 0.8740088105726872, + "grad_norm": 1.399026210420982, + "learning_rate": 1.2877747014362334e-05, + "loss": 0.7047982215881348, + "step": 2480 + }, + { + "epoch": 0.8743612334801762, + "grad_norm": 2.0588397887454715, + "learning_rate": 1.2872166597755488e-05, + "loss": 0.6449024677276611, + "step": 2481 + }, + { + "epoch": 0.8747136563876652, + "grad_norm": 1.6446468607591163, + "learning_rate": 1.2866585206137469e-05, + "loss": 0.7590922117233276, + "step": 2482 + }, + { + "epoch": 0.8750660792951542, + "grad_norm": 1.6164965426300901, + "learning_rate": 1.2861002841402983e-05, + "loss": 0.7534210085868835, + "step": 2483 + }, + { + "epoch": 0.8754185022026432, + "grad_norm": 
1.9198456186069754, + "learning_rate": 1.2855419505447073e-05, + "loss": 0.7091225385665894, + "step": 2484 + }, + { + "epoch": 0.8757709251101321, + "grad_norm": 1.5347710098555305, + "learning_rate": 1.2849835200165104e-05, + "loss": 0.7578933835029602, + "step": 2485 + }, + { + "epoch": 0.8761233480176212, + "grad_norm": 1.3282869408675961, + "learning_rate": 1.2844249927452771e-05, + "loss": 0.5938349962234497, + "step": 2486 + }, + { + "epoch": 0.8764757709251101, + "grad_norm": 1.5090052513716286, + "learning_rate": 1.2838663689206108e-05, + "loss": 0.5726315379142761, + "step": 2487 + }, + { + "epoch": 0.8768281938325991, + "grad_norm": 1.450396836473225, + "learning_rate": 1.2833076487321465e-05, + "loss": 0.8181554079055786, + "step": 2488 + }, + { + "epoch": 0.8771806167400881, + "grad_norm": 1.71919397348368, + "learning_rate": 1.2827488323695522e-05, + "loss": 0.7465275526046753, + "step": 2489 + }, + { + "epoch": 0.8775330396475771, + "grad_norm": 1.2623461784182488, + "learning_rate": 1.2821899200225288e-05, + "loss": 0.6083456873893738, + "step": 2490 + }, + { + "epoch": 0.877885462555066, + "grad_norm": 1.4922167619772364, + "learning_rate": 1.2816309118808095e-05, + "loss": 0.6393307447433472, + "step": 2491 + }, + { + "epoch": 0.8782378854625551, + "grad_norm": 1.3846980777960398, + "learning_rate": 1.2810718081341604e-05, + "loss": 0.6562504768371582, + "step": 2492 + }, + { + "epoch": 0.878590308370044, + "grad_norm": 1.5590691123255283, + "learning_rate": 1.2805126089723798e-05, + "loss": 0.6737300753593445, + "step": 2493 + }, + { + "epoch": 0.8789427312775331, + "grad_norm": 1.7724399876158112, + "learning_rate": 1.2799533145852982e-05, + "loss": 0.6246815919876099, + "step": 2494 + }, + { + "epoch": 0.879295154185022, + "grad_norm": 1.7718655540042538, + "learning_rate": 1.2793939251627788e-05, + "loss": 0.7499577403068542, + "step": 2495 + }, + { + "epoch": 0.8796475770925111, + "grad_norm": 1.6628095797742937, + "learning_rate": 
1.2788344408947171e-05, + "loss": 0.7645655870437622, + "step": 2496 + }, + { + "epoch": 0.88, + "grad_norm": 1.732888201165417, + "learning_rate": 1.27827486197104e-05, + "loss": 0.7407524585723877, + "step": 2497 + }, + { + "epoch": 0.8803524229074889, + "grad_norm": 1.590151572985607, + "learning_rate": 1.2777151885817078e-05, + "loss": 0.6401108503341675, + "step": 2498 + }, + { + "epoch": 0.880704845814978, + "grad_norm": 1.5984459598023502, + "learning_rate": 1.2771554209167116e-05, + "loss": 0.8332269191741943, + "step": 2499 + }, + { + "epoch": 0.8810572687224669, + "grad_norm": 1.61859187638703, + "learning_rate": 1.2765955591660757e-05, + "loss": 0.7677830457687378, + "step": 2500 + }, + { + "epoch": 0.881409691629956, + "grad_norm": 1.4420535275594295, + "learning_rate": 1.2760356035198553e-05, + "loss": 0.8532943725585938, + "step": 2501 + }, + { + "epoch": 0.8817621145374449, + "grad_norm": 1.3662949943021319, + "learning_rate": 1.2754755541681384e-05, + "loss": 0.6287009716033936, + "step": 2502 + }, + { + "epoch": 0.882114537444934, + "grad_norm": 1.38981570117233, + "learning_rate": 1.2749154113010432e-05, + "loss": 0.7039133310317993, + "step": 2503 + }, + { + "epoch": 0.8824669603524229, + "grad_norm": 1.6518390089780828, + "learning_rate": 1.2743551751087222e-05, + "loss": 0.6959357857704163, + "step": 2504 + }, + { + "epoch": 0.8828193832599119, + "grad_norm": 1.3554006828606007, + "learning_rate": 1.2737948457813571e-05, + "loss": 0.6862938404083252, + "step": 2505 + }, + { + "epoch": 0.8831718061674009, + "grad_norm": 1.6773466383223146, + "learning_rate": 1.273234423509163e-05, + "loss": 0.6903352737426758, + "step": 2506 + }, + { + "epoch": 0.8835242290748899, + "grad_norm": 1.374322606051121, + "learning_rate": 1.2726739084823851e-05, + "loss": 0.7226145267486572, + "step": 2507 + }, + { + "epoch": 0.8838766519823789, + "grad_norm": 1.4091144718113782, + "learning_rate": 1.2721133008913015e-05, + "loss": 0.7865043878555298, + "step": 2508 + 
}, + { + "epoch": 0.8842290748898678, + "grad_norm": 1.4501170174913356, + "learning_rate": 1.2715526009262209e-05, + "loss": 0.6594572067260742, + "step": 2509 + }, + { + "epoch": 0.8845814977973568, + "grad_norm": 1.3500042347590218, + "learning_rate": 1.270991808777483e-05, + "loss": 0.5967481136322021, + "step": 2510 + }, + { + "epoch": 0.8849339207048458, + "grad_norm": 1.3600104271689806, + "learning_rate": 1.2704309246354599e-05, + "loss": 0.7843632698059082, + "step": 2511 + }, + { + "epoch": 0.8852863436123348, + "grad_norm": 1.3543191802484777, + "learning_rate": 1.2698699486905538e-05, + "loss": 0.7475506067276001, + "step": 2512 + }, + { + "epoch": 0.8856387665198238, + "grad_norm": 1.4881501151953718, + "learning_rate": 1.2693088811331987e-05, + "loss": 0.8082534670829773, + "step": 2513 + }, + { + "epoch": 0.8859911894273128, + "grad_norm": 1.6899694353159702, + "learning_rate": 1.2687477221538598e-05, + "loss": 0.7421785593032837, + "step": 2514 + }, + { + "epoch": 0.8863436123348017, + "grad_norm": 1.295151070825849, + "learning_rate": 1.2681864719430328e-05, + "loss": 0.6268718242645264, + "step": 2515 + }, + { + "epoch": 0.8866960352422908, + "grad_norm": 1.595396389533138, + "learning_rate": 1.2676251306912448e-05, + "loss": 0.7285459041595459, + "step": 2516 + }, + { + "epoch": 0.8870484581497797, + "grad_norm": 1.4826705601530517, + "learning_rate": 1.2670636985890542e-05, + "loss": 0.6132184267044067, + "step": 2517 + }, + { + "epoch": 0.8874008810572688, + "grad_norm": 1.4018565352445778, + "learning_rate": 1.2665021758270488e-05, + "loss": 0.5550754070281982, + "step": 2518 + }, + { + "epoch": 0.8877533039647577, + "grad_norm": 1.3628132273232696, + "learning_rate": 1.2659405625958488e-05, + "loss": 0.5357390642166138, + "step": 2519 + }, + { + "epoch": 0.8881057268722466, + "grad_norm": 1.4153066703364516, + "learning_rate": 1.2653788590861039e-05, + "loss": 0.5858328342437744, + "step": 2520 + }, + { + "epoch": 0.8884581497797357, + 
"grad_norm": 1.731815068535558, + "learning_rate": 1.2648170654884955e-05, + "loss": 0.7109283208847046, + "step": 2521 + }, + { + "epoch": 0.8888105726872246, + "grad_norm": 1.9753429482306435, + "learning_rate": 1.2642551819937348e-05, + "loss": 0.808137834072113, + "step": 2522 + }, + { + "epoch": 0.8891629955947137, + "grad_norm": 1.6385693606484741, + "learning_rate": 1.2636932087925637e-05, + "loss": 0.587998628616333, + "step": 2523 + }, + { + "epoch": 0.8895154185022026, + "grad_norm": 1.4234526769499198, + "learning_rate": 1.2631311460757545e-05, + "loss": 0.5555537343025208, + "step": 2524 + }, + { + "epoch": 0.8898678414096917, + "grad_norm": 1.4118650122814267, + "learning_rate": 1.2625689940341102e-05, + "loss": 0.641632080078125, + "step": 2525 + }, + { + "epoch": 0.8902202643171806, + "grad_norm": 1.5401015682174186, + "learning_rate": 1.262006752858464e-05, + "loss": 0.7005184888839722, + "step": 2526 + }, + { + "epoch": 0.8905726872246696, + "grad_norm": 1.272518513643159, + "learning_rate": 1.2614444227396792e-05, + "loss": 0.6907261610031128, + "step": 2527 + }, + { + "epoch": 0.8909251101321586, + "grad_norm": 1.4162379009723582, + "learning_rate": 1.2608820038686492e-05, + "loss": 0.5757718086242676, + "step": 2528 + }, + { + "epoch": 0.8912775330396476, + "grad_norm": 1.888252337049927, + "learning_rate": 1.2603194964362979e-05, + "loss": 0.6462569832801819, + "step": 2529 + }, + { + "epoch": 0.8916299559471366, + "grad_norm": 2.6509089623338586, + "learning_rate": 1.2597569006335787e-05, + "loss": 0.7028999328613281, + "step": 2530 + }, + { + "epoch": 0.8919823788546255, + "grad_norm": 1.3325876541370223, + "learning_rate": 1.2591942166514763e-05, + "loss": 0.5789325833320618, + "step": 2531 + }, + { + "epoch": 0.8923348017621145, + "grad_norm": 1.5373223041612576, + "learning_rate": 1.258631444681003e-05, + "loss": 0.6545255184173584, + "step": 2532 + }, + { + "epoch": 0.8926872246696035, + "grad_norm": 1.560686991488605, + "learning_rate": 
1.258068584913204e-05, + "loss": 0.7227469682693481, + "step": 2533 + }, + { + "epoch": 0.8930396475770925, + "grad_norm": 1.3545909427052794, + "learning_rate": 1.2575056375391513e-05, + "loss": 0.5985771417617798, + "step": 2534 + }, + { + "epoch": 0.8933920704845815, + "grad_norm": 1.5422643503857134, + "learning_rate": 1.2569426027499485e-05, + "loss": 0.6705960035324097, + "step": 2535 + }, + { + "epoch": 0.8937444933920705, + "grad_norm": 1.5427105799340322, + "learning_rate": 1.2563794807367284e-05, + "loss": 0.6662027835845947, + "step": 2536 + }, + { + "epoch": 0.8940969162995595, + "grad_norm": 1.5270286613671318, + "learning_rate": 1.2558162716906537e-05, + "loss": 0.7742453813552856, + "step": 2537 + }, + { + "epoch": 0.8944493392070485, + "grad_norm": 1.628032718158035, + "learning_rate": 1.255252975802916e-05, + "loss": 0.6124528050422668, + "step": 2538 + }, + { + "epoch": 0.8948017621145374, + "grad_norm": 1.455711423520218, + "learning_rate": 1.2546895932647365e-05, + "loss": 0.5728615522384644, + "step": 2539 + }, + { + "epoch": 0.8951541850220265, + "grad_norm": 1.5737389396802581, + "learning_rate": 1.2541261242673665e-05, + "loss": 0.6347167491912842, + "step": 2540 + }, + { + "epoch": 0.8955066079295154, + "grad_norm": 1.62324317727844, + "learning_rate": 1.2535625690020861e-05, + "loss": 0.6350656747817993, + "step": 2541 + }, + { + "epoch": 0.8958590308370044, + "grad_norm": 1.674339310689998, + "learning_rate": 1.2529989276602043e-05, + "loss": 0.7538303732872009, + "step": 2542 + }, + { + "epoch": 0.8962114537444934, + "grad_norm": 1.5900983527544528, + "learning_rate": 1.2524352004330607e-05, + "loss": 0.8154318928718567, + "step": 2543 + }, + { + "epoch": 0.8965638766519823, + "grad_norm": 1.4033932104877718, + "learning_rate": 1.2518713875120222e-05, + "loss": 0.5313037633895874, + "step": 2544 + }, + { + "epoch": 0.8969162995594714, + "grad_norm": 1.3069539051845793, + "learning_rate": 1.2513074890884864e-05, + "loss": 
0.740921139717102, + "step": 2545 + }, + { + "epoch": 0.8972687224669603, + "grad_norm": 1.593785966579892, + "learning_rate": 1.250743505353879e-05, + "loss": 0.6079888343811035, + "step": 2546 + }, + { + "epoch": 0.8976211453744494, + "grad_norm": 1.266024042192646, + "learning_rate": 1.2501794364996553e-05, + "loss": 0.46736663579940796, + "step": 2547 + }, + { + "epoch": 0.8979735682819383, + "grad_norm": 1.5066472302506413, + "learning_rate": 1.2496152827172982e-05, + "loss": 0.5670880079269409, + "step": 2548 + }, + { + "epoch": 0.8983259911894274, + "grad_norm": 1.4991563073413907, + "learning_rate": 1.2490510441983212e-05, + "loss": 0.7845931649208069, + "step": 2549 + }, + { + "epoch": 0.8986784140969163, + "grad_norm": 1.5458127280177445, + "learning_rate": 1.2484867211342653e-05, + "loss": 0.5625143647193909, + "step": 2550 + }, + { + "epoch": 0.8990308370044053, + "grad_norm": 1.5409896244330605, + "learning_rate": 1.2479223137167011e-05, + "loss": 0.6631217002868652, + "step": 2551 + }, + { + "epoch": 0.8993832599118943, + "grad_norm": 1.6071757454969378, + "learning_rate": 1.247357822137227e-05, + "loss": 0.6588548421859741, + "step": 2552 + }, + { + "epoch": 0.8997356828193832, + "grad_norm": 1.4192601474848106, + "learning_rate": 1.24679324658747e-05, + "loss": 0.8046029806137085, + "step": 2553 + }, + { + "epoch": 0.9000881057268723, + "grad_norm": 1.6272051463241026, + "learning_rate": 1.2462285872590862e-05, + "loss": 0.6651894450187683, + "step": 2554 + }, + { + "epoch": 0.9004405286343612, + "grad_norm": 1.5179002680249722, + "learning_rate": 1.2456638443437605e-05, + "loss": 0.5888474583625793, + "step": 2555 + }, + { + "epoch": 0.9007929515418502, + "grad_norm": 1.7319345866859506, + "learning_rate": 1.2450990180332045e-05, + "loss": 0.5915735363960266, + "step": 2556 + }, + { + "epoch": 0.9011453744493392, + "grad_norm": 1.5409991319630119, + "learning_rate": 1.24453410851916e-05, + "loss": 0.6830431222915649, + "step": 2557 + }, + { + 
"epoch": 0.9014977973568282, + "grad_norm": 1.3954767744454935, + "learning_rate": 1.2439691159933955e-05, + "loss": 0.6812379956245422, + "step": 2558 + }, + { + "epoch": 0.9018502202643172, + "grad_norm": 1.3481753587360845, + "learning_rate": 1.2434040406477092e-05, + "loss": 0.6887152791023254, + "step": 2559 + }, + { + "epoch": 0.9022026431718062, + "grad_norm": 1.495436388275929, + "learning_rate": 1.2428388826739254e-05, + "loss": 0.677071213722229, + "step": 2560 + }, + { + "epoch": 0.9025550660792951, + "grad_norm": 1.5809198519920526, + "learning_rate": 1.242273642263899e-05, + "loss": 0.6635652780532837, + "step": 2561 + }, + { + "epoch": 0.9029074889867842, + "grad_norm": 1.7455357614962055, + "learning_rate": 1.2417083196095105e-05, + "loss": 0.7543712854385376, + "step": 2562 + }, + { + "epoch": 0.9032599118942731, + "grad_norm": 1.743758273604275, + "learning_rate": 1.2411429149026701e-05, + "loss": 0.6219073534011841, + "step": 2563 + }, + { + "epoch": 0.9036123348017621, + "grad_norm": 1.360518097358955, + "learning_rate": 1.2405774283353144e-05, + "loss": 0.6576533317565918, + "step": 2564 + }, + { + "epoch": 0.9039647577092511, + "grad_norm": 1.3683846685040542, + "learning_rate": 1.240011860099409e-05, + "loss": 0.6458585262298584, + "step": 2565 + }, + { + "epoch": 0.90431718061674, + "grad_norm": 1.5753618523282886, + "learning_rate": 1.2394462103869464e-05, + "loss": 0.6943198442459106, + "step": 2566 + }, + { + "epoch": 0.9046696035242291, + "grad_norm": 1.5425443594991994, + "learning_rate": 1.2388804793899473e-05, + "loss": 0.6684235334396362, + "step": 2567 + }, + { + "epoch": 0.905022026431718, + "grad_norm": 1.4432793187881665, + "learning_rate": 1.2383146673004598e-05, + "loss": 0.6707017421722412, + "step": 2568 + }, + { + "epoch": 0.9053744493392071, + "grad_norm": 1.4610510830510222, + "learning_rate": 1.2377487743105593e-05, + "loss": 0.6009544134140015, + "step": 2569 + }, + { + "epoch": 0.905726872246696, + "grad_norm": 
1.3343070463381261, + "learning_rate": 1.2371828006123488e-05, + "loss": 0.57770836353302, + "step": 2570 + }, + { + "epoch": 0.9060792951541851, + "grad_norm": 1.50423514822828, + "learning_rate": 1.236616746397959e-05, + "loss": 0.6146866083145142, + "step": 2571 + }, + { + "epoch": 0.906431718061674, + "grad_norm": 1.4060902038910876, + "learning_rate": 1.2360506118595476e-05, + "loss": 0.6374951601028442, + "step": 2572 + }, + { + "epoch": 0.906784140969163, + "grad_norm": 1.5006132241656203, + "learning_rate": 1.2354843971892998e-05, + "loss": 0.6933800578117371, + "step": 2573 + }, + { + "epoch": 0.907136563876652, + "grad_norm": 1.6402374081466708, + "learning_rate": 1.2349181025794278e-05, + "loss": 0.857126772403717, + "step": 2574 + }, + { + "epoch": 0.9074889867841409, + "grad_norm": 1.7970464713795387, + "learning_rate": 1.2343517282221704e-05, + "loss": 0.7316192388534546, + "step": 2575 + }, + { + "epoch": 0.90784140969163, + "grad_norm": 1.7338748475900745, + "learning_rate": 1.2337852743097947e-05, + "loss": 0.7916824817657471, + "step": 2576 + }, + { + "epoch": 0.9081938325991189, + "grad_norm": 1.342845056559204, + "learning_rate": 1.2332187410345941e-05, + "loss": 0.6437021493911743, + "step": 2577 + }, + { + "epoch": 0.908546255506608, + "grad_norm": 1.547322536503476, + "learning_rate": 1.2326521285888892e-05, + "loss": 0.8788109421730042, + "step": 2578 + }, + { + "epoch": 0.9088986784140969, + "grad_norm": 1.4382005842040866, + "learning_rate": 1.2320854371650268e-05, + "loss": 0.704395055770874, + "step": 2579 + }, + { + "epoch": 0.9092511013215859, + "grad_norm": 1.410037340911335, + "learning_rate": 1.2315186669553814e-05, + "loss": 0.6579844951629639, + "step": 2580 + }, + { + "epoch": 0.9096035242290749, + "grad_norm": 1.3089054036910626, + "learning_rate": 1.2309518181523537e-05, + "loss": 0.6329941749572754, + "step": 2581 + }, + { + "epoch": 0.9099559471365639, + "grad_norm": 1.746183595307062, + "learning_rate": 
1.2303848909483711e-05, + "loss": 0.8868603706359863, + "step": 2582 + }, + { + "epoch": 0.9103083700440529, + "grad_norm": 1.4531546458491524, + "learning_rate": 1.2298178855358875e-05, + "loss": 0.6402688026428223, + "step": 2583 + }, + { + "epoch": 0.9106607929515419, + "grad_norm": 1.3289180353613772, + "learning_rate": 1.2292508021073846e-05, + "loss": 0.8017194271087646, + "step": 2584 + }, + { + "epoch": 0.9110132158590308, + "grad_norm": 1.6340808373356166, + "learning_rate": 1.2286836408553687e-05, + "loss": 0.7396517992019653, + "step": 2585 + }, + { + "epoch": 0.9113656387665198, + "grad_norm": 1.5443847526543046, + "learning_rate": 1.2281164019723737e-05, + "loss": 0.6123272776603699, + "step": 2586 + }, + { + "epoch": 0.9117180616740088, + "grad_norm": 1.464544186162697, + "learning_rate": 1.2275490856509591e-05, + "loss": 0.7675807476043701, + "step": 2587 + }, + { + "epoch": 0.9120704845814978, + "grad_norm": 1.67164115622116, + "learning_rate": 1.2269816920837121e-05, + "loss": 0.6814998388290405, + "step": 2588 + }, + { + "epoch": 0.9124229074889868, + "grad_norm": 1.3228366401729674, + "learning_rate": 1.2264142214632441e-05, + "loss": 0.6290348768234253, + "step": 2589 + }, + { + "epoch": 0.9127753303964757, + "grad_norm": 1.5676260945728981, + "learning_rate": 1.2258466739821946e-05, + "loss": 0.6752464175224304, + "step": 2590 + }, + { + "epoch": 0.9131277533039648, + "grad_norm": 1.3388236473063337, + "learning_rate": 1.2252790498332275e-05, + "loss": 0.6153687238693237, + "step": 2591 + }, + { + "epoch": 0.9134801762114537, + "grad_norm": 1.5346187118504635, + "learning_rate": 1.2247113492090344e-05, + "loss": 0.5952479839324951, + "step": 2592 + }, + { + "epoch": 0.9138325991189428, + "grad_norm": 1.4457638395568853, + "learning_rate": 1.2241435723023309e-05, + "loss": 0.5457659959793091, + "step": 2593 + }, + { + "epoch": 0.9141850220264317, + "grad_norm": 1.5389040689398128, + "learning_rate": 1.2235757193058607e-05, + "loss": 
0.7373491525650024, + "step": 2594 + }, + { + "epoch": 0.9145374449339208, + "grad_norm": 1.3149945847764668, + "learning_rate": 1.2230077904123914e-05, + "loss": 0.6564488410949707, + "step": 2595 + }, + { + "epoch": 0.9148898678414097, + "grad_norm": 1.8716233271125673, + "learning_rate": 1.2224397858147176e-05, + "loss": 0.6790947914123535, + "step": 2596 + }, + { + "epoch": 0.9152422907488986, + "grad_norm": 1.6467277287942856, + "learning_rate": 1.2218717057056592e-05, + "loss": 0.8304486274719238, + "step": 2597 + }, + { + "epoch": 0.9155947136563877, + "grad_norm": 1.7018746535629268, + "learning_rate": 1.2213035502780616e-05, + "loss": 0.7452701330184937, + "step": 2598 + }, + { + "epoch": 0.9159471365638766, + "grad_norm": 1.270448247487427, + "learning_rate": 1.2207353197247957e-05, + "loss": 0.572200357913971, + "step": 2599 + }, + { + "epoch": 0.9162995594713657, + "grad_norm": 1.574291214704138, + "learning_rate": 1.2201670142387587e-05, + "loss": 0.7142342925071716, + "step": 2600 + }, + { + "epoch": 0.9166519823788546, + "grad_norm": 1.367606009894927, + "learning_rate": 1.219598634012872e-05, + "loss": 0.9390528202056885, + "step": 2601 + }, + { + "epoch": 0.9170044052863436, + "grad_norm": 1.6870829349403977, + "learning_rate": 1.2190301792400832e-05, + "loss": 0.6897540092468262, + "step": 2602 + }, + { + "epoch": 0.9173568281938326, + "grad_norm": 1.5631074773710765, + "learning_rate": 1.2184616501133649e-05, + "loss": 0.7309582233428955, + "step": 2603 + }, + { + "epoch": 0.9177092511013216, + "grad_norm": 1.4956685909345118, + "learning_rate": 1.2178930468257154e-05, + "loss": 0.7692370414733887, + "step": 2604 + }, + { + "epoch": 0.9180616740088106, + "grad_norm": 1.6160577913139176, + "learning_rate": 1.2173243695701575e-05, + "loss": 0.7650456428527832, + "step": 2605 + }, + { + "epoch": 0.9184140969162996, + "grad_norm": 1.4419682356133905, + "learning_rate": 1.2167556185397396e-05, + "loss": 0.6000699996948242, + "step": 2606 + }, + { + 
"epoch": 0.9187665198237885, + "grad_norm": 1.368037173998054, + "learning_rate": 1.2161867939275344e-05, + "loss": 0.6227651834487915, + "step": 2607 + }, + { + "epoch": 0.9191189427312776, + "grad_norm": 1.3507337866227296, + "learning_rate": 1.2156178959266414e-05, + "loss": 0.6554160118103027, + "step": 2608 + }, + { + "epoch": 0.9194713656387665, + "grad_norm": 1.4986959017577084, + "learning_rate": 1.2150489247301826e-05, + "loss": 0.5360773801803589, + "step": 2609 + }, + { + "epoch": 0.9198237885462555, + "grad_norm": 1.3546990782009203, + "learning_rate": 1.2144798805313065e-05, + "loss": 0.7184062004089355, + "step": 2610 + }, + { + "epoch": 0.9201762114537445, + "grad_norm": 1.6293146255106934, + "learning_rate": 1.2139107635231857e-05, + "loss": 0.646910548210144, + "step": 2611 + }, + { + "epoch": 0.9205286343612334, + "grad_norm": 1.449047238736513, + "learning_rate": 1.2133415738990179e-05, + "loss": 0.7794413566589355, + "step": 2612 + }, + { + "epoch": 0.9208810572687225, + "grad_norm": 1.5173448374489182, + "learning_rate": 1.2127723118520254e-05, + "loss": 0.5904654860496521, + "step": 2613 + }, + { + "epoch": 0.9212334801762114, + "grad_norm": 1.6062827687776695, + "learning_rate": 1.2122029775754545e-05, + "loss": 0.5526635646820068, + "step": 2614 + }, + { + "epoch": 0.9215859030837005, + "grad_norm": 1.584080412995617, + "learning_rate": 1.2116335712625766e-05, + "loss": 0.6832528710365295, + "step": 2615 + }, + { + "epoch": 0.9219383259911894, + "grad_norm": 1.5962919739796952, + "learning_rate": 1.211064093106688e-05, + "loss": 0.5858304500579834, + "step": 2616 + }, + { + "epoch": 0.9222907488986785, + "grad_norm": 1.6542154949587857, + "learning_rate": 1.2104945433011079e-05, + "loss": 0.7383478879928589, + "step": 2617 + }, + { + "epoch": 0.9226431718061674, + "grad_norm": 1.4197774198085091, + "learning_rate": 1.2099249220391815e-05, + "loss": 0.6466768980026245, + "step": 2618 + }, + { + "epoch": 0.9229955947136564, + "grad_norm": 
1.6780588288371647, + "learning_rate": 1.209355229514277e-05, + "loss": 0.5681238174438477, + "step": 2619 + }, + { + "epoch": 0.9233480176211454, + "grad_norm": 1.4473210287022626, + "learning_rate": 1.2087854659197874e-05, + "loss": 0.5726606249809265, + "step": 2620 + }, + { + "epoch": 0.9237004405286343, + "grad_norm": 1.5671254030487451, + "learning_rate": 1.2082156314491298e-05, + "loss": 0.6643342971801758, + "step": 2621 + }, + { + "epoch": 0.9240528634361234, + "grad_norm": 1.6891696074210503, + "learning_rate": 1.2076457262957454e-05, + "loss": 0.5408967733383179, + "step": 2622 + }, + { + "epoch": 0.9244052863436123, + "grad_norm": 1.503887173232949, + "learning_rate": 1.207075750653099e-05, + "loss": 0.706169843673706, + "step": 2623 + }, + { + "epoch": 0.9247577092511013, + "grad_norm": 1.7934999117659478, + "learning_rate": 1.2065057047146797e-05, + "loss": 0.7973969578742981, + "step": 2624 + }, + { + "epoch": 0.9251101321585903, + "grad_norm": 1.4120942109312036, + "learning_rate": 1.2059355886740002e-05, + "loss": 0.6907010674476624, + "step": 2625 + }, + { + "epoch": 0.9254625550660793, + "grad_norm": 1.8378017160561377, + "learning_rate": 1.2053654027245977e-05, + "loss": 0.8174253702163696, + "step": 2626 + }, + { + "epoch": 0.9258149779735683, + "grad_norm": 1.8873519737119473, + "learning_rate": 1.204795147060032e-05, + "loss": 0.60319983959198, + "step": 2627 + }, + { + "epoch": 0.9261674008810573, + "grad_norm": 2.916318649806586, + "learning_rate": 1.204224821873887e-05, + "loss": 0.718228816986084, + "step": 2628 + }, + { + "epoch": 0.9265198237885462, + "grad_norm": 1.5801609410641386, + "learning_rate": 1.2036544273597708e-05, + "loss": 0.7385132312774658, + "step": 2629 + }, + { + "epoch": 0.9268722466960353, + "grad_norm": 1.5320403236251587, + "learning_rate": 1.203083963711315e-05, + "loss": 0.7700635194778442, + "step": 2630 + }, + { + "epoch": 0.9272246696035242, + "grad_norm": 1.4381703720368488, + "learning_rate": 
1.2025134311221732e-05, + "loss": 0.8767666816711426, + "step": 2631 + }, + { + "epoch": 0.9275770925110132, + "grad_norm": 1.4534975042510074, + "learning_rate": 1.2019428297860241e-05, + "loss": 0.6517986059188843, + "step": 2632 + }, + { + "epoch": 0.9279295154185022, + "grad_norm": 1.3295910752440807, + "learning_rate": 1.2013721598965688e-05, + "loss": 0.5967941284179688, + "step": 2633 + }, + { + "epoch": 0.9282819383259912, + "grad_norm": 2.085302745009741, + "learning_rate": 1.2008014216475327e-05, + "loss": 0.7480533123016357, + "step": 2634 + }, + { + "epoch": 0.9286343612334802, + "grad_norm": 1.415633444981562, + "learning_rate": 1.2002306152326626e-05, + "loss": 0.8020488023757935, + "step": 2635 + }, + { + "epoch": 0.9289867841409691, + "grad_norm": 1.235581839334599, + "learning_rate": 1.1996597408457302e-05, + "loss": 0.5535889863967896, + "step": 2636 + }, + { + "epoch": 0.9293392070484582, + "grad_norm": 1.5093780754929471, + "learning_rate": 1.1990887986805295e-05, + "loss": 0.6888864040374756, + "step": 2637 + }, + { + "epoch": 0.9296916299559471, + "grad_norm": 1.761723253773031, + "learning_rate": 1.1985177889308777e-05, + "loss": 0.7723515033721924, + "step": 2638 + }, + { + "epoch": 0.9300440528634362, + "grad_norm": 2.4386861549294476, + "learning_rate": 1.1979467117906143e-05, + "loss": 0.6929488182067871, + "step": 2639 + }, + { + "epoch": 0.9303964757709251, + "grad_norm": 1.7413716913523498, + "learning_rate": 1.1973755674536027e-05, + "loss": 0.7025216221809387, + "step": 2640 + }, + { + "epoch": 0.9307488986784141, + "grad_norm": 1.5278537581621425, + "learning_rate": 1.1968043561137287e-05, + "loss": 0.6618740558624268, + "step": 2641 + }, + { + "epoch": 0.9311013215859031, + "grad_norm": 1.3720349025623486, + "learning_rate": 1.1962330779649002e-05, + "loss": 0.5308352708816528, + "step": 2642 + }, + { + "epoch": 0.931453744493392, + "grad_norm": 1.6043152610659495, + "learning_rate": 1.1956617332010488e-05, + "loss": 
0.6559470891952515, + "step": 2643 + }, + { + "epoch": 0.9318061674008811, + "grad_norm": 1.5758989244918422, + "learning_rate": 1.1950903220161286e-05, + "loss": 0.6572221517562866, + "step": 2644 + }, + { + "epoch": 0.93215859030837, + "grad_norm": 1.7357943090474917, + "learning_rate": 1.194518844604115e-05, + "loss": 0.7854161262512207, + "step": 2645 + }, + { + "epoch": 0.932511013215859, + "grad_norm": 1.555855365183626, + "learning_rate": 1.1939473011590075e-05, + "loss": 0.6471760869026184, + "step": 2646 + }, + { + "epoch": 0.932863436123348, + "grad_norm": 1.5672890328663938, + "learning_rate": 1.1933756918748271e-05, + "loss": 0.6261184215545654, + "step": 2647 + }, + { + "epoch": 0.933215859030837, + "grad_norm": 1.425764950800843, + "learning_rate": 1.1928040169456176e-05, + "loss": 0.6876180171966553, + "step": 2648 + }, + { + "epoch": 0.933568281938326, + "grad_norm": 1.6203483271740744, + "learning_rate": 1.1922322765654446e-05, + "loss": 0.6782447099685669, + "step": 2649 + }, + { + "epoch": 0.933920704845815, + "grad_norm": 1.640471126849017, + "learning_rate": 1.1916604709283958e-05, + "loss": 0.6085894107818604, + "step": 2650 + }, + { + "epoch": 0.934273127753304, + "grad_norm": 1.6964969219798813, + "learning_rate": 1.1910886002285822e-05, + "loss": 0.6940577030181885, + "step": 2651 + }, + { + "epoch": 0.934625550660793, + "grad_norm": 1.4704189591593113, + "learning_rate": 1.1905166646601356e-05, + "loss": 0.8204144239425659, + "step": 2652 + }, + { + "epoch": 0.9349779735682819, + "grad_norm": 1.389489538033466, + "learning_rate": 1.1899446644172106e-05, + "loss": 0.6184309720993042, + "step": 2653 + }, + { + "epoch": 0.9353303964757709, + "grad_norm": 2.1507675107714306, + "learning_rate": 1.1893725996939831e-05, + "loss": 0.7499250173568726, + "step": 2654 + }, + { + "epoch": 0.9356828193832599, + "grad_norm": 1.739709417281562, + "learning_rate": 1.1888004706846519e-05, + "loss": 0.7021058797836304, + "step": 2655 + }, + { + "epoch": 
0.9360352422907489, + "grad_norm": 1.4311959050457856, + "learning_rate": 1.188228277583436e-05, + "loss": 0.6005666255950928, + "step": 2656 + }, + { + "epoch": 0.9363876651982379, + "grad_norm": 1.4910024814198868, + "learning_rate": 1.1876560205845782e-05, + "loss": 0.6572481393814087, + "step": 2657 + }, + { + "epoch": 0.9367400881057268, + "grad_norm": 1.5258435486694566, + "learning_rate": 1.187083699882341e-05, + "loss": 0.7402434349060059, + "step": 2658 + }, + { + "epoch": 0.9370925110132159, + "grad_norm": 1.4352893489445113, + "learning_rate": 1.1865113156710106e-05, + "loss": 0.6693596243858337, + "step": 2659 + }, + { + "epoch": 0.9374449339207048, + "grad_norm": 1.6704808140330663, + "learning_rate": 1.1859388681448925e-05, + "loss": 0.7708617448806763, + "step": 2660 + }, + { + "epoch": 0.9377973568281939, + "grad_norm": 1.4245143913781195, + "learning_rate": 1.1853663574983154e-05, + "loss": 0.5871701836585999, + "step": 2661 + }, + { + "epoch": 0.9381497797356828, + "grad_norm": 1.505716027406483, + "learning_rate": 1.1847937839256287e-05, + "loss": 0.6492994427680969, + "step": 2662 + }, + { + "epoch": 0.9385022026431719, + "grad_norm": 1.3908643684674444, + "learning_rate": 1.1842211476212038e-05, + "loss": 0.6803429126739502, + "step": 2663 + }, + { + "epoch": 0.9388546255506608, + "grad_norm": 1.5017846140199234, + "learning_rate": 1.1836484487794318e-05, + "loss": 0.5602244734764099, + "step": 2664 + }, + { + "epoch": 0.9392070484581497, + "grad_norm": 1.2797636855685697, + "learning_rate": 1.183075687594727e-05, + "loss": 0.6562157869338989, + "step": 2665 + }, + { + "epoch": 0.9395594713656388, + "grad_norm": 1.4855818018568143, + "learning_rate": 1.182502864261524e-05, + "loss": 0.71474289894104, + "step": 2666 + }, + { + "epoch": 0.9399118942731277, + "grad_norm": 1.5995143445420303, + "learning_rate": 1.1819299789742782e-05, + "loss": 0.7130062580108643, + "step": 2667 + }, + { + "epoch": 0.9402643171806168, + "grad_norm": 
1.645740195320987, + "learning_rate": 1.1813570319274663e-05, + "loss": 0.788813054561615, + "step": 2668 + }, + { + "epoch": 0.9406167400881057, + "grad_norm": 1.965041520497338, + "learning_rate": 1.1807840233155863e-05, + "loss": 0.6485022306442261, + "step": 2669 + }, + { + "epoch": 0.9409691629955947, + "grad_norm": 1.6399057690578631, + "learning_rate": 1.1802109533331562e-05, + "loss": 0.4491521418094635, + "step": 2670 + }, + { + "epoch": 0.9413215859030837, + "grad_norm": 1.6744760497066637, + "learning_rate": 1.1796378221747162e-05, + "loss": 0.6073683500289917, + "step": 2671 + }, + { + "epoch": 0.9416740088105727, + "grad_norm": 1.859395754773969, + "learning_rate": 1.179064630034826e-05, + "loss": 0.5942971706390381, + "step": 2672 + }, + { + "epoch": 0.9420264317180617, + "grad_norm": 1.4303169952284007, + "learning_rate": 1.1784913771080667e-05, + "loss": 0.7295013666152954, + "step": 2673 + }, + { + "epoch": 0.9423788546255507, + "grad_norm": 1.8192026049611665, + "learning_rate": 1.1779180635890394e-05, + "loss": 0.7347372770309448, + "step": 2674 + }, + { + "epoch": 0.9427312775330396, + "grad_norm": 1.5350977995485566, + "learning_rate": 1.1773446896723668e-05, + "loss": 0.5591942667961121, + "step": 2675 + }, + { + "epoch": 0.9430837004405286, + "grad_norm": 1.5036340589436215, + "learning_rate": 1.1767712555526911e-05, + "loss": 0.822568953037262, + "step": 2676 + }, + { + "epoch": 0.9434361233480176, + "grad_norm": 1.4619836017557306, + "learning_rate": 1.1761977614246757e-05, + "loss": 0.649920642375946, + "step": 2677 + }, + { + "epoch": 0.9437885462555066, + "grad_norm": 1.4884584586985279, + "learning_rate": 1.1756242074830036e-05, + "loss": 0.6298861503601074, + "step": 2678 + }, + { + "epoch": 0.9441409691629956, + "grad_norm": 1.6194483495779424, + "learning_rate": 1.1750505939223787e-05, + "loss": 0.81938636302948, + "step": 2679 + }, + { + "epoch": 0.9444933920704845, + "grad_norm": 1.4751430048371623, + "learning_rate": 
1.1744769209375248e-05, + "loss": 0.6627225875854492, + "step": 2680 + }, + { + "epoch": 0.9448458149779736, + "grad_norm": 1.310837287475738, + "learning_rate": 1.1739031887231864e-05, + "loss": 0.6563318371772766, + "step": 2681 + }, + { + "epoch": 0.9451982378854625, + "grad_norm": 1.3782616320804129, + "learning_rate": 1.1733293974741273e-05, + "loss": 0.5702694654464722, + "step": 2682 + }, + { + "epoch": 0.9455506607929516, + "grad_norm": 1.5543579440741437, + "learning_rate": 1.1727555473851321e-05, + "loss": 0.685553789138794, + "step": 2683 + }, + { + "epoch": 0.9459030837004405, + "grad_norm": 1.2085432227797441, + "learning_rate": 1.172181638651005e-05, + "loss": 0.6092622876167297, + "step": 2684 + }, + { + "epoch": 0.9462555066079296, + "grad_norm": 2.0946243925185013, + "learning_rate": 1.1716076714665701e-05, + "loss": 0.6650614738464355, + "step": 2685 + }, + { + "epoch": 0.9466079295154185, + "grad_norm": 1.6479809864443196, + "learning_rate": 1.171033646026671e-05, + "loss": 0.7665754556655884, + "step": 2686 + }, + { + "epoch": 0.9469603524229074, + "grad_norm": 1.3199886923676785, + "learning_rate": 1.1704595625261722e-05, + "loss": 0.6365277171134949, + "step": 2687 + }, + { + "epoch": 0.9473127753303965, + "grad_norm": 1.4825934002405374, + "learning_rate": 1.1698854211599565e-05, + "loss": 0.6622267961502075, + "step": 2688 + }, + { + "epoch": 0.9476651982378854, + "grad_norm": 1.4519347010464663, + "learning_rate": 1.1693112221229278e-05, + "loss": 0.6636145710945129, + "step": 2689 + }, + { + "epoch": 0.9480176211453745, + "grad_norm": 1.3381328445735352, + "learning_rate": 1.168736965610008e-05, + "loss": 0.6943212747573853, + "step": 2690 + }, + { + "epoch": 0.9483700440528634, + "grad_norm": 1.5439836232478343, + "learning_rate": 1.1681626518161397e-05, + "loss": 0.7479512691497803, + "step": 2691 + }, + { + "epoch": 0.9487224669603525, + "grad_norm": 1.5424571304173897, + "learning_rate": 1.1675882809362846e-05, + "loss": 
0.7227041721343994, + "step": 2692 + }, + { + "epoch": 0.9490748898678414, + "grad_norm": 1.3855049912904343, + "learning_rate": 1.1670138531654238e-05, + "loss": 0.7366166114807129, + "step": 2693 + }, + { + "epoch": 0.9494273127753304, + "grad_norm": 1.634945701470733, + "learning_rate": 1.1664393686985571e-05, + "loss": 0.8634493350982666, + "step": 2694 + }, + { + "epoch": 0.9497797356828194, + "grad_norm": 1.3102748532201536, + "learning_rate": 1.165864827730705e-05, + "loss": 0.5802862048149109, + "step": 2695 + }, + { + "epoch": 0.9501321585903084, + "grad_norm": 1.571840947668404, + "learning_rate": 1.1652902304569053e-05, + "loss": 0.5931085348129272, + "step": 2696 + }, + { + "epoch": 0.9504845814977974, + "grad_norm": 1.7175179856841813, + "learning_rate": 1.164715577072217e-05, + "loss": 0.7684508562088013, + "step": 2697 + }, + { + "epoch": 0.9508370044052863, + "grad_norm": 1.6094834386500196, + "learning_rate": 1.1641408677717158e-05, + "loss": 0.94246906042099, + "step": 2698 + }, + { + "epoch": 0.9511894273127753, + "grad_norm": 1.3999360216133725, + "learning_rate": 1.1635661027504985e-05, + "loss": 0.7072316408157349, + "step": 2699 + }, + { + "epoch": 0.9515418502202643, + "grad_norm": 1.5926279454886292, + "learning_rate": 1.16299128220368e-05, + "loss": 0.5872572064399719, + "step": 2700 + }, + { + "epoch": 0.9518942731277533, + "grad_norm": 1.4987885212929257, + "learning_rate": 1.1624164063263931e-05, + "loss": 0.6549060344696045, + "step": 2701 + }, + { + "epoch": 0.9522466960352423, + "grad_norm": 1.6773153304869155, + "learning_rate": 1.161841475313791e-05, + "loss": 0.7338137626647949, + "step": 2702 + }, + { + "epoch": 0.9525991189427313, + "grad_norm": 1.6523970676343225, + "learning_rate": 1.161266489361045e-05, + "loss": 0.6942911148071289, + "step": 2703 + }, + { + "epoch": 0.9529515418502202, + "grad_norm": 2.037450532351288, + "learning_rate": 1.1606914486633444e-05, + "loss": 0.674375057220459, + "step": 2704 + }, + { + "epoch": 
0.9533039647577093, + "grad_norm": 1.6450610385875453, + "learning_rate": 1.160116353415898e-05, + "loss": 0.6790377497673035, + "step": 2705 + }, + { + "epoch": 0.9536563876651982, + "grad_norm": 1.6724856793361191, + "learning_rate": 1.1595412038139326e-05, + "loss": 0.5902142524719238, + "step": 2706 + }, + { + "epoch": 0.9540088105726873, + "grad_norm": 1.4286047469499437, + "learning_rate": 1.1589660000526937e-05, + "loss": 0.7034019231796265, + "step": 2707 + }, + { + "epoch": 0.9543612334801762, + "grad_norm": 3.1062423334867106, + "learning_rate": 1.158390742327445e-05, + "loss": 0.6986846923828125, + "step": 2708 + }, + { + "epoch": 0.9547136563876651, + "grad_norm": 1.8367783325674814, + "learning_rate": 1.1578154308334683e-05, + "loss": 0.6972544193267822, + "step": 2709 + }, + { + "epoch": 0.9550660792951542, + "grad_norm": 1.3370474194561557, + "learning_rate": 1.1572400657660646e-05, + "loss": 0.6312702298164368, + "step": 2710 + }, + { + "epoch": 0.9554185022026431, + "grad_norm": 1.7161015062577845, + "learning_rate": 1.1566646473205518e-05, + "loss": 0.7584360241889954, + "step": 2711 + }, + { + "epoch": 0.9557709251101322, + "grad_norm": 1.256436023255263, + "learning_rate": 1.156089175692267e-05, + "loss": 0.700894296169281, + "step": 2712 + }, + { + "epoch": 0.9561233480176211, + "grad_norm": 1.3257581819044393, + "learning_rate": 1.1555136510765645e-05, + "loss": 0.5637902617454529, + "step": 2713 + }, + { + "epoch": 0.9564757709251102, + "grad_norm": 1.388319575976614, + "learning_rate": 1.1549380736688173e-05, + "loss": 0.4537314772605896, + "step": 2714 + }, + { + "epoch": 0.9568281938325991, + "grad_norm": 1.8324279373886256, + "learning_rate": 1.1543624436644161e-05, + "loss": 0.7880423069000244, + "step": 2715 + }, + { + "epoch": 0.9571806167400881, + "grad_norm": 1.6310441104063826, + "learning_rate": 1.1537867612587692e-05, + "loss": 0.7314344644546509, + "step": 2716 + }, + { + "epoch": 0.9575330396475771, + "grad_norm": 
1.7810937354544796, + "learning_rate": 1.1532110266473026e-05, + "loss": 0.9550024271011353, + "step": 2717 + }, + { + "epoch": 0.9578854625550661, + "grad_norm": 1.3474455317445524, + "learning_rate": 1.152635240025461e-05, + "loss": 0.6482470035552979, + "step": 2718 + }, + { + "epoch": 0.9582378854625551, + "grad_norm": 1.6637520992254753, + "learning_rate": 1.152059401588705e-05, + "loss": 0.6347365975379944, + "step": 2719 + }, + { + "epoch": 0.958590308370044, + "grad_norm": 1.469780222161662, + "learning_rate": 1.151483511532515e-05, + "loss": 0.7214993238449097, + "step": 2720 + }, + { + "epoch": 0.958942731277533, + "grad_norm": 1.4597118679681749, + "learning_rate": 1.1509075700523869e-05, + "loss": 0.6255312561988831, + "step": 2721 + }, + { + "epoch": 0.959295154185022, + "grad_norm": 1.4735593911126945, + "learning_rate": 1.1503315773438352e-05, + "loss": 0.6152437925338745, + "step": 2722 + }, + { + "epoch": 0.959647577092511, + "grad_norm": 1.8178378627357112, + "learning_rate": 1.1497555336023916e-05, + "loss": 0.6565401554107666, + "step": 2723 + }, + { + "epoch": 0.96, + "grad_norm": 1.5268947365741583, + "learning_rate": 1.1491794390236047e-05, + "loss": 0.796178936958313, + "step": 2724 + }, + { + "epoch": 0.960352422907489, + "grad_norm": 1.4289859748860345, + "learning_rate": 1.1486032938030409e-05, + "loss": 0.6243436336517334, + "step": 2725 + }, + { + "epoch": 0.960704845814978, + "grad_norm": 3.1702620206811036, + "learning_rate": 1.148027098136284e-05, + "loss": 0.6043159365653992, + "step": 2726 + }, + { + "epoch": 0.961057268722467, + "grad_norm": 2.2643023721896554, + "learning_rate": 1.1474508522189334e-05, + "loss": 0.7268002033233643, + "step": 2727 + }, + { + "epoch": 0.9614096916299559, + "grad_norm": 1.6105062692265093, + "learning_rate": 1.1468745562466076e-05, + "loss": 0.6156840324401855, + "step": 2728 + }, + { + "epoch": 0.961762114537445, + "grad_norm": 1.3602355982897767, + "learning_rate": 1.1462982104149409e-05, + 
"loss": 0.8415796756744385, + "step": 2729 + }, + { + "epoch": 0.9621145374449339, + "grad_norm": 1.7603646172978014, + "learning_rate": 1.145721814919585e-05, + "loss": 0.5983521342277527, + "step": 2730 + }, + { + "epoch": 0.962466960352423, + "grad_norm": 1.6358592349658665, + "learning_rate": 1.1451453699562077e-05, + "loss": 0.6144511699676514, + "step": 2731 + }, + { + "epoch": 0.9628193832599119, + "grad_norm": 1.66844617820458, + "learning_rate": 1.1445688757204942e-05, + "loss": 0.6449630260467529, + "step": 2732 + }, + { + "epoch": 0.9631718061674008, + "grad_norm": 1.5343236560799753, + "learning_rate": 1.1439923324081465e-05, + "loss": 0.7321716547012329, + "step": 2733 + }, + { + "epoch": 0.9635242290748899, + "grad_norm": 1.9877317345810759, + "learning_rate": 1.1434157402148838e-05, + "loss": 0.8354923129081726, + "step": 2734 + }, + { + "epoch": 0.9638766519823788, + "grad_norm": 1.3653549857555707, + "learning_rate": 1.14283909933644e-05, + "loss": 0.728820264339447, + "step": 2735 + }, + { + "epoch": 0.9642290748898679, + "grad_norm": 1.4013626479373464, + "learning_rate": 1.1422624099685675e-05, + "loss": 0.6683202981948853, + "step": 2736 + }, + { + "epoch": 0.9645814977973568, + "grad_norm": 1.6203635868462385, + "learning_rate": 1.141685672307034e-05, + "loss": 0.7159590125083923, + "step": 2737 + }, + { + "epoch": 0.9649339207048458, + "grad_norm": 1.9197883933040156, + "learning_rate": 1.1411088865476245e-05, + "loss": 0.8269981145858765, + "step": 2738 + }, + { + "epoch": 0.9652863436123348, + "grad_norm": 1.7561037821195844, + "learning_rate": 1.1405320528861393e-05, + "loss": 0.6993168592453003, + "step": 2739 + }, + { + "epoch": 0.9656387665198238, + "grad_norm": 1.4700171152077626, + "learning_rate": 1.1399551715183956e-05, + "loss": 0.6296184062957764, + "step": 2740 + }, + { + "epoch": 0.9659911894273128, + "grad_norm": 1.5505746175576802, + "learning_rate": 1.1393782426402267e-05, + "loss": 0.670283317565918, + "step": 2741 + }, + { 
+ "epoch": 0.9663436123348018, + "grad_norm": 1.6125051339337373, + "learning_rate": 1.1388012664474824e-05, + "loss": 0.9248946905136108, + "step": 2742 + }, + { + "epoch": 0.9666960352422908, + "grad_norm": 1.7027770081175677, + "learning_rate": 1.1382242431360272e-05, + "loss": 0.7965992093086243, + "step": 2743 + }, + { + "epoch": 0.9670484581497797, + "grad_norm": 1.6413263453773168, + "learning_rate": 1.1376471729017435e-05, + "loss": 0.632454514503479, + "step": 2744 + }, + { + "epoch": 0.9674008810572687, + "grad_norm": 1.4364322830343181, + "learning_rate": 1.1370700559405283e-05, + "loss": 0.6463649272918701, + "step": 2745 + }, + { + "epoch": 0.9677533039647577, + "grad_norm": 1.5890798975591325, + "learning_rate": 1.1364928924482952e-05, + "loss": 0.5864677429199219, + "step": 2746 + }, + { + "epoch": 0.9681057268722467, + "grad_norm": 1.5090045708209912, + "learning_rate": 1.1359156826209726e-05, + "loss": 0.6313967108726501, + "step": 2747 + }, + { + "epoch": 0.9684581497797357, + "grad_norm": 1.2634359711899723, + "learning_rate": 1.1353384266545056e-05, + "loss": 0.5736903548240662, + "step": 2748 + }, + { + "epoch": 0.9688105726872247, + "grad_norm": 1.3956693120918684, + "learning_rate": 1.1347611247448544e-05, + "loss": 0.672286868095398, + "step": 2749 + }, + { + "epoch": 0.9691629955947136, + "grad_norm": 1.7905269273993527, + "learning_rate": 1.1341837770879957e-05, + "loss": 0.7181379795074463, + "step": 2750 + }, + { + "epoch": 0.9695154185022027, + "grad_norm": 1.3192307426609728, + "learning_rate": 1.1336063838799204e-05, + "loss": 0.6160816550254822, + "step": 2751 + }, + { + "epoch": 0.9698678414096916, + "grad_norm": 1.3858752821091025, + "learning_rate": 1.1330289453166361e-05, + "loss": 0.737337589263916, + "step": 2752 + }, + { + "epoch": 0.9702202643171807, + "grad_norm": 1.4067461052680075, + "learning_rate": 1.1324514615941644e-05, + "loss": 0.6752150058746338, + "step": 2753 + }, + { + "epoch": 0.9705726872246696, + "grad_norm": 
1.502210352579975, + "learning_rate": 1.1318739329085438e-05, + "loss": 0.6917784214019775, + "step": 2754 + }, + { + "epoch": 0.9709251101321585, + "grad_norm": 1.873477988490531, + "learning_rate": 1.131296359455827e-05, + "loss": 0.7863353490829468, + "step": 2755 + }, + { + "epoch": 0.9712775330396476, + "grad_norm": 1.338648959960645, + "learning_rate": 1.1307187414320823e-05, + "loss": 0.6236519813537598, + "step": 2756 + }, + { + "epoch": 0.9716299559471365, + "grad_norm": 1.443196389025093, + "learning_rate": 1.130141079033393e-05, + "loss": 0.6957560181617737, + "step": 2757 + }, + { + "epoch": 0.9719823788546256, + "grad_norm": 1.6687230505642796, + "learning_rate": 1.1295633724558574e-05, + "loss": 0.6460270881652832, + "step": 2758 + }, + { + "epoch": 0.9723348017621145, + "grad_norm": 1.4575621917812085, + "learning_rate": 1.1289856218955892e-05, + "loss": 0.7352741956710815, + "step": 2759 + }, + { + "epoch": 0.9726872246696036, + "grad_norm": 1.7999835448567072, + "learning_rate": 1.1284078275487165e-05, + "loss": 0.6285911798477173, + "step": 2760 + }, + { + "epoch": 0.9730396475770925, + "grad_norm": 1.4280819376163427, + "learning_rate": 1.1278299896113823e-05, + "loss": 0.6577984094619751, + "step": 2761 + }, + { + "epoch": 0.9733920704845815, + "grad_norm": 1.4424142490511096, + "learning_rate": 1.1272521082797452e-05, + "loss": 0.6445770859718323, + "step": 2762 + }, + { + "epoch": 0.9737444933920705, + "grad_norm": 1.3911141072298185, + "learning_rate": 1.1266741837499773e-05, + "loss": 0.557687520980835, + "step": 2763 + }, + { + "epoch": 0.9740969162995595, + "grad_norm": 1.559776829553993, + "learning_rate": 1.1260962162182664e-05, + "loss": 0.6117650866508484, + "step": 2764 + }, + { + "epoch": 0.9744493392070485, + "grad_norm": 1.4751836492364416, + "learning_rate": 1.1255182058808143e-05, + "loss": 0.6498113870620728, + "step": 2765 + }, + { + "epoch": 0.9748017621145374, + "grad_norm": 1.9707928584824135, + "learning_rate": 
1.1249401529338375e-05, + "loss": 0.8738062381744385, + "step": 2766 + }, + { + "epoch": 0.9751541850220264, + "grad_norm": 1.6389865398372674, + "learning_rate": 1.1243620575735672e-05, + "loss": 0.551408052444458, + "step": 2767 + }, + { + "epoch": 0.9755066079295154, + "grad_norm": 1.645802380531443, + "learning_rate": 1.1237839199962488e-05, + "loss": 0.7197355031967163, + "step": 2768 + }, + { + "epoch": 0.9758590308370044, + "grad_norm": 1.5393826706252047, + "learning_rate": 1.1232057403981415e-05, + "loss": 0.5704015493392944, + "step": 2769 + }, + { + "epoch": 0.9762114537444934, + "grad_norm": 1.373872634740153, + "learning_rate": 1.1226275189755199e-05, + "loss": 0.603929877281189, + "step": 2770 + }, + { + "epoch": 0.9765638766519824, + "grad_norm": 1.731229349756288, + "learning_rate": 1.1220492559246719e-05, + "loss": 0.8652673363685608, + "step": 2771 + }, + { + "epoch": 0.9769162995594713, + "grad_norm": 1.5891679358388853, + "learning_rate": 1.1214709514418998e-05, + "loss": 0.6827684044837952, + "step": 2772 + }, + { + "epoch": 0.9772687224669604, + "grad_norm": 1.3323036683469254, + "learning_rate": 1.1208926057235197e-05, + "loss": 0.5584808588027954, + "step": 2773 + }, + { + "epoch": 0.9776211453744493, + "grad_norm": 1.5495557729443614, + "learning_rate": 1.1203142189658627e-05, + "loss": 0.7242820262908936, + "step": 2774 + }, + { + "epoch": 0.9779735682819384, + "grad_norm": 1.3489108616226997, + "learning_rate": 1.1197357913652725e-05, + "loss": 0.5299571752548218, + "step": 2775 + }, + { + "epoch": 0.9783259911894273, + "grad_norm": 1.8541326435971137, + "learning_rate": 1.1191573231181074e-05, + "loss": 0.69478440284729, + "step": 2776 + }, + { + "epoch": 0.9786784140969162, + "grad_norm": 1.540885425711554, + "learning_rate": 1.1185788144207394e-05, + "loss": 0.6997090578079224, + "step": 2777 + }, + { + "epoch": 0.9790308370044053, + "grad_norm": 1.422432956680528, + "learning_rate": 1.1180002654695543e-05, + "loss": 
0.6882679462432861, + "step": 2778 + }, + { + "epoch": 0.9793832599118942, + "grad_norm": 1.5811365233101125, + "learning_rate": 1.1174216764609514e-05, + "loss": 0.6434916257858276, + "step": 2779 + }, + { + "epoch": 0.9797356828193833, + "grad_norm": 1.5811226707061032, + "learning_rate": 1.1168430475913437e-05, + "loss": 0.6614376902580261, + "step": 2780 + }, + { + "epoch": 0.9800881057268722, + "grad_norm": 1.380437766979243, + "learning_rate": 1.1162643790571574e-05, + "loss": 0.6440471410751343, + "step": 2781 + }, + { + "epoch": 0.9804405286343613, + "grad_norm": 1.6997398594970703, + "learning_rate": 1.1156856710548327e-05, + "loss": 0.6493573188781738, + "step": 2782 + }, + { + "epoch": 0.9807929515418502, + "grad_norm": 1.5246321952125226, + "learning_rate": 1.1151069237808231e-05, + "loss": 0.660174548625946, + "step": 2783 + }, + { + "epoch": 0.9811453744493392, + "grad_norm": 1.7392611870715098, + "learning_rate": 1.1145281374315953e-05, + "loss": 0.8041812181472778, + "step": 2784 + }, + { + "epoch": 0.9814977973568282, + "grad_norm": 1.3479949919135392, + "learning_rate": 1.1139493122036289e-05, + "loss": 0.4758625030517578, + "step": 2785 + }, + { + "epoch": 0.9818502202643172, + "grad_norm": 1.6334305751982239, + "learning_rate": 1.113370448293417e-05, + "loss": 0.6482613682746887, + "step": 2786 + }, + { + "epoch": 0.9822026431718062, + "grad_norm": 1.475447708954463, + "learning_rate": 1.1127915458974665e-05, + "loss": 0.6911569237709045, + "step": 2787 + }, + { + "epoch": 0.9825550660792951, + "grad_norm": 1.362340888945518, + "learning_rate": 1.1122126052122963e-05, + "loss": 0.6851824522018433, + "step": 2788 + }, + { + "epoch": 0.9829074889867842, + "grad_norm": 1.5792587066367831, + "learning_rate": 1.111633626434439e-05, + "loss": 0.6405081748962402, + "step": 2789 + }, + { + "epoch": 0.9832599118942731, + "grad_norm": 1.5781550908818451, + "learning_rate": 1.1110546097604391e-05, + "loss": 0.7064476013183594, + "step": 2790 + }, + { + 
"epoch": 0.9836123348017621, + "grad_norm": 1.4647903320195184, + "learning_rate": 1.1104755553868559e-05, + "loss": 0.641350269317627, + "step": 2791 + }, + { + "epoch": 0.9839647577092511, + "grad_norm": 1.4142953897430577, + "learning_rate": 1.1098964635102597e-05, + "loss": 0.748977780342102, + "step": 2792 + }, + { + "epoch": 0.9843171806167401, + "grad_norm": 1.3989289975006294, + "learning_rate": 1.1093173343272342e-05, + "loss": 0.6033440828323364, + "step": 2793 + }, + { + "epoch": 0.984669603524229, + "grad_norm": 1.2877663440814373, + "learning_rate": 1.1087381680343754e-05, + "loss": 0.5684633255004883, + "step": 2794 + }, + { + "epoch": 0.9850220264317181, + "grad_norm": 1.5189384787980884, + "learning_rate": 1.1081589648282928e-05, + "loss": 0.7041289210319519, + "step": 2795 + }, + { + "epoch": 0.985374449339207, + "grad_norm": 1.5616342989862266, + "learning_rate": 1.1075797249056079e-05, + "loss": 0.7189786434173584, + "step": 2796 + }, + { + "epoch": 0.9857268722466961, + "grad_norm": 1.534620191791425, + "learning_rate": 1.1070004484629543e-05, + "loss": 0.5114344358444214, + "step": 2797 + }, + { + "epoch": 0.986079295154185, + "grad_norm": 1.6541092784437663, + "learning_rate": 1.1064211356969782e-05, + "loss": 0.5897136926651001, + "step": 2798 + }, + { + "epoch": 0.986431718061674, + "grad_norm": 1.5980123151797752, + "learning_rate": 1.1058417868043387e-05, + "loss": 0.8490760326385498, + "step": 2799 + }, + { + "epoch": 0.986784140969163, + "grad_norm": 1.5100542298165633, + "learning_rate": 1.1052624019817065e-05, + "loss": 0.6392524242401123, + "step": 2800 + }, + { + "epoch": 0.9871365638766519, + "grad_norm": 1.5630522519900902, + "learning_rate": 1.104682981425765e-05, + "loss": 0.7267303466796875, + "step": 2801 + }, + { + "epoch": 0.987488986784141, + "grad_norm": 1.5413815660334662, + "learning_rate": 1.1041035253332087e-05, + "loss": 0.6622469425201416, + "step": 2802 + }, + { + "epoch": 0.9878414096916299, + "grad_norm": 
1.4547931829788883, + "learning_rate": 1.1035240339007454e-05, + "loss": 0.643883466720581, + "step": 2803 + }, + { + "epoch": 0.988193832599119, + "grad_norm": 1.4919310534649226, + "learning_rate": 1.1029445073250945e-05, + "loss": 0.6281142234802246, + "step": 2804 + }, + { + "epoch": 0.9885462555066079, + "grad_norm": 1.606048707782168, + "learning_rate": 1.1023649458029873e-05, + "loss": 0.6356241703033447, + "step": 2805 + }, + { + "epoch": 0.988898678414097, + "grad_norm": 1.7018688321982895, + "learning_rate": 1.1017853495311664e-05, + "loss": 0.8118115663528442, + "step": 2806 + }, + { + "epoch": 0.9892511013215859, + "grad_norm": 1.4779776881835476, + "learning_rate": 1.1012057187063872e-05, + "loss": 0.7673395276069641, + "step": 2807 + }, + { + "epoch": 0.9896035242290749, + "grad_norm": 1.5158382122898324, + "learning_rate": 1.1006260535254159e-05, + "loss": 0.6617262959480286, + "step": 2808 + }, + { + "epoch": 0.9899559471365639, + "grad_norm": 1.7342419352159402, + "learning_rate": 1.1000463541850315e-05, + "loss": 0.537519097328186, + "step": 2809 + }, + { + "epoch": 0.9903083700440528, + "grad_norm": 1.8093297060046025, + "learning_rate": 1.0994666208820229e-05, + "loss": 0.6281024813652039, + "step": 2810 + }, + { + "epoch": 0.9906607929515419, + "grad_norm": 1.4111971416204439, + "learning_rate": 1.0988868538131922e-05, + "loss": 0.7189136743545532, + "step": 2811 + }, + { + "epoch": 0.9910132158590308, + "grad_norm": 1.3844162550962045, + "learning_rate": 1.098307053175352e-05, + "loss": 0.622093677520752, + "step": 2812 + }, + { + "epoch": 0.9913656387665198, + "grad_norm": 1.4032650881900075, + "learning_rate": 1.0977272191653272e-05, + "loss": 0.6774802207946777, + "step": 2813 + }, + { + "epoch": 0.9917180616740088, + "grad_norm": 1.490303383982121, + "learning_rate": 1.0971473519799523e-05, + "loss": 0.5999646186828613, + "step": 2814 + }, + { + "epoch": 0.9920704845814978, + "grad_norm": 1.3508886274303966, + "learning_rate": 
1.096567451816075e-05, + "loss": 0.6450619697570801, + "step": 2815 + }, + { + "epoch": 0.9924229074889868, + "grad_norm": 1.8693455627252262, + "learning_rate": 1.0959875188705529e-05, + "loss": 0.693134069442749, + "step": 2816 + }, + { + "epoch": 0.9927753303964758, + "grad_norm": 1.744167199385734, + "learning_rate": 1.0954075533402557e-05, + "loss": 0.8968616724014282, + "step": 2817 + }, + { + "epoch": 0.9931277533039647, + "grad_norm": 1.5750441805034816, + "learning_rate": 1.0948275554220632e-05, + "loss": 0.6114391088485718, + "step": 2818 + }, + { + "epoch": 0.9934801762114538, + "grad_norm": 1.3761860122661305, + "learning_rate": 1.0942475253128667e-05, + "loss": 0.7583796977996826, + "step": 2819 + }, + { + "epoch": 0.9938325991189427, + "grad_norm": 2.0494911253957735, + "learning_rate": 1.0936674632095683e-05, + "loss": 0.5683549046516418, + "step": 2820 + }, + { + "epoch": 0.9941850220264317, + "grad_norm": 1.4100630352107084, + "learning_rate": 1.0930873693090815e-05, + "loss": 0.5664689540863037, + "step": 2821 + }, + { + "epoch": 0.9945374449339207, + "grad_norm": 1.1859055454278844, + "learning_rate": 1.0925072438083296e-05, + "loss": 0.5799476504325867, + "step": 2822 + }, + { + "epoch": 0.9948898678414096, + "grad_norm": 1.4558284543811444, + "learning_rate": 1.0919270869042475e-05, + "loss": 0.6879112720489502, + "step": 2823 + }, + { + "epoch": 0.9952422907488987, + "grad_norm": 1.3673096151886848, + "learning_rate": 1.09134689879378e-05, + "loss": 0.6348927021026611, + "step": 2824 + }, + { + "epoch": 0.9955947136563876, + "grad_norm": 1.5301215006310536, + "learning_rate": 1.0907666796738839e-05, + "loss": 0.55754554271698, + "step": 2825 + }, + { + "epoch": 0.9959471365638767, + "grad_norm": 1.6611255848189581, + "learning_rate": 1.090186429741524e-05, + "loss": 0.6664899587631226, + "step": 2826 + }, + { + "epoch": 0.9962995594713656, + "grad_norm": 1.3580224067934683, + "learning_rate": 1.0896061491936782e-05, + "loss": 
0.6521929502487183, + "step": 2827 + }, + { + "epoch": 0.9966519823788547, + "grad_norm": 1.4217882734660863, + "learning_rate": 1.0890258382273333e-05, + "loss": 0.542471170425415, + "step": 2828 + }, + { + "epoch": 0.9970044052863436, + "grad_norm": 1.3242120868836005, + "learning_rate": 1.0884454970394871e-05, + "loss": 0.60117506980896, + "step": 2829 + }, + { + "epoch": 0.9973568281938326, + "grad_norm": 1.5563969946549858, + "learning_rate": 1.0878651258271471e-05, + "loss": 0.6783676147460938, + "step": 2830 + }, + { + "epoch": 0.9977092511013216, + "grad_norm": 1.4867095260992749, + "learning_rate": 1.0872847247873315e-05, + "loss": 0.7080766558647156, + "step": 2831 + }, + { + "epoch": 0.9980616740088105, + "grad_norm": 1.7595047000981443, + "learning_rate": 1.0867042941170677e-05, + "loss": 0.9228106141090393, + "step": 2832 + }, + { + "epoch": 0.9984140969162996, + "grad_norm": 1.749212162747955, + "learning_rate": 1.086123834013395e-05, + "loss": 0.7601282596588135, + "step": 2833 + }, + { + "epoch": 0.9987665198237885, + "grad_norm": 1.388473564306277, + "learning_rate": 1.0855433446733607e-05, + "loss": 0.7101393342018127, + "step": 2834 + }, + { + "epoch": 0.9991189427312775, + "grad_norm": 1.426665891638417, + "learning_rate": 1.084962826294023e-05, + "loss": 0.5006242394447327, + "step": 2835 + }, + { + "epoch": 0.9994713656387665, + "grad_norm": 1.6063601330711992, + "learning_rate": 1.08438227907245e-05, + "loss": 0.7270148992538452, + "step": 2836 + }, + { + "epoch": 0.9998237885462555, + "grad_norm": 1.5770914971205114, + "learning_rate": 1.0838017032057194e-05, + "loss": 0.7252628803253174, + "step": 2837 + }, + { + "epoch": 1.0, + "grad_norm": 2.9062070384731578, + "learning_rate": 1.0832210988909187e-05, + "loss": 0.4579252004623413, + "step": 2838 + }, + { + "epoch": 1.000352422907489, + "grad_norm": 1.410073366222354, + "learning_rate": 1.0826404663251446e-05, + "loss": 0.635676920413971, + "step": 2839 + }, + { + "epoch": 
1.0007048458149779, + "grad_norm": 1.5085425099131595, + "learning_rate": 1.0820598057055039e-05, + "loss": 0.6083015203475952, + "step": 2840 + }, + { + "epoch": 1.001057268722467, + "grad_norm": 1.2571881093552235, + "learning_rate": 1.0814791172291132e-05, + "loss": 0.5641704797744751, + "step": 2841 + }, + { + "epoch": 1.001409691629956, + "grad_norm": 1.448254627835315, + "learning_rate": 1.0808984010930981e-05, + "loss": 0.7668559551239014, + "step": 2842 + }, + { + "epoch": 1.001762114537445, + "grad_norm": 1.7836674103878665, + "learning_rate": 1.0803176574945933e-05, + "loss": 0.5205796957015991, + "step": 2843 + }, + { + "epoch": 1.0021145374449338, + "grad_norm": 1.2460568970106132, + "learning_rate": 1.0797368866307431e-05, + "loss": 0.6771252155303955, + "step": 2844 + }, + { + "epoch": 1.002466960352423, + "grad_norm": 1.3246167691239887, + "learning_rate": 1.0791560886987016e-05, + "loss": 0.6101677417755127, + "step": 2845 + }, + { + "epoch": 1.002819383259912, + "grad_norm": 1.683370422985012, + "learning_rate": 1.0785752638956315e-05, + "loss": 0.5651522874832153, + "step": 2846 + }, + { + "epoch": 1.0031718061674009, + "grad_norm": 1.3543139981801942, + "learning_rate": 1.0779944124187048e-05, + "loss": 0.6814571619033813, + "step": 2847 + }, + { + "epoch": 1.0035242290748898, + "grad_norm": 1.5579116379809095, + "learning_rate": 1.0774135344651023e-05, + "loss": 0.6786171197891235, + "step": 2848 + }, + { + "epoch": 1.003876651982379, + "grad_norm": 1.341282658364188, + "learning_rate": 1.0768326302320136e-05, + "loss": 0.5244907736778259, + "step": 2849 + }, + { + "epoch": 1.004229074889868, + "grad_norm": 1.5100504884551087, + "learning_rate": 1.0762516999166383e-05, + "loss": 0.6368712186813354, + "step": 2850 + }, + { + "epoch": 1.0045814977973568, + "grad_norm": 1.3929085404961679, + "learning_rate": 1.0756707437161841e-05, + "loss": 0.6389411687850952, + "step": 2851 + }, + { + "epoch": 1.0049339207048458, + "grad_norm": 1.796913818431425, 
+ "learning_rate": 1.0750897618278675e-05, + "loss": 0.6257550716400146, + "step": 2852 + }, + { + "epoch": 1.0052863436123347, + "grad_norm": 1.384078231158131, + "learning_rate": 1.0745087544489132e-05, + "loss": 0.49478042125701904, + "step": 2853 + }, + { + "epoch": 1.0056387665198239, + "grad_norm": 1.3713236142324383, + "learning_rate": 1.0739277217765558e-05, + "loss": 0.6350952386856079, + "step": 2854 + }, + { + "epoch": 1.0059911894273128, + "grad_norm": 1.4287669419061304, + "learning_rate": 1.0733466640080374e-05, + "loss": 0.6057480573654175, + "step": 2855 + }, + { + "epoch": 1.0063436123348017, + "grad_norm": 1.5646694084149986, + "learning_rate": 1.0727655813406094e-05, + "loss": 0.5545427799224854, + "step": 2856 + }, + { + "epoch": 1.0066960352422907, + "grad_norm": 1.371726691889951, + "learning_rate": 1.0721844739715311e-05, + "loss": 0.55484938621521, + "step": 2857 + }, + { + "epoch": 1.0070484581497798, + "grad_norm": 1.6325523903522516, + "learning_rate": 1.0716033420980703e-05, + "loss": 0.6889834403991699, + "step": 2858 + }, + { + "epoch": 1.0074008810572688, + "grad_norm": 1.928061303452338, + "learning_rate": 1.0710221859175031e-05, + "loss": 0.7259023189544678, + "step": 2859 + }, + { + "epoch": 1.0077533039647577, + "grad_norm": 1.7213820381224034, + "learning_rate": 1.0704410056271144e-05, + "loss": 0.6200032234191895, + "step": 2860 + }, + { + "epoch": 1.0081057268722466, + "grad_norm": 1.2488919699208767, + "learning_rate": 1.069859801424196e-05, + "loss": 0.5357909202575684, + "step": 2861 + }, + { + "epoch": 1.0084581497797356, + "grad_norm": 1.462725629247434, + "learning_rate": 1.0692785735060495e-05, + "loss": 0.8121966123580933, + "step": 2862 + }, + { + "epoch": 1.0088105726872247, + "grad_norm": 1.5047486906511685, + "learning_rate": 1.0686973220699834e-05, + "loss": 0.5698819160461426, + "step": 2863 + }, + { + "epoch": 1.0091629955947137, + "grad_norm": 1.3352019656375154, + "learning_rate": 1.0681160473133144e-05, + 
"loss": 0.6598206162452698, + "step": 2864 + }, + { + "epoch": 1.0095154185022026, + "grad_norm": 1.571854196128042, + "learning_rate": 1.0675347494333667e-05, + "loss": 0.7574363946914673, + "step": 2865 + }, + { + "epoch": 1.0098678414096915, + "grad_norm": 2.0265508752029007, + "learning_rate": 1.0669534286274737e-05, + "loss": 0.6749663949012756, + "step": 2866 + }, + { + "epoch": 1.0102202643171807, + "grad_norm": 1.5445692097493786, + "learning_rate": 1.0663720850929753e-05, + "loss": 0.5932409763336182, + "step": 2867 + }, + { + "epoch": 1.0105726872246696, + "grad_norm": 1.4883467064779885, + "learning_rate": 1.0657907190272197e-05, + "loss": 0.7070773839950562, + "step": 2868 + }, + { + "epoch": 1.0109251101321586, + "grad_norm": 1.6639794076635466, + "learning_rate": 1.0652093306275621e-05, + "loss": 0.531635582447052, + "step": 2869 + }, + { + "epoch": 1.0112775330396475, + "grad_norm": 1.5967103256398283, + "learning_rate": 1.0646279200913665e-05, + "loss": 0.5966447591781616, + "step": 2870 + }, + { + "epoch": 1.0116299559471367, + "grad_norm": 1.5047477869564347, + "learning_rate": 1.0640464876160033e-05, + "loss": 0.6308450698852539, + "step": 2871 + }, + { + "epoch": 1.0119823788546256, + "grad_norm": 1.6938927429813924, + "learning_rate": 1.0634650333988508e-05, + "loss": 0.6477035284042358, + "step": 2872 + }, + { + "epoch": 1.0123348017621145, + "grad_norm": 1.4725648899614407, + "learning_rate": 1.0628835576372942e-05, + "loss": 0.5856079459190369, + "step": 2873 + }, + { + "epoch": 1.0126872246696035, + "grad_norm": 1.6415031005435194, + "learning_rate": 1.062302060528727e-05, + "loss": 0.733691930770874, + "step": 2874 + }, + { + "epoch": 1.0130396475770924, + "grad_norm": 1.6528326658043055, + "learning_rate": 1.0617205422705495e-05, + "loss": 0.6020156145095825, + "step": 2875 + }, + { + "epoch": 1.0133920704845816, + "grad_norm": 1.5978613503890422, + "learning_rate": 1.0611390030601685e-05, + "loss": 0.4980982542037964, + "step": 2876 + }, 
+ { + "epoch": 1.0137444933920705, + "grad_norm": 1.5178573200522583, + "learning_rate": 1.0605574430949983e-05, + "loss": 0.6498349905014038, + "step": 2877 + }, + { + "epoch": 1.0140969162995594, + "grad_norm": 1.7318519084472541, + "learning_rate": 1.0599758625724612e-05, + "loss": 0.6456383466720581, + "step": 2878 + }, + { + "epoch": 1.0144493392070484, + "grad_norm": 1.7056738628689527, + "learning_rate": 1.059394261689985e-05, + "loss": 0.6047386527061462, + "step": 2879 + }, + { + "epoch": 1.0148017621145375, + "grad_norm": 1.6633316847391189, + "learning_rate": 1.0588126406450056e-05, + "loss": 0.641674816608429, + "step": 2880 + }, + { + "epoch": 1.0151541850220265, + "grad_norm": 1.549495353719679, + "learning_rate": 1.0582309996349648e-05, + "loss": 0.6157702207565308, + "step": 2881 + }, + { + "epoch": 1.0155066079295154, + "grad_norm": 1.614686141937513, + "learning_rate": 1.057649338857312e-05, + "loss": 0.6004809737205505, + "step": 2882 + }, + { + "epoch": 1.0158590308370044, + "grad_norm": 1.460588924951717, + "learning_rate": 1.0570676585095028e-05, + "loss": 0.5534430742263794, + "step": 2883 + }, + { + "epoch": 1.0162114537444933, + "grad_norm": 2.0058626486485367, + "learning_rate": 1.0564859587889997e-05, + "loss": 0.7781813144683838, + "step": 2884 + }, + { + "epoch": 1.0165638766519824, + "grad_norm": 1.9228872779765243, + "learning_rate": 1.0559042398932713e-05, + "loss": 0.6949760913848877, + "step": 2885 + }, + { + "epoch": 1.0169162995594714, + "grad_norm": 1.51396598780538, + "learning_rate": 1.0553225020197932e-05, + "loss": 0.5718453526496887, + "step": 2886 + }, + { + "epoch": 1.0172687224669603, + "grad_norm": 1.7835909963123882, + "learning_rate": 1.0547407453660471e-05, + "loss": 0.6689345836639404, + "step": 2887 + }, + { + "epoch": 1.0176211453744493, + "grad_norm": 1.5559332596209525, + "learning_rate": 1.0541589701295222e-05, + "loss": 0.6615442037582397, + "step": 2888 + }, + { + "epoch": 1.0179735682819384, + "grad_norm": 
1.4810070180145358, + "learning_rate": 1.0535771765077121e-05, + "loss": 0.6458337306976318, + "step": 2889 + }, + { + "epoch": 1.0183259911894273, + "grad_norm": 1.4770072284014752, + "learning_rate": 1.052995364698118e-05, + "loss": 0.5330519676208496, + "step": 2890 + }, + { + "epoch": 1.0186784140969163, + "grad_norm": 1.4780636522187705, + "learning_rate": 1.0524135348982467e-05, + "loss": 0.6219571232795715, + "step": 2891 + }, + { + "epoch": 1.0190308370044052, + "grad_norm": 1.4624191661889683, + "learning_rate": 1.0518316873056118e-05, + "loss": 0.6731684803962708, + "step": 2892 + }, + { + "epoch": 1.0193832599118944, + "grad_norm": 1.614741871357758, + "learning_rate": 1.0512498221177319e-05, + "loss": 0.6126813888549805, + "step": 2893 + }, + { + "epoch": 1.0197356828193833, + "grad_norm": 1.4895494518265573, + "learning_rate": 1.0506679395321325e-05, + "loss": 0.5796904563903809, + "step": 2894 + }, + { + "epoch": 1.0200881057268723, + "grad_norm": 1.5545739969005041, + "learning_rate": 1.050086039746344e-05, + "loss": 0.5765914916992188, + "step": 2895 + }, + { + "epoch": 1.0204405286343612, + "grad_norm": 1.3710954206781227, + "learning_rate": 1.0495041229579043e-05, + "loss": 0.4798969328403473, + "step": 2896 + }, + { + "epoch": 1.0207929515418501, + "grad_norm": 1.551476741605498, + "learning_rate": 1.0489221893643553e-05, + "loss": 0.673927366733551, + "step": 2897 + }, + { + "epoch": 1.0211453744493393, + "grad_norm": 1.6211129054938926, + "learning_rate": 1.0483402391632453e-05, + "loss": 0.5681431293487549, + "step": 2898 + }, + { + "epoch": 1.0214977973568282, + "grad_norm": 1.3128793329209902, + "learning_rate": 1.0477582725521287e-05, + "loss": 0.6156354546546936, + "step": 2899 + }, + { + "epoch": 1.0218502202643172, + "grad_norm": 1.4369078255379546, + "learning_rate": 1.0471762897285652e-05, + "loss": 0.6569045782089233, + "step": 2900 + }, + { + "epoch": 1.022202643171806, + "grad_norm": 1.4293089736412674, + "learning_rate": 
1.046594290890119e-05, + "loss": 0.6125048995018005, + "step": 2901 + }, + { + "epoch": 1.0225550660792952, + "grad_norm": 1.6465466140905431, + "learning_rate": 1.0460122762343614e-05, + "loss": 0.604046106338501, + "step": 2902 + }, + { + "epoch": 1.0229074889867842, + "grad_norm": 1.5461286198100506, + "learning_rate": 1.0454302459588677e-05, + "loss": 0.4569816589355469, + "step": 2903 + }, + { + "epoch": 1.0232599118942731, + "grad_norm": 1.6187784923192434, + "learning_rate": 1.0448482002612194e-05, + "loss": 0.5764607787132263, + "step": 2904 + }, + { + "epoch": 1.023612334801762, + "grad_norm": 1.503585291483294, + "learning_rate": 1.044266139339003e-05, + "loss": 0.5859626531600952, + "step": 2905 + }, + { + "epoch": 1.023964757709251, + "grad_norm": 1.6642769825669268, + "learning_rate": 1.04368406338981e-05, + "loss": 0.7326341271400452, + "step": 2906 + }, + { + "epoch": 1.0243171806167402, + "grad_norm": 1.613324765385094, + "learning_rate": 1.0431019726112366e-05, + "loss": 0.6355161070823669, + "step": 2907 + }, + { + "epoch": 1.024669603524229, + "grad_norm": 1.5833367942965741, + "learning_rate": 1.0425198672008851e-05, + "loss": 0.6990653872489929, + "step": 2908 + }, + { + "epoch": 1.025022026431718, + "grad_norm": 2.3098262824716542, + "learning_rate": 1.0419377473563621e-05, + "loss": 0.631952166557312, + "step": 2909 + }, + { + "epoch": 1.025374449339207, + "grad_norm": 1.4397039525414863, + "learning_rate": 1.041355613275279e-05, + "loss": 0.4872596561908722, + "step": 2910 + }, + { + "epoch": 1.0257268722466961, + "grad_norm": 1.5222931253330352, + "learning_rate": 1.0407734651552522e-05, + "loss": 0.5334043502807617, + "step": 2911 + }, + { + "epoch": 1.026079295154185, + "grad_norm": 1.5817730675020623, + "learning_rate": 1.0401913031939026e-05, + "loss": 0.5971134305000305, + "step": 2912 + }, + { + "epoch": 1.026431718061674, + "grad_norm": 1.7562208471394358, + "learning_rate": 1.0396091275888567e-05, + "loss": 0.6527851819992065, + 
"step": 2913 + }, + { + "epoch": 1.026784140969163, + "grad_norm": 1.5387477454353993, + "learning_rate": 1.0390269385377444e-05, + "loss": 0.4515818953514099, + "step": 2914 + }, + { + "epoch": 1.027136563876652, + "grad_norm": 1.4624804092376522, + "learning_rate": 1.0384447362382013e-05, + "loss": 0.530797004699707, + "step": 2915 + }, + { + "epoch": 1.027488986784141, + "grad_norm": 1.4915704465108583, + "learning_rate": 1.0378625208878666e-05, + "loss": 0.5477641224861145, + "step": 2916 + }, + { + "epoch": 1.02784140969163, + "grad_norm": 1.6025052451883606, + "learning_rate": 1.0372802926843843e-05, + "loss": 0.6390479207038879, + "step": 2917 + }, + { + "epoch": 1.028193832599119, + "grad_norm": 1.5706073153963707, + "learning_rate": 1.0366980518254028e-05, + "loss": 0.610755443572998, + "step": 2918 + }, + { + "epoch": 1.0285462555066078, + "grad_norm": 1.4805888577219812, + "learning_rate": 1.036115798508575e-05, + "loss": 0.5427766442298889, + "step": 2919 + }, + { + "epoch": 1.028898678414097, + "grad_norm": 1.4610582929917253, + "learning_rate": 1.0355335329315573e-05, + "loss": 0.621055006980896, + "step": 2920 + }, + { + "epoch": 1.029251101321586, + "grad_norm": 1.7760527372961, + "learning_rate": 1.0349512552920114e-05, + "loss": 0.6098253726959229, + "step": 2921 + }, + { + "epoch": 1.0296035242290749, + "grad_norm": 1.8967300437588117, + "learning_rate": 1.0343689657876017e-05, + "loss": 0.591664731502533, + "step": 2922 + }, + { + "epoch": 1.0299559471365638, + "grad_norm": 1.616730113059231, + "learning_rate": 1.033786664615998e-05, + "loss": 0.6531485915184021, + "step": 2923 + }, + { + "epoch": 1.030308370044053, + "grad_norm": 1.5937698715448299, + "learning_rate": 1.0332043519748727e-05, + "loss": 0.6933655738830566, + "step": 2924 + }, + { + "epoch": 1.030660792951542, + "grad_norm": 1.5987643686429562, + "learning_rate": 1.0326220280619036e-05, + "loss": 0.6512705087661743, + "step": 2925 + }, + { + "epoch": 1.0310132158590308, + 
"grad_norm": 1.829250792437923, + "learning_rate": 1.0320396930747712e-05, + "loss": 0.5671502947807312, + "step": 2926 + }, + { + "epoch": 1.0313656387665198, + "grad_norm": 1.6239123058071627, + "learning_rate": 1.0314573472111601e-05, + "loss": 0.6795192360877991, + "step": 2927 + }, + { + "epoch": 1.0317180616740087, + "grad_norm": 1.5985127083182307, + "learning_rate": 1.0308749906687585e-05, + "loss": 0.6357578039169312, + "step": 2928 + }, + { + "epoch": 1.0320704845814979, + "grad_norm": 1.6982196546251649, + "learning_rate": 1.0302926236452588e-05, + "loss": 0.7009944915771484, + "step": 2929 + }, + { + "epoch": 1.0324229074889868, + "grad_norm": 1.4806960711115318, + "learning_rate": 1.0297102463383557e-05, + "loss": 0.4685679078102112, + "step": 2930 + }, + { + "epoch": 1.0327753303964757, + "grad_norm": 1.5429925693746163, + "learning_rate": 1.0291278589457488e-05, + "loss": 0.6359078884124756, + "step": 2931 + }, + { + "epoch": 1.0331277533039647, + "grad_norm": 1.8631741910761805, + "learning_rate": 1.0285454616651398e-05, + "loss": 0.6606266498565674, + "step": 2932 + }, + { + "epoch": 1.0334801762114538, + "grad_norm": 1.7076039728900445, + "learning_rate": 1.0279630546942353e-05, + "loss": 0.5405932664871216, + "step": 2933 + }, + { + "epoch": 1.0338325991189428, + "grad_norm": 1.4934491606364382, + "learning_rate": 1.0273806382307443e-05, + "loss": 0.8072758316993713, + "step": 2934 + }, + { + "epoch": 1.0341850220264317, + "grad_norm": 1.5899951805886359, + "learning_rate": 1.0267982124723783e-05, + "loss": 0.6923058032989502, + "step": 2935 + }, + { + "epoch": 1.0345374449339206, + "grad_norm": 1.7156977270346485, + "learning_rate": 1.0262157776168533e-05, + "loss": 0.5577275156974792, + "step": 2936 + }, + { + "epoch": 1.0348898678414098, + "grad_norm": 1.6363417924911698, + "learning_rate": 1.0256333338618875e-05, + "loss": 0.6780786514282227, + "step": 2937 + }, + { + "epoch": 1.0352422907488987, + "grad_norm": 1.6093019454005904, + 
"learning_rate": 1.0250508814052029e-05, + "loss": 0.6966040134429932, + "step": 2938 + }, + { + "epoch": 1.0355947136563877, + "grad_norm": 1.4912092272159942, + "learning_rate": 1.0244684204445237e-05, + "loss": 0.5726339817047119, + "step": 2939 + }, + { + "epoch": 1.0359471365638766, + "grad_norm": 1.372791278777169, + "learning_rate": 1.0238859511775768e-05, + "loss": 0.64924156665802, + "step": 2940 + }, + { + "epoch": 1.0362995594713655, + "grad_norm": 1.5498611273448277, + "learning_rate": 1.0233034738020933e-05, + "loss": 0.49121707677841187, + "step": 2941 + }, + { + "epoch": 1.0366519823788547, + "grad_norm": 1.4698297870867278, + "learning_rate": 1.0227209885158053e-05, + "loss": 0.5505814552307129, + "step": 2942 + }, + { + "epoch": 1.0370044052863436, + "grad_norm": 1.658171020881214, + "learning_rate": 1.022138495516449e-05, + "loss": 0.7429872751235962, + "step": 2943 + }, + { + "epoch": 1.0373568281938326, + "grad_norm": 1.5946562373848934, + "learning_rate": 1.0215559950017624e-05, + "loss": 0.6492434740066528, + "step": 2944 + }, + { + "epoch": 1.0377092511013215, + "grad_norm": 1.5139165780476451, + "learning_rate": 1.0209734871694865e-05, + "loss": 0.5418736338615417, + "step": 2945 + }, + { + "epoch": 1.0380616740088107, + "grad_norm": 1.676058492453494, + "learning_rate": 1.0203909722173644e-05, + "loss": 0.6252620220184326, + "step": 2946 + }, + { + "epoch": 1.0384140969162996, + "grad_norm": 1.4699238771485563, + "learning_rate": 1.0198084503431416e-05, + "loss": 0.5124455690383911, + "step": 2947 + }, + { + "epoch": 1.0387665198237885, + "grad_norm": 1.4358343290990208, + "learning_rate": 1.0192259217445663e-05, + "loss": 0.5729688405990601, + "step": 2948 + }, + { + "epoch": 1.0391189427312775, + "grad_norm": 1.8222711908460536, + "learning_rate": 1.0186433866193893e-05, + "loss": 0.5891536474227905, + "step": 2949 + }, + { + "epoch": 1.0394713656387666, + "grad_norm": 1.7110443983801997, + "learning_rate": 1.0180608451653626e-05, + 
"loss": 0.774397075176239, + "step": 2950 + }, + { + "epoch": 1.0398237885462556, + "grad_norm": 1.4480826912481708, + "learning_rate": 1.0174782975802408e-05, + "loss": 0.5987098813056946, + "step": 2951 + }, + { + "epoch": 1.0401762114537445, + "grad_norm": 1.634577600554869, + "learning_rate": 1.016895744061781e-05, + "loss": 0.5334598422050476, + "step": 2952 + }, + { + "epoch": 1.0405286343612334, + "grad_norm": 1.7236175912347957, + "learning_rate": 1.0163131848077421e-05, + "loss": 0.5946340560913086, + "step": 2953 + }, + { + "epoch": 1.0408810572687224, + "grad_norm": 1.601606630295311, + "learning_rate": 1.0157306200158847e-05, + "loss": 0.5780941247940063, + "step": 2954 + }, + { + "epoch": 1.0412334801762115, + "grad_norm": 1.6785528445522104, + "learning_rate": 1.0151480498839712e-05, + "loss": 0.6348963975906372, + "step": 2955 + }, + { + "epoch": 1.0415859030837005, + "grad_norm": 1.717999985242494, + "learning_rate": 1.014565474609766e-05, + "loss": 0.6868102550506592, + "step": 2956 + }, + { + "epoch": 1.0419383259911894, + "grad_norm": 1.6612318546166622, + "learning_rate": 1.0139828943910358e-05, + "loss": 0.6507548689842224, + "step": 2957 + }, + { + "epoch": 1.0422907488986783, + "grad_norm": 1.7617270521903845, + "learning_rate": 1.0134003094255478e-05, + "loss": 0.6358312964439392, + "step": 2958 + }, + { + "epoch": 1.0426431718061675, + "grad_norm": 1.5725895362844704, + "learning_rate": 1.0128177199110723e-05, + "loss": 0.7530224919319153, + "step": 2959 + }, + { + "epoch": 1.0429955947136564, + "grad_norm": 1.5496338862557548, + "learning_rate": 1.012235126045379e-05, + "loss": 0.545819878578186, + "step": 2960 + }, + { + "epoch": 1.0433480176211454, + "grad_norm": 1.5828250584633938, + "learning_rate": 1.011652528026242e-05, + "loss": 0.6626788377761841, + "step": 2961 + }, + { + "epoch": 1.0437004405286343, + "grad_norm": 1.6913571400986156, + "learning_rate": 1.0110699260514336e-05, + "loss": 0.6407896280288696, + "step": 2962 + }, + { 
+ "epoch": 1.0440528634361232, + "grad_norm": 1.4558906354554821, + "learning_rate": 1.0104873203187307e-05, + "loss": 0.5633673667907715, + "step": 2963 + }, + { + "epoch": 1.0444052863436124, + "grad_norm": 1.6991226564822444, + "learning_rate": 1.0099047110259081e-05, + "loss": 0.5356892943382263, + "step": 2964 + }, + { + "epoch": 1.0447577092511013, + "grad_norm": 1.6571256461175092, + "learning_rate": 1.0093220983707448e-05, + "loss": 0.5527205467224121, + "step": 2965 + }, + { + "epoch": 1.0451101321585903, + "grad_norm": 1.5928434384321621, + "learning_rate": 1.008739482551019e-05, + "loss": 0.6148320436477661, + "step": 2966 + }, + { + "epoch": 1.0454625550660792, + "grad_norm": 1.8604930696261837, + "learning_rate": 1.0081568637645111e-05, + "loss": 0.5713976621627808, + "step": 2967 + }, + { + "epoch": 1.0458149779735684, + "grad_norm": 1.4811105317563769, + "learning_rate": 1.0075742422090015e-05, + "loss": 0.5836226940155029, + "step": 2968 + }, + { + "epoch": 1.0461674008810573, + "grad_norm": 1.829134506733255, + "learning_rate": 1.0069916180822727e-05, + "loss": 0.6452749371528625, + "step": 2969 + }, + { + "epoch": 1.0465198237885462, + "grad_norm": 1.507975881410604, + "learning_rate": 1.006408991582107e-05, + "loss": 0.5468501448631287, + "step": 2970 + }, + { + "epoch": 1.0468722466960352, + "grad_norm": 1.6217984014708016, + "learning_rate": 1.0058263629062883e-05, + "loss": 0.5195704698562622, + "step": 2971 + }, + { + "epoch": 1.0472246696035241, + "grad_norm": 1.603914403857505, + "learning_rate": 1.0052437322526003e-05, + "loss": 0.5144641995429993, + "step": 2972 + }, + { + "epoch": 1.0475770925110133, + "grad_norm": 1.767647834896278, + "learning_rate": 1.004661099818829e-05, + "loss": 0.7258927822113037, + "step": 2973 + }, + { + "epoch": 1.0479295154185022, + "grad_norm": 1.8920163745404244, + "learning_rate": 1.004078465802759e-05, + "loss": 0.6108053922653198, + "step": 2974 + }, + { + "epoch": 1.0482819383259911, + "grad_norm": 
1.5703096539855212, + "learning_rate": 1.0034958304021766e-05, + "loss": 0.612535834312439, + "step": 2975 + }, + { + "epoch": 1.04863436123348, + "grad_norm": 1.6902304674604145, + "learning_rate": 1.0029131938148686e-05, + "loss": 0.7272380590438843, + "step": 2976 + }, + { + "epoch": 1.0489867841409692, + "grad_norm": 1.4306480582223446, + "learning_rate": 1.0023305562386222e-05, + "loss": 0.4748264253139496, + "step": 2977 + }, + { + "epoch": 1.0493392070484582, + "grad_norm": 1.7625234188194432, + "learning_rate": 1.0017479178712245e-05, + "loss": 0.6686758399009705, + "step": 2978 + }, + { + "epoch": 1.0496916299559471, + "grad_norm": 1.6796969203533192, + "learning_rate": 1.0011652789104631e-05, + "loss": 0.5003838539123535, + "step": 2979 + }, + { + "epoch": 1.050044052863436, + "grad_norm": 1.7305572983583226, + "learning_rate": 1.0005826395541257e-05, + "loss": 0.6210055351257324, + "step": 2980 + }, + { + "epoch": 1.0503964757709252, + "grad_norm": 1.6943397299052507, + "learning_rate": 1e-05, + "loss": 0.6160269975662231, + "step": 2981 + }, + { + "epoch": 1.0507488986784141, + "grad_norm": 1.6249468093767248, + "learning_rate": 9.994173604458748e-06, + "loss": 0.6432052850723267, + "step": 2982 + }, + { + "epoch": 1.051101321585903, + "grad_norm": 1.6764234439374022, + "learning_rate": 9.988347210895372e-06, + "loss": 0.588628888130188, + "step": 2983 + }, + { + "epoch": 1.051453744493392, + "grad_norm": 1.5595740377523009, + "learning_rate": 9.982520821287758e-06, + "loss": 0.6694320440292358, + "step": 2984 + }, + { + "epoch": 1.051806167400881, + "grad_norm": 1.7276474901524372, + "learning_rate": 9.976694437613778e-06, + "loss": 0.8591301441192627, + "step": 2985 + }, + { + "epoch": 1.0521585903083701, + "grad_norm": 1.6697380234108412, + "learning_rate": 9.970868061851315e-06, + "loss": 0.6000436544418335, + "step": 2986 + }, + { + "epoch": 1.052511013215859, + "grad_norm": 1.5357275356358564, + "learning_rate": 9.965041695978239e-06, + "loss": 
0.624568521976471, + "step": 2987 + }, + { + "epoch": 1.052863436123348, + "grad_norm": 1.4223866897031825, + "learning_rate": 9.959215341972414e-06, + "loss": 0.6173535585403442, + "step": 2988 + }, + { + "epoch": 1.053215859030837, + "grad_norm": 1.7069399452687213, + "learning_rate": 9.953389001811716e-06, + "loss": 0.5991729497909546, + "step": 2989 + }, + { + "epoch": 1.053568281938326, + "grad_norm": 1.782972390393551, + "learning_rate": 9.947562677473999e-06, + "loss": 0.570953905582428, + "step": 2990 + }, + { + "epoch": 1.053920704845815, + "grad_norm": 1.7332305108715658, + "learning_rate": 9.941736370937119e-06, + "loss": 0.6079390048980713, + "step": 2991 + }, + { + "epoch": 1.054273127753304, + "grad_norm": 2.110617001097567, + "learning_rate": 9.935910084178934e-06, + "loss": 0.599539577960968, + "step": 2992 + }, + { + "epoch": 1.0546255506607929, + "grad_norm": 1.5854202353385896, + "learning_rate": 9.930083819177273e-06, + "loss": 0.6736180186271667, + "step": 2993 + }, + { + "epoch": 1.054977973568282, + "grad_norm": 1.6240153775210555, + "learning_rate": 9.924257577909987e-06, + "loss": 0.6953197717666626, + "step": 2994 + }, + { + "epoch": 1.055330396475771, + "grad_norm": 1.8737137053755175, + "learning_rate": 9.918431362354892e-06, + "loss": 0.6670099496841431, + "step": 2995 + }, + { + "epoch": 1.05568281938326, + "grad_norm": 1.844007753613641, + "learning_rate": 9.912605174489811e-06, + "loss": 0.5829994678497314, + "step": 2996 + }, + { + "epoch": 1.0560352422907489, + "grad_norm": 1.9198236703913207, + "learning_rate": 9.906779016292554e-06, + "loss": 0.5926212072372437, + "step": 2997 + }, + { + "epoch": 1.0563876651982378, + "grad_norm": 1.4868752944824364, + "learning_rate": 9.900952889740922e-06, + "loss": 0.6085237860679626, + "step": 2998 + }, + { + "epoch": 1.056740088105727, + "grad_norm": 1.8046049827658854, + "learning_rate": 9.895126796812698e-06, + "loss": 0.5348918437957764, + "step": 2999 + }, + { + "epoch": 
1.0570925110132159, + "grad_norm": 1.79509807280399, + "learning_rate": 9.889300739485666e-06, + "loss": 0.6325811743736267, + "step": 3000 + }, + { + "epoch": 1.0574449339207048, + "grad_norm": 1.6006099839795653, + "learning_rate": 9.883474719737582e-06, + "loss": 0.6262463927268982, + "step": 3001 + }, + { + "epoch": 1.0577973568281938, + "grad_norm": 1.5914788157951554, + "learning_rate": 9.877648739546213e-06, + "loss": 0.5863393545150757, + "step": 3002 + }, + { + "epoch": 1.058149779735683, + "grad_norm": 2.0254476885032924, + "learning_rate": 9.871822800889284e-06, + "loss": 0.6200219392776489, + "step": 3003 + }, + { + "epoch": 1.0585022026431719, + "grad_norm": 1.6216300774961065, + "learning_rate": 9.865996905744523e-06, + "loss": 0.6994227170944214, + "step": 3004 + }, + { + "epoch": 1.0588546255506608, + "grad_norm": 1.735404014120002, + "learning_rate": 9.860171056089646e-06, + "loss": 0.6458406448364258, + "step": 3005 + }, + { + "epoch": 1.0592070484581497, + "grad_norm": 1.6209915560634427, + "learning_rate": 9.854345253902342e-06, + "loss": 0.6814782619476318, + "step": 3006 + }, + { + "epoch": 1.0595594713656387, + "grad_norm": 1.455508358080935, + "learning_rate": 9.84851950116029e-06, + "loss": 0.521275520324707, + "step": 3007 + }, + { + "epoch": 1.0599118942731278, + "grad_norm": 1.486020788258086, + "learning_rate": 9.84269379984116e-06, + "loss": 0.5541207790374756, + "step": 3008 + }, + { + "epoch": 1.0602643171806168, + "grad_norm": 1.7060435970959642, + "learning_rate": 9.836868151922579e-06, + "loss": 0.578704833984375, + "step": 3009 + }, + { + "epoch": 1.0606167400881057, + "grad_norm": 1.5220368339292814, + "learning_rate": 9.831042559382193e-06, + "loss": 0.6280980706214905, + "step": 3010 + }, + { + "epoch": 1.0609691629955946, + "grad_norm": 1.8314917502019485, + "learning_rate": 9.825217024197595e-06, + "loss": 0.6059408783912659, + "step": 3011 + }, + { + "epoch": 1.0613215859030838, + "grad_norm": 1.6362891327789773, + 
"learning_rate": 9.819391548346377e-06, + "loss": 0.6375449299812317, + "step": 3012 + }, + { + "epoch": 1.0616740088105727, + "grad_norm": 2.503364134053993, + "learning_rate": 9.81356613380611e-06, + "loss": 0.5959592461585999, + "step": 3013 + }, + { + "epoch": 1.0620264317180617, + "grad_norm": 1.735073300438408, + "learning_rate": 9.807740782554337e-06, + "loss": 0.7636409401893616, + "step": 3014 + }, + { + "epoch": 1.0623788546255506, + "grad_norm": 2.2227407713805722, + "learning_rate": 9.801915496568586e-06, + "loss": 0.6136656999588013, + "step": 3015 + }, + { + "epoch": 1.0627312775330395, + "grad_norm": 1.7360474444382674, + "learning_rate": 9.796090277826361e-06, + "loss": 0.4659839868545532, + "step": 3016 + }, + { + "epoch": 1.0630837004405287, + "grad_norm": 1.699131973967987, + "learning_rate": 9.790265128305137e-06, + "loss": 0.6053155660629272, + "step": 3017 + }, + { + "epoch": 1.0634361233480176, + "grad_norm": 1.698457126583602, + "learning_rate": 9.78444004998238e-06, + "loss": 0.6885203123092651, + "step": 3018 + }, + { + "epoch": 1.0637885462555066, + "grad_norm": 1.5620062631250171, + "learning_rate": 9.778615044835513e-06, + "loss": 0.4985584616661072, + "step": 3019 + }, + { + "epoch": 1.0641409691629955, + "grad_norm": 1.699890122838272, + "learning_rate": 9.772790114841948e-06, + "loss": 0.5782307386398315, + "step": 3020 + }, + { + "epoch": 1.0644933920704847, + "grad_norm": 1.7427928970766464, + "learning_rate": 9.766965261979072e-06, + "loss": 0.5819451212882996, + "step": 3021 + }, + { + "epoch": 1.0648458149779736, + "grad_norm": 1.9531302264016444, + "learning_rate": 9.761140488224232e-06, + "loss": 0.7316779494285583, + "step": 3022 + }, + { + "epoch": 1.0651982378854625, + "grad_norm": 2.4211241065200633, + "learning_rate": 9.755315795554766e-06, + "loss": 0.5986718535423279, + "step": 3023 + }, + { + "epoch": 1.0655506607929515, + "grad_norm": 1.5565361520380023, + "learning_rate": 9.749491185947977e-06, + "loss": 
0.5052427053451538, + "step": 3024 + }, + { + "epoch": 1.0659030837004406, + "grad_norm": 1.658020296029534, + "learning_rate": 9.743666661381123e-06, + "loss": 0.7370901107788086, + "step": 3025 + }, + { + "epoch": 1.0662555066079296, + "grad_norm": 1.575987435195716, + "learning_rate": 9.73784222383147e-06, + "loss": 0.6423007249832153, + "step": 3026 + }, + { + "epoch": 1.0666079295154185, + "grad_norm": 1.94896820476588, + "learning_rate": 9.73201787527622e-06, + "loss": 0.5679126977920532, + "step": 3027 + }, + { + "epoch": 1.0669603524229074, + "grad_norm": 2.498602043471406, + "learning_rate": 9.72619361769256e-06, + "loss": 0.5890183448791504, + "step": 3028 + }, + { + "epoch": 1.0673127753303966, + "grad_norm": 1.7647674693242208, + "learning_rate": 9.720369453057648e-06, + "loss": 0.6772822141647339, + "step": 3029 + }, + { + "epoch": 1.0676651982378855, + "grad_norm": 2.109810086892336, + "learning_rate": 9.714545383348602e-06, + "loss": 0.8275488615036011, + "step": 3030 + }, + { + "epoch": 1.0680176211453745, + "grad_norm": 1.6620933678667917, + "learning_rate": 9.708721410542517e-06, + "loss": 0.5369541645050049, + "step": 3031 + }, + { + "epoch": 1.0683700440528634, + "grad_norm": 1.611800532750273, + "learning_rate": 9.70289753661645e-06, + "loss": 0.7173746824264526, + "step": 3032 + }, + { + "epoch": 1.0687224669603523, + "grad_norm": 1.7405771304623092, + "learning_rate": 9.697073763547415e-06, + "loss": 0.597034215927124, + "step": 3033 + }, + { + "epoch": 1.0690748898678415, + "grad_norm": 1.867958529307263, + "learning_rate": 9.691250093312419e-06, + "loss": 0.6680281162261963, + "step": 3034 + }, + { + "epoch": 1.0694273127753304, + "grad_norm": 1.4898600082698874, + "learning_rate": 9.6854265278884e-06, + "loss": 0.6155321002006531, + "step": 3035 + }, + { + "epoch": 1.0697797356828194, + "grad_norm": 2.4613840016445314, + "learning_rate": 9.67960306925229e-06, + "loss": 0.5945199728012085, + "step": 3036 + }, + { + "epoch": 
1.0701321585903083, + "grad_norm": 1.7063166475670735, + "learning_rate": 9.673779719380967e-06, + "loss": 0.6492328643798828, + "step": 3037 + }, + { + "epoch": 1.0704845814977975, + "grad_norm": 1.8638826733925389, + "learning_rate": 9.667956480251273e-06, + "loss": 0.6501325964927673, + "step": 3038 + }, + { + "epoch": 1.0708370044052864, + "grad_norm": 1.4216071761527918, + "learning_rate": 9.662133353840025e-06, + "loss": 0.5956053733825684, + "step": 3039 + }, + { + "epoch": 1.0711894273127753, + "grad_norm": 1.7546711372901296, + "learning_rate": 9.656310342123988e-06, + "loss": 0.5966510772705078, + "step": 3040 + }, + { + "epoch": 1.0715418502202643, + "grad_norm": 1.7715803220306194, + "learning_rate": 9.65048744707989e-06, + "loss": 0.7096615433692932, + "step": 3041 + }, + { + "epoch": 1.0718942731277532, + "grad_norm": 1.5279732385894715, + "learning_rate": 9.644664670684429e-06, + "loss": 0.6697839498519897, + "step": 3042 + }, + { + "epoch": 1.0722466960352424, + "grad_norm": 1.6318262899161158, + "learning_rate": 9.638842014914253e-06, + "loss": 0.6288081407546997, + "step": 3043 + }, + { + "epoch": 1.0725991189427313, + "grad_norm": 1.6830476156095877, + "learning_rate": 9.633019481745973e-06, + "loss": 0.5870436429977417, + "step": 3044 + }, + { + "epoch": 1.0729515418502202, + "grad_norm": 1.4073037692368846, + "learning_rate": 9.62719707315616e-06, + "loss": 0.5540846586227417, + "step": 3045 + }, + { + "epoch": 1.0733039647577092, + "grad_norm": 1.8276869267624827, + "learning_rate": 9.621374791121335e-06, + "loss": 0.6134544014930725, + "step": 3046 + }, + { + "epoch": 1.0736563876651983, + "grad_norm": 1.9310361455307938, + "learning_rate": 9.61555263761799e-06, + "loss": 0.5537046194076538, + "step": 3047 + }, + { + "epoch": 1.0740088105726873, + "grad_norm": 1.5553451953770387, + "learning_rate": 9.60973061462256e-06, + "loss": 0.6423748731613159, + "step": 3048 + }, + { + "epoch": 1.0743612334801762, + "grad_norm": 1.7219317421679232, + 
"learning_rate": 9.603908724111438e-06, + "loss": 0.575737714767456, + "step": 3049 + }, + { + "epoch": 1.0747136563876651, + "grad_norm": 1.7334347992355148, + "learning_rate": 9.598086968060976e-06, + "loss": 0.5326197147369385, + "step": 3050 + }, + { + "epoch": 1.075066079295154, + "grad_norm": 1.5560472770838902, + "learning_rate": 9.592265348447481e-06, + "loss": 0.6533973217010498, + "step": 3051 + }, + { + "epoch": 1.0754185022026432, + "grad_norm": 1.5101678591543142, + "learning_rate": 9.586443867247212e-06, + "loss": 0.5536586046218872, + "step": 3052 + }, + { + "epoch": 1.0757709251101322, + "grad_norm": 1.6611779528904365, + "learning_rate": 9.580622526436382e-06, + "loss": 0.6024892926216125, + "step": 3053 + }, + { + "epoch": 1.076123348017621, + "grad_norm": 1.5423440836231639, + "learning_rate": 9.574801327991148e-06, + "loss": 0.5070478320121765, + "step": 3054 + }, + { + "epoch": 1.07647577092511, + "grad_norm": 1.9040251147858696, + "learning_rate": 9.568980273887637e-06, + "loss": 0.6518458127975464, + "step": 3055 + }, + { + "epoch": 1.0768281938325992, + "grad_norm": 1.8761852451910037, + "learning_rate": 9.563159366101905e-06, + "loss": 0.6120346784591675, + "step": 3056 + }, + { + "epoch": 1.0771806167400881, + "grad_norm": 1.7428937123650154, + "learning_rate": 9.557338606609973e-06, + "loss": 0.6725353598594666, + "step": 3057 + }, + { + "epoch": 1.077533039647577, + "grad_norm": 1.5136863007311347, + "learning_rate": 9.551517997387809e-06, + "loss": 0.5311183333396912, + "step": 3058 + }, + { + "epoch": 1.077885462555066, + "grad_norm": 1.8000300040025692, + "learning_rate": 9.545697540411324e-06, + "loss": 0.5728713274002075, + "step": 3059 + }, + { + "epoch": 1.0782378854625552, + "grad_norm": 1.7991281029512354, + "learning_rate": 9.53987723765639e-06, + "loss": 0.5527676343917847, + "step": 3060 + }, + { + "epoch": 1.078590308370044, + "grad_norm": 1.9177712397501578, + "learning_rate": 9.534057091098813e-06, + "loss": 
0.7529809474945068, + "step": 3061 + }, + { + "epoch": 1.078942731277533, + "grad_norm": 1.6975104946869117, + "learning_rate": 9.528237102714352e-06, + "loss": 0.5485205054283142, + "step": 3062 + }, + { + "epoch": 1.079295154185022, + "grad_norm": 1.8773141561341242, + "learning_rate": 9.522417274478716e-06, + "loss": 0.785184383392334, + "step": 3063 + }, + { + "epoch": 1.079647577092511, + "grad_norm": 1.692195190429073, + "learning_rate": 9.516597608367547e-06, + "loss": 0.5645574331283569, + "step": 3064 + }, + { + "epoch": 1.08, + "grad_norm": 1.6815198266991151, + "learning_rate": 9.51077810635645e-06, + "loss": 0.5878466367721558, + "step": 3065 + }, + { + "epoch": 1.080352422907489, + "grad_norm": 1.7635464385467587, + "learning_rate": 9.504958770420962e-06, + "loss": 0.6610634922981262, + "step": 3066 + }, + { + "epoch": 1.080704845814978, + "grad_norm": 1.8113852263213976, + "learning_rate": 9.49913960253656e-06, + "loss": 0.5928626656532288, + "step": 3067 + }, + { + "epoch": 1.0810572687224669, + "grad_norm": 1.7322633216843277, + "learning_rate": 9.49332060467868e-06, + "loss": 0.7038083672523499, + "step": 3068 + }, + { + "epoch": 1.081409691629956, + "grad_norm": 1.3686406289588096, + "learning_rate": 9.487501778822685e-06, + "loss": 0.5966217517852783, + "step": 3069 + }, + { + "epoch": 1.081762114537445, + "grad_norm": 1.686172060324731, + "learning_rate": 9.481683126943884e-06, + "loss": 0.6594187021255493, + "step": 3070 + }, + { + "epoch": 1.082114537444934, + "grad_norm": 1.4709153501511232, + "learning_rate": 9.475864651017536e-06, + "loss": 0.450161874294281, + "step": 3071 + }, + { + "epoch": 1.0824669603524228, + "grad_norm": 1.9209170149530705, + "learning_rate": 9.470046353018821e-06, + "loss": 0.6459252834320068, + "step": 3072 + }, + { + "epoch": 1.082819383259912, + "grad_norm": 1.5818284678879686, + "learning_rate": 9.464228234922882e-06, + "loss": 0.6505793929100037, + "step": 3073 + }, + { + "epoch": 1.083171806167401, + 
"grad_norm": 1.5944722571395005, + "learning_rate": 9.458410298704781e-06, + "loss": 0.6480910778045654, + "step": 3074 + }, + { + "epoch": 1.0835242290748899, + "grad_norm": 1.530550500951046, + "learning_rate": 9.452592546339527e-06, + "loss": 0.6494983434677124, + "step": 3075 + }, + { + "epoch": 1.0838766519823788, + "grad_norm": 1.560525752678919, + "learning_rate": 9.44677497980207e-06, + "loss": 0.4710897207260132, + "step": 3076 + }, + { + "epoch": 1.0842290748898677, + "grad_norm": 1.5265540562186208, + "learning_rate": 9.440957601067294e-06, + "loss": 0.599402904510498, + "step": 3077 + }, + { + "epoch": 1.084581497797357, + "grad_norm": 1.9340764168188993, + "learning_rate": 9.435140412110006e-06, + "loss": 0.665642499923706, + "step": 3078 + }, + { + "epoch": 1.0849339207048458, + "grad_norm": 1.8868033009058576, + "learning_rate": 9.429323414904975e-06, + "loss": 0.5861828923225403, + "step": 3079 + }, + { + "epoch": 1.0852863436123348, + "grad_norm": 1.581789489047221, + "learning_rate": 9.42350661142688e-06, + "loss": 0.6115351915359497, + "step": 3080 + }, + { + "epoch": 1.0856387665198237, + "grad_norm": 1.6610293276945491, + "learning_rate": 9.417690003650353e-06, + "loss": 0.6627066135406494, + "step": 3081 + }, + { + "epoch": 1.0859911894273129, + "grad_norm": 1.5744692750190625, + "learning_rate": 9.411873593549947e-06, + "loss": 0.6155676245689392, + "step": 3082 + }, + { + "epoch": 1.0863436123348018, + "grad_norm": 1.59429166731528, + "learning_rate": 9.406057383100151e-06, + "loss": 0.5429089069366455, + "step": 3083 + }, + { + "epoch": 1.0866960352422907, + "grad_norm": 1.638763712553269, + "learning_rate": 9.400241374275391e-06, + "loss": 0.5416614413261414, + "step": 3084 + }, + { + "epoch": 1.0870484581497797, + "grad_norm": 1.5652840639245515, + "learning_rate": 9.394425569050018e-06, + "loss": 0.6708710193634033, + "step": 3085 + }, + { + "epoch": 1.0874008810572686, + "grad_norm": 1.6407899201706977, + "learning_rate": 
9.388609969398318e-06, + "loss": 0.588347315788269, + "step": 3086 + }, + { + "epoch": 1.0877533039647578, + "grad_norm": 1.6990356352816562, + "learning_rate": 9.38279457729451e-06, + "loss": 0.4999222755432129, + "step": 3087 + }, + { + "epoch": 1.0881057268722467, + "grad_norm": 1.5508462782114225, + "learning_rate": 9.37697939471273e-06, + "loss": 0.5400034189224243, + "step": 3088 + }, + { + "epoch": 1.0884581497797357, + "grad_norm": 1.6869985582255194, + "learning_rate": 9.37116442362706e-06, + "loss": 0.5583670139312744, + "step": 3089 + }, + { + "epoch": 1.0888105726872246, + "grad_norm": 2.063349590123988, + "learning_rate": 9.365349666011497e-06, + "loss": 0.6863820552825928, + "step": 3090 + }, + { + "epoch": 1.0891629955947137, + "grad_norm": 1.7395123823701124, + "learning_rate": 9.35953512383997e-06, + "loss": 0.6422115564346313, + "step": 3091 + }, + { + "epoch": 1.0895154185022027, + "grad_norm": 1.7254266288951046, + "learning_rate": 9.353720799086337e-06, + "loss": 0.7106888294219971, + "step": 3092 + }, + { + "epoch": 1.0898678414096916, + "grad_norm": 1.7765997338600088, + "learning_rate": 9.347906693724379e-06, + "loss": 0.6070472002029419, + "step": 3093 + }, + { + "epoch": 1.0902202643171806, + "grad_norm": 2.653468303504809, + "learning_rate": 9.342092809727807e-06, + "loss": 0.577377200126648, + "step": 3094 + }, + { + "epoch": 1.0905726872246695, + "grad_norm": 2.222722693331331, + "learning_rate": 9.336279149070252e-06, + "loss": 0.6249948740005493, + "step": 3095 + }, + { + "epoch": 1.0909251101321586, + "grad_norm": 1.7155188858933852, + "learning_rate": 9.330465713725265e-06, + "loss": 0.5515183210372925, + "step": 3096 + }, + { + "epoch": 1.0912775330396476, + "grad_norm": 1.866411497064146, + "learning_rate": 9.324652505666336e-06, + "loss": 0.6074613332748413, + "step": 3097 + }, + { + "epoch": 1.0916299559471365, + "grad_norm": 1.7632595046666684, + "learning_rate": 9.318839526866863e-06, + "loss": 0.6520178318023682, + "step": 
3098 + }, + { + "epoch": 1.0919823788546255, + "grad_norm": 1.4274715968201055, + "learning_rate": 9.31302677930017e-06, + "loss": 0.45863813161849976, + "step": 3099 + }, + { + "epoch": 1.0923348017621146, + "grad_norm": 1.6772052003130429, + "learning_rate": 9.307214264939508e-06, + "loss": 0.610805869102478, + "step": 3100 + }, + { + "epoch": 1.0926872246696036, + "grad_norm": 1.6545163632346178, + "learning_rate": 9.30140198575804e-06, + "loss": 0.5954282283782959, + "step": 3101 + }, + { + "epoch": 1.0930396475770925, + "grad_norm": 1.4805927694864789, + "learning_rate": 9.29558994372886e-06, + "loss": 0.6941400170326233, + "step": 3102 + }, + { + "epoch": 1.0933920704845814, + "grad_norm": 1.4236727289117346, + "learning_rate": 9.289778140824974e-06, + "loss": 0.6723533868789673, + "step": 3103 + }, + { + "epoch": 1.0937444933920706, + "grad_norm": 1.5690147341016918, + "learning_rate": 9.2839665790193e-06, + "loss": 0.49137037992477417, + "step": 3104 + }, + { + "epoch": 1.0940969162995595, + "grad_norm": 1.6112616837583658, + "learning_rate": 9.278155260284692e-06, + "loss": 0.5827045440673828, + "step": 3105 + }, + { + "epoch": 1.0944493392070485, + "grad_norm": 1.7496187485651187, + "learning_rate": 9.272344186593909e-06, + "loss": 0.6391462683677673, + "step": 3106 + }, + { + "epoch": 1.0948017621145374, + "grad_norm": 1.857839078789808, + "learning_rate": 9.266533359919628e-06, + "loss": 0.4994915723800659, + "step": 3107 + }, + { + "epoch": 1.0951541850220265, + "grad_norm": 1.7820549618718244, + "learning_rate": 9.260722782234445e-06, + "loss": 0.6480728387832642, + "step": 3108 + }, + { + "epoch": 1.0955066079295155, + "grad_norm": 1.9724258404436363, + "learning_rate": 9.25491245551087e-06, + "loss": 0.5734057426452637, + "step": 3109 + }, + { + "epoch": 1.0958590308370044, + "grad_norm": 1.5757198230236702, + "learning_rate": 9.249102381721328e-06, + "loss": 0.5650345087051392, + "step": 3110 + }, + { + "epoch": 1.0962114537444934, + "grad_norm": 
1.6196253415823336, + "learning_rate": 9.243292562838164e-06, + "loss": 0.6261975765228271, + "step": 3111 + }, + { + "epoch": 1.0965638766519823, + "grad_norm": 1.6283298345999566, + "learning_rate": 9.237483000833619e-06, + "loss": 0.730735182762146, + "step": 3112 + }, + { + "epoch": 1.0969162995594715, + "grad_norm": 1.614573149399901, + "learning_rate": 9.231673697679867e-06, + "loss": 0.6198948621749878, + "step": 3113 + }, + { + "epoch": 1.0972687224669604, + "grad_norm": 1.526191646446162, + "learning_rate": 9.225864655348982e-06, + "loss": 0.5302865505218506, + "step": 3114 + }, + { + "epoch": 1.0976211453744493, + "grad_norm": 1.6895671377093768, + "learning_rate": 9.220055875812955e-06, + "loss": 0.5995128154754639, + "step": 3115 + }, + { + "epoch": 1.0979735682819383, + "grad_norm": 1.5451580100020488, + "learning_rate": 9.214247361043687e-06, + "loss": 0.3801479935646057, + "step": 3116 + }, + { + "epoch": 1.0983259911894274, + "grad_norm": 1.7467243659333909, + "learning_rate": 9.208439113012984e-06, + "loss": 0.5617209076881409, + "step": 3117 + }, + { + "epoch": 1.0986784140969164, + "grad_norm": 2.3313501330545776, + "learning_rate": 9.202631133692572e-06, + "loss": 0.5233842134475708, + "step": 3118 + }, + { + "epoch": 1.0990308370044053, + "grad_norm": 1.5308784453968334, + "learning_rate": 9.196823425054073e-06, + "loss": 0.5300124883651733, + "step": 3119 + }, + { + "epoch": 1.0993832599118942, + "grad_norm": 1.6766914696070794, + "learning_rate": 9.191015989069024e-06, + "loss": 0.686185359954834, + "step": 3120 + }, + { + "epoch": 1.0997356828193832, + "grad_norm": 4.625699614895419, + "learning_rate": 9.18520882770887e-06, + "loss": 0.6043056845664978, + "step": 3121 + }, + { + "epoch": 1.1000881057268723, + "grad_norm": 1.4445640616396158, + "learning_rate": 9.179401942944961e-06, + "loss": 0.6299905776977539, + "step": 3122 + }, + { + "epoch": 1.1004405286343613, + "grad_norm": 1.639683344548818, + "learning_rate": 9.173595336748557e-06, 
+ "loss": 0.57872474193573, + "step": 3123 + }, + { + "epoch": 1.1007929515418502, + "grad_norm": 1.6533643796746975, + "learning_rate": 9.167789011090818e-06, + "loss": 0.5638746023178101, + "step": 3124 + }, + { + "epoch": 1.1011453744493391, + "grad_norm": 1.9780317067618627, + "learning_rate": 9.161982967942806e-06, + "loss": 0.6150490045547485, + "step": 3125 + }, + { + "epoch": 1.1014977973568283, + "grad_norm": 1.6035565827670604, + "learning_rate": 9.156177209275503e-06, + "loss": 0.547231912612915, + "step": 3126 + }, + { + "epoch": 1.1018502202643172, + "grad_norm": 1.753224578445511, + "learning_rate": 9.150371737059773e-06, + "loss": 0.6999325752258301, + "step": 3127 + }, + { + "epoch": 1.1022026431718062, + "grad_norm": 1.868897492269033, + "learning_rate": 9.144566553266396e-06, + "loss": 0.7175568342208862, + "step": 3128 + }, + { + "epoch": 1.102555066079295, + "grad_norm": 1.6615553040601516, + "learning_rate": 9.138761659866054e-06, + "loss": 0.7308273911476135, + "step": 3129 + }, + { + "epoch": 1.102907488986784, + "grad_norm": 1.6216416819643327, + "learning_rate": 9.132957058829323e-06, + "loss": 0.5951930284500122, + "step": 3130 + }, + { + "epoch": 1.1032599118942732, + "grad_norm": 1.8459198222998503, + "learning_rate": 9.127152752126688e-06, + "loss": 0.5684988498687744, + "step": 3131 + }, + { + "epoch": 1.1036123348017621, + "grad_norm": 1.6778026851292638, + "learning_rate": 9.121348741728532e-06, + "loss": 0.6490764617919922, + "step": 3132 + }, + { + "epoch": 1.103964757709251, + "grad_norm": 1.9759558630482505, + "learning_rate": 9.115545029605129e-06, + "loss": 0.7795257568359375, + "step": 3133 + }, + { + "epoch": 1.10431718061674, + "grad_norm": 1.677150279034534, + "learning_rate": 9.10974161772667e-06, + "loss": 0.5443774461746216, + "step": 3134 + }, + { + "epoch": 1.1046696035242292, + "grad_norm": 1.4979331299176493, + "learning_rate": 9.103938508063223e-06, + "loss": 0.48989373445510864, + "step": 3135 + }, + { + "epoch": 
1.105022026431718, + "grad_norm": 1.7384756252454785, + "learning_rate": 9.098135702584762e-06, + "loss": 0.5628808736801147, + "step": 3136 + }, + { + "epoch": 1.105374449339207, + "grad_norm": 1.7853238397751252, + "learning_rate": 9.092333203261168e-06, + "loss": 0.6549321413040161, + "step": 3137 + }, + { + "epoch": 1.105726872246696, + "grad_norm": 1.6854667721006384, + "learning_rate": 9.0865310120622e-06, + "loss": 0.7353606224060059, + "step": 3138 + }, + { + "epoch": 1.106079295154185, + "grad_norm": 1.4467352618974103, + "learning_rate": 9.080729130957528e-06, + "loss": 0.650668203830719, + "step": 3139 + }, + { + "epoch": 1.106431718061674, + "grad_norm": 1.4313841589857448, + "learning_rate": 9.07492756191671e-06, + "loss": 0.5618860721588135, + "step": 3140 + }, + { + "epoch": 1.106784140969163, + "grad_norm": 1.6263891772619556, + "learning_rate": 9.069126306909187e-06, + "loss": 0.5532773733139038, + "step": 3141 + }, + { + "epoch": 1.107136563876652, + "grad_norm": 1.5761547934103723, + "learning_rate": 9.06332536790432e-06, + "loss": 0.6240289211273193, + "step": 3142 + }, + { + "epoch": 1.1074889867841409, + "grad_norm": 1.6326282131144043, + "learning_rate": 9.057524746871335e-06, + "loss": 0.5952814221382141, + "step": 3143 + }, + { + "epoch": 1.10784140969163, + "grad_norm": 1.7063742447281478, + "learning_rate": 9.051724445779373e-06, + "loss": 0.6011646389961243, + "step": 3144 + }, + { + "epoch": 1.108193832599119, + "grad_norm": 1.54385403751274, + "learning_rate": 9.045924466597448e-06, + "loss": 0.6964641213417053, + "step": 3145 + }, + { + "epoch": 1.108546255506608, + "grad_norm": 1.9798851390043897, + "learning_rate": 9.040124811294473e-06, + "loss": 0.6821622848510742, + "step": 3146 + }, + { + "epoch": 1.1088986784140968, + "grad_norm": 1.569676973352834, + "learning_rate": 9.034325481839253e-06, + "loss": 0.5045080184936523, + "step": 3147 + }, + { + "epoch": 1.109251101321586, + "grad_norm": 1.608921739397865, + "learning_rate": 
9.028526480200482e-06, + "loss": 0.5709735155105591, + "step": 3148 + }, + { + "epoch": 1.109603524229075, + "grad_norm": 1.6331449251948336, + "learning_rate": 9.022727808346731e-06, + "loss": 0.5882325172424316, + "step": 3149 + }, + { + "epoch": 1.1099559471365639, + "grad_norm": 1.6560869042500304, + "learning_rate": 9.016929468246482e-06, + "loss": 0.627426266670227, + "step": 3150 + }, + { + "epoch": 1.1103083700440528, + "grad_norm": 1.5720686051365462, + "learning_rate": 9.011131461868078e-06, + "loss": 0.42419761419296265, + "step": 3151 + }, + { + "epoch": 1.110660792951542, + "grad_norm": 1.487398401726564, + "learning_rate": 9.005333791179775e-06, + "loss": 0.5261023044586182, + "step": 3152 + }, + { + "epoch": 1.111013215859031, + "grad_norm": 1.853640852117203, + "learning_rate": 8.999536458149692e-06, + "loss": 0.6654448509216309, + "step": 3153 + }, + { + "epoch": 1.1113656387665198, + "grad_norm": 1.8252144061899127, + "learning_rate": 8.993739464745843e-06, + "loss": 0.5939514636993408, + "step": 3154 + }, + { + "epoch": 1.1117180616740088, + "grad_norm": 2.120048901517583, + "learning_rate": 8.987942812936133e-06, + "loss": 0.6381959319114685, + "step": 3155 + }, + { + "epoch": 1.1120704845814977, + "grad_norm": 1.5708485505419778, + "learning_rate": 8.982146504688343e-06, + "loss": 0.5474847555160522, + "step": 3156 + }, + { + "epoch": 1.1124229074889869, + "grad_norm": 1.9617265332983251, + "learning_rate": 8.97635054197013e-06, + "loss": 0.6306884288787842, + "step": 3157 + }, + { + "epoch": 1.1127753303964758, + "grad_norm": 1.6582794196349533, + "learning_rate": 8.97055492674906e-06, + "loss": 0.5988807678222656, + "step": 3158 + }, + { + "epoch": 1.1131277533039647, + "grad_norm": 1.4627681911625667, + "learning_rate": 8.964759660992547e-06, + "loss": 0.6316757202148438, + "step": 3159 + }, + { + "epoch": 1.1134801762114537, + "grad_norm": 2.1475966254528265, + "learning_rate": 8.958964746667917e-06, + "loss": 0.6031370162963867, + "step": 
3160 + }, + { + "epoch": 1.1138325991189428, + "grad_norm": 1.631780585948097, + "learning_rate": 8.953170185742357e-06, + "loss": 0.6334977149963379, + "step": 3161 + }, + { + "epoch": 1.1141850220264318, + "grad_norm": 1.7666867258825858, + "learning_rate": 8.947375980182937e-06, + "loss": 0.49237731099128723, + "step": 3162 + }, + { + "epoch": 1.1145374449339207, + "grad_norm": 1.8113939325794732, + "learning_rate": 8.941582131956615e-06, + "loss": 0.7349523305892944, + "step": 3163 + }, + { + "epoch": 1.1148898678414096, + "grad_norm": 1.9764498599764084, + "learning_rate": 8.935788643030218e-06, + "loss": 0.5048422813415527, + "step": 3164 + }, + { + "epoch": 1.1152422907488986, + "grad_norm": 1.90381850621639, + "learning_rate": 8.92999551537046e-06, + "loss": 0.6217244267463684, + "step": 3165 + }, + { + "epoch": 1.1155947136563877, + "grad_norm": 1.6579628905821213, + "learning_rate": 8.924202750943926e-06, + "loss": 0.4949147701263428, + "step": 3166 + }, + { + "epoch": 1.1159471365638767, + "grad_norm": 1.8665150826118222, + "learning_rate": 8.918410351717074e-06, + "loss": 0.5975630283355713, + "step": 3167 + }, + { + "epoch": 1.1162995594713656, + "grad_norm": 1.8627553919144322, + "learning_rate": 8.91261831965625e-06, + "loss": 0.7546026110649109, + "step": 3168 + }, + { + "epoch": 1.1166519823788545, + "grad_norm": 1.8785066059323416, + "learning_rate": 8.906826656727665e-06, + "loss": 0.6238037347793579, + "step": 3169 + }, + { + "epoch": 1.1170044052863437, + "grad_norm": 1.7775910427875068, + "learning_rate": 8.901035364897407e-06, + "loss": 0.617587685585022, + "step": 3170 + }, + { + "epoch": 1.1173568281938326, + "grad_norm": 1.6345696523196545, + "learning_rate": 8.895244446131445e-06, + "loss": 0.4834432005882263, + "step": 3171 + }, + { + "epoch": 1.1177092511013216, + "grad_norm": 1.8061061322305951, + "learning_rate": 8.889453902395608e-06, + "loss": 0.614972710609436, + "step": 3172 + }, + { + "epoch": 1.1180616740088105, + "grad_norm": 
2.26536947887869, + "learning_rate": 8.883663735655612e-06, + "loss": 0.6468379497528076, + "step": 3173 + }, + { + "epoch": 1.1184140969162994, + "grad_norm": 1.8154030785363677, + "learning_rate": 8.877873947877042e-06, + "loss": 0.6372466683387756, + "step": 3174 + }, + { + "epoch": 1.1187665198237886, + "grad_norm": 1.8831907584481906, + "learning_rate": 8.872084541025336e-06, + "loss": 0.6295863389968872, + "step": 3175 + }, + { + "epoch": 1.1191189427312775, + "grad_norm": 1.7211075291863254, + "learning_rate": 8.866295517065831e-06, + "loss": 0.6109524369239807, + "step": 3176 + }, + { + "epoch": 1.1194713656387665, + "grad_norm": 1.6861537948886334, + "learning_rate": 8.860506877963715e-06, + "loss": 0.6724812388420105, + "step": 3177 + }, + { + "epoch": 1.1198237885462554, + "grad_norm": 1.4091706259139964, + "learning_rate": 8.854718625684049e-06, + "loss": 0.6612162590026855, + "step": 3178 + }, + { + "epoch": 1.1201762114537446, + "grad_norm": 1.6332443405139663, + "learning_rate": 8.84893076219177e-06, + "loss": 0.6209636926651001, + "step": 3179 + }, + { + "epoch": 1.1205286343612335, + "grad_norm": 1.7567347030111673, + "learning_rate": 8.843143289451673e-06, + "loss": 0.8548281192779541, + "step": 3180 + }, + { + "epoch": 1.1208810572687224, + "grad_norm": 1.742397796953756, + "learning_rate": 8.837356209428428e-06, + "loss": 0.4621508717536926, + "step": 3181 + }, + { + "epoch": 1.1212334801762114, + "grad_norm": 1.8553184481302196, + "learning_rate": 8.831569524086568e-06, + "loss": 0.5065817832946777, + "step": 3182 + }, + { + "epoch": 1.1215859030837005, + "grad_norm": 1.5532313157641433, + "learning_rate": 8.825783235390488e-06, + "loss": 0.5467691421508789, + "step": 3183 + }, + { + "epoch": 1.1219383259911895, + "grad_norm": 1.3786030341795126, + "learning_rate": 8.81999734530446e-06, + "loss": 0.4938517212867737, + "step": 3184 + }, + { + "epoch": 1.1222907488986784, + "grad_norm": 1.4972934746199023, + "learning_rate": 
8.814211855792609e-06, + "loss": 0.6125702857971191, + "step": 3185 + }, + { + "epoch": 1.1226431718061674, + "grad_norm": 1.427476145591487, + "learning_rate": 8.80842676881893e-06, + "loss": 0.5272841453552246, + "step": 3186 + }, + { + "epoch": 1.1229955947136563, + "grad_norm": 1.8463623605620603, + "learning_rate": 8.802642086347278e-06, + "loss": 0.5595715045928955, + "step": 3187 + }, + { + "epoch": 1.1233480176211454, + "grad_norm": 1.7533827268189746, + "learning_rate": 8.796857810341375e-06, + "loss": 0.7178677916526794, + "step": 3188 + }, + { + "epoch": 1.1237004405286344, + "grad_norm": 2.166791630557212, + "learning_rate": 8.791073942764806e-06, + "loss": 0.6000991463661194, + "step": 3189 + }, + { + "epoch": 1.1240528634361233, + "grad_norm": 1.7926160729471858, + "learning_rate": 8.785290485581008e-06, + "loss": 0.537361741065979, + "step": 3190 + }, + { + "epoch": 1.1244052863436123, + "grad_norm": 1.7666842188914018, + "learning_rate": 8.779507440753286e-06, + "loss": 0.7135556936264038, + "step": 3191 + }, + { + "epoch": 1.1247577092511014, + "grad_norm": 1.7053825384185084, + "learning_rate": 8.773724810244805e-06, + "loss": 0.501063346862793, + "step": 3192 + }, + { + "epoch": 1.1251101321585903, + "grad_norm": 1.679109568038749, + "learning_rate": 8.767942596018587e-06, + "loss": 0.6885302662849426, + "step": 3193 + }, + { + "epoch": 1.1254625550660793, + "grad_norm": 1.321748305255468, + "learning_rate": 8.762160800037516e-06, + "loss": 0.5902360081672668, + "step": 3194 + }, + { + "epoch": 1.1258149779735682, + "grad_norm": 1.687654327550192, + "learning_rate": 8.75637942426433e-06, + "loss": 0.6308953762054443, + "step": 3195 + }, + { + "epoch": 1.1261674008810574, + "grad_norm": 1.8380657710321036, + "learning_rate": 8.750598470661625e-06, + "loss": 0.5710124969482422, + "step": 3196 + }, + { + "epoch": 1.1265198237885463, + "grad_norm": 1.76295044659038, + "learning_rate": 8.744817941191862e-06, + "loss": 0.6110632419586182, + "step": 
3197 + }, + { + "epoch": 1.1268722466960353, + "grad_norm": 1.7274451742305768, + "learning_rate": 8.73903783781734e-06, + "loss": 0.5274624824523926, + "step": 3198 + }, + { + "epoch": 1.1272246696035242, + "grad_norm": 1.549070468504263, + "learning_rate": 8.733258162500228e-06, + "loss": 0.6144713163375854, + "step": 3199 + }, + { + "epoch": 1.1275770925110131, + "grad_norm": 1.8001185698886477, + "learning_rate": 8.727478917202551e-06, + "loss": 0.6404621005058289, + "step": 3200 + }, + { + "epoch": 1.1279295154185023, + "grad_norm": 1.602548541775438, + "learning_rate": 8.721700103886177e-06, + "loss": 0.5693025588989258, + "step": 3201 + }, + { + "epoch": 1.1282819383259912, + "grad_norm": 1.6563446017851289, + "learning_rate": 8.715921724512838e-06, + "loss": 0.5631159543991089, + "step": 3202 + }, + { + "epoch": 1.1286343612334802, + "grad_norm": 1.5785191171510689, + "learning_rate": 8.710143781044113e-06, + "loss": 0.648078441619873, + "step": 3203 + }, + { + "epoch": 1.128986784140969, + "grad_norm": 2.0721270642934666, + "learning_rate": 8.704366275441426e-06, + "loss": 0.6858379244804382, + "step": 3204 + }, + { + "epoch": 1.1293392070484582, + "grad_norm": 1.8203927475030908, + "learning_rate": 8.698589209666074e-06, + "loss": 0.7244000434875488, + "step": 3205 + }, + { + "epoch": 1.1296916299559472, + "grad_norm": 1.7775130777760553, + "learning_rate": 8.692812585679182e-06, + "loss": 0.5918365716934204, + "step": 3206 + }, + { + "epoch": 1.1300440528634361, + "grad_norm": 1.8950041670387165, + "learning_rate": 8.687036405441733e-06, + "loss": 0.6893443465232849, + "step": 3207 + }, + { + "epoch": 1.130396475770925, + "grad_norm": 1.6934464725865028, + "learning_rate": 8.681260670914564e-06, + "loss": 0.729834794998169, + "step": 3208 + }, + { + "epoch": 1.130748898678414, + "grad_norm": 1.9278305082183818, + "learning_rate": 8.675485384058356e-06, + "loss": 0.6525821685791016, + "step": 3209 + }, + { + "epoch": 1.1311013215859032, + "grad_norm": 
1.7892045210081244, + "learning_rate": 8.669710546833642e-06, + "loss": 0.6799874305725098, + "step": 3210 + }, + { + "epoch": 1.131453744493392, + "grad_norm": 1.6216385781826248, + "learning_rate": 8.6639361612008e-06, + "loss": 0.5614932775497437, + "step": 3211 + }, + { + "epoch": 1.131806167400881, + "grad_norm": 1.6912315117870094, + "learning_rate": 8.658162229120045e-06, + "loss": 0.5975101590156555, + "step": 3212 + }, + { + "epoch": 1.13215859030837, + "grad_norm": 1.7352702737909875, + "learning_rate": 8.652388752551458e-06, + "loss": 0.5367887020111084, + "step": 3213 + }, + { + "epoch": 1.1325110132158591, + "grad_norm": 1.360358935584503, + "learning_rate": 8.646615733454949e-06, + "loss": 0.4451865553855896, + "step": 3214 + }, + { + "epoch": 1.132863436123348, + "grad_norm": 1.8983821913108012, + "learning_rate": 8.64084317379028e-06, + "loss": 0.6482576131820679, + "step": 3215 + }, + { + "epoch": 1.133215859030837, + "grad_norm": 1.5858394578763535, + "learning_rate": 8.635071075517053e-06, + "loss": 0.5890318155288696, + "step": 3216 + }, + { + "epoch": 1.133568281938326, + "grad_norm": 1.6567929917802857, + "learning_rate": 8.629299440594719e-06, + "loss": 0.554576575756073, + "step": 3217 + }, + { + "epoch": 1.1339207048458149, + "grad_norm": 1.6966150183280715, + "learning_rate": 8.623528270982567e-06, + "loss": 0.5987116694450378, + "step": 3218 + }, + { + "epoch": 1.134273127753304, + "grad_norm": 1.8696533969224407, + "learning_rate": 8.617757568639731e-06, + "loss": 0.49857625365257263, + "step": 3219 + }, + { + "epoch": 1.134625550660793, + "grad_norm": 1.6960564098429034, + "learning_rate": 8.61198733552518e-06, + "loss": 0.6116641759872437, + "step": 3220 + }, + { + "epoch": 1.134977973568282, + "grad_norm": 1.6619215502907394, + "learning_rate": 8.606217573597738e-06, + "loss": 0.4346674978733063, + "step": 3221 + }, + { + "epoch": 1.1353303964757708, + "grad_norm": 1.6058889875943096, + "learning_rate": 8.600448284816046e-06, + 
"loss": 0.6973283290863037, + "step": 3222 + }, + { + "epoch": 1.13568281938326, + "grad_norm": 1.547791232560021, + "learning_rate": 8.594679471138613e-06, + "loss": 0.5457896590232849, + "step": 3223 + }, + { + "epoch": 1.136035242290749, + "grad_norm": 1.6457593373386994, + "learning_rate": 8.58891113452376e-06, + "loss": 0.4520479440689087, + "step": 3224 + }, + { + "epoch": 1.1363876651982379, + "grad_norm": 1.6501706928794149, + "learning_rate": 8.58314327692966e-06, + "loss": 0.6169587969779968, + "step": 3225 + }, + { + "epoch": 1.1367400881057268, + "grad_norm": 1.729795732302939, + "learning_rate": 8.577375900314327e-06, + "loss": 0.6398670673370361, + "step": 3226 + }, + { + "epoch": 1.1370925110132157, + "grad_norm": 1.6846614829900397, + "learning_rate": 8.571609006635604e-06, + "loss": 0.5772207975387573, + "step": 3227 + }, + { + "epoch": 1.137444933920705, + "grad_norm": 1.5622430074284195, + "learning_rate": 8.565842597851165e-06, + "loss": 0.5561503171920776, + "step": 3228 + }, + { + "epoch": 1.1377973568281938, + "grad_norm": 1.644881271079104, + "learning_rate": 8.560076675918537e-06, + "loss": 0.4702373743057251, + "step": 3229 + }, + { + "epoch": 1.1381497797356828, + "grad_norm": 1.778044829497574, + "learning_rate": 8.554311242795061e-06, + "loss": 0.5967564582824707, + "step": 3230 + }, + { + "epoch": 1.138502202643172, + "grad_norm": 1.782270527802186, + "learning_rate": 8.548546300437928e-06, + "loss": 0.4749453663825989, + "step": 3231 + }, + { + "epoch": 1.1388546255506609, + "grad_norm": 2.2009062727733046, + "learning_rate": 8.542781850804155e-06, + "loss": 0.6939869523048401, + "step": 3232 + }, + { + "epoch": 1.1392070484581498, + "grad_norm": 1.4327701228186707, + "learning_rate": 8.537017895850593e-06, + "loss": 0.5618892908096313, + "step": 3233 + }, + { + "epoch": 1.1395594713656387, + "grad_norm": 1.6784618730938181, + "learning_rate": 8.531254437533925e-06, + "loss": 0.6627654433250427, + "step": 3234 + }, + { + "epoch": 
1.1399118942731277, + "grad_norm": 1.770712809653697, + "learning_rate": 8.525491477810671e-06, + "loss": 0.6365151405334473, + "step": 3235 + }, + { + "epoch": 1.1402643171806168, + "grad_norm": 1.6623213186798471, + "learning_rate": 8.519729018637164e-06, + "loss": 0.5207303762435913, + "step": 3236 + }, + { + "epoch": 1.1406167400881058, + "grad_norm": 1.8240600257881658, + "learning_rate": 8.513967061969594e-06, + "loss": 0.7469059228897095, + "step": 3237 + }, + { + "epoch": 1.1409691629955947, + "grad_norm": 1.7786802310337648, + "learning_rate": 8.508205609763955e-06, + "loss": 0.5778630971908569, + "step": 3238 + }, + { + "epoch": 1.1413215859030836, + "grad_norm": 1.756406665695002, + "learning_rate": 8.502444663976089e-06, + "loss": 0.5447480082511902, + "step": 3239 + }, + { + "epoch": 1.1416740088105728, + "grad_norm": 1.628690443424602, + "learning_rate": 8.496684226561653e-06, + "loss": 0.6002986431121826, + "step": 3240 + }, + { + "epoch": 1.1420264317180617, + "grad_norm": 1.7257255594282812, + "learning_rate": 8.490924299476133e-06, + "loss": 0.7627072930335999, + "step": 3241 + }, + { + "epoch": 1.1423788546255507, + "grad_norm": 1.725113553289998, + "learning_rate": 8.485164884674854e-06, + "loss": 0.6406078338623047, + "step": 3242 + }, + { + "epoch": 1.1427312775330396, + "grad_norm": 2.110533369358698, + "learning_rate": 8.479405984112949e-06, + "loss": 0.47047436237335205, + "step": 3243 + }, + { + "epoch": 1.1430837004405285, + "grad_norm": 2.0564519486525903, + "learning_rate": 8.473647599745393e-06, + "loss": 0.6702529191970825, + "step": 3244 + }, + { + "epoch": 1.1434361233480177, + "grad_norm": 2.1168699536348488, + "learning_rate": 8.467889733526977e-06, + "loss": 0.6570258140563965, + "step": 3245 + }, + { + "epoch": 1.1437885462555066, + "grad_norm": 11.021488641985083, + "learning_rate": 8.462132387412312e-06, + "loss": 0.6248423457145691, + "step": 3246 + }, + { + "epoch": 1.1441409691629956, + "grad_norm": 1.6339128666105858, + 
"learning_rate": 8.456375563355842e-06, + "loss": 0.7377427816390991, + "step": 3247 + }, + { + "epoch": 1.1444933920704845, + "grad_norm": 1.8159484011485405, + "learning_rate": 8.45061926331183e-06, + "loss": 0.6469020843505859, + "step": 3248 + }, + { + "epoch": 1.1448458149779737, + "grad_norm": 1.81461416151687, + "learning_rate": 8.444863489234356e-06, + "loss": 0.6417430639266968, + "step": 3249 + }, + { + "epoch": 1.1451982378854626, + "grad_norm": 1.7715952211280361, + "learning_rate": 8.439108243077335e-06, + "loss": 0.5447275638580322, + "step": 3250 + }, + { + "epoch": 1.1455506607929515, + "grad_norm": 1.8341737914542349, + "learning_rate": 8.433353526794484e-06, + "loss": 0.6621315479278564, + "step": 3251 + }, + { + "epoch": 1.1459030837004405, + "grad_norm": 1.850872292820976, + "learning_rate": 8.42759934233936e-06, + "loss": 0.5660392045974731, + "step": 3252 + }, + { + "epoch": 1.1462555066079294, + "grad_norm": 1.695638018183687, + "learning_rate": 8.42184569166532e-06, + "loss": 0.43074172735214233, + "step": 3253 + }, + { + "epoch": 1.1466079295154186, + "grad_norm": 1.6152519611154568, + "learning_rate": 8.416092576725554e-06, + "loss": 0.5863226056098938, + "step": 3254 + }, + { + "epoch": 1.1469603524229075, + "grad_norm": 1.8724827582882198, + "learning_rate": 8.410339999473067e-06, + "loss": 0.6003422737121582, + "step": 3255 + }, + { + "epoch": 1.1473127753303964, + "grad_norm": 1.806876842860533, + "learning_rate": 8.404587961860678e-06, + "loss": 0.6109241247177124, + "step": 3256 + }, + { + "epoch": 1.1476651982378854, + "grad_norm": 1.7768687099142642, + "learning_rate": 8.398836465841021e-06, + "loss": 0.5749140977859497, + "step": 3257 + }, + { + "epoch": 1.1480176211453745, + "grad_norm": 1.762377433704451, + "learning_rate": 8.393085513366557e-06, + "loss": 0.6920739412307739, + "step": 3258 + }, + { + "epoch": 1.1483700440528635, + "grad_norm": 1.903311052790267, + "learning_rate": 8.38733510638955e-06, + "loss": 
0.6632573008537292, + "step": 3259 + }, + { + "epoch": 1.1487224669603524, + "grad_norm": 1.925929272799836, + "learning_rate": 8.381585246862091e-06, + "loss": 0.6396503448486328, + "step": 3260 + }, + { + "epoch": 1.1490748898678413, + "grad_norm": 4.327872701462553, + "learning_rate": 8.375835936736072e-06, + "loss": 0.5975937843322754, + "step": 3261 + }, + { + "epoch": 1.1494273127753303, + "grad_norm": 1.9097739370767552, + "learning_rate": 8.370087177963204e-06, + "loss": 0.6297920346260071, + "step": 3262 + }, + { + "epoch": 1.1497797356828194, + "grad_norm": 1.6773858737351708, + "learning_rate": 8.364338972495016e-06, + "loss": 0.7004375457763672, + "step": 3263 + }, + { + "epoch": 1.1501321585903084, + "grad_norm": 1.9905333664754346, + "learning_rate": 8.358591322282845e-06, + "loss": 0.5850871801376343, + "step": 3264 + }, + { + "epoch": 1.1504845814977973, + "grad_norm": 1.6216139435027066, + "learning_rate": 8.352844229277834e-06, + "loss": 0.493900865316391, + "step": 3265 + }, + { + "epoch": 1.1508370044052865, + "grad_norm": 1.8994324319983171, + "learning_rate": 8.34709769543095e-06, + "loss": 0.573354959487915, + "step": 3266 + }, + { + "epoch": 1.1511894273127754, + "grad_norm": 2.1672972359364175, + "learning_rate": 8.341351722692951e-06, + "loss": 0.7154442667961121, + "step": 3267 + }, + { + "epoch": 1.1515418502202643, + "grad_norm": 1.705511845117997, + "learning_rate": 8.335606313014432e-06, + "loss": 0.5429074764251709, + "step": 3268 + }, + { + "epoch": 1.1518942731277533, + "grad_norm": 1.8606068751906144, + "learning_rate": 8.329861468345768e-06, + "loss": 0.6938891410827637, + "step": 3269 + }, + { + "epoch": 1.1522466960352422, + "grad_norm": 5.765839224937511, + "learning_rate": 8.324117190637157e-06, + "loss": 0.7114205360412598, + "step": 3270 + }, + { + "epoch": 1.1525991189427314, + "grad_norm": 1.761532917196708, + "learning_rate": 8.318373481838605e-06, + "loss": 0.5353071093559265, + "step": 3271 + }, + { + "epoch": 
1.1529515418502203, + "grad_norm": 1.931038515640054, + "learning_rate": 8.312630343899921e-06, + "loss": 0.7838516235351562, + "step": 3272 + }, + { + "epoch": 1.1533039647577092, + "grad_norm": 2.013028743927059, + "learning_rate": 8.306887778770724e-06, + "loss": 0.630479633808136, + "step": 3273 + }, + { + "epoch": 1.1536563876651982, + "grad_norm": 1.908388737326531, + "learning_rate": 8.301145788400438e-06, + "loss": 0.6568116545677185, + "step": 3274 + }, + { + "epoch": 1.1540088105726873, + "grad_norm": 1.4673620532583986, + "learning_rate": 8.295404374738278e-06, + "loss": 0.5410804748535156, + "step": 3275 + }, + { + "epoch": 1.1543612334801763, + "grad_norm": 2.0887831204496017, + "learning_rate": 8.289663539733292e-06, + "loss": 0.6699862480163574, + "step": 3276 + }, + { + "epoch": 1.1547136563876652, + "grad_norm": 2.146352543425904, + "learning_rate": 8.283923285334304e-06, + "loss": 0.6828576326370239, + "step": 3277 + }, + { + "epoch": 1.1550660792951541, + "grad_norm": 1.6441665475307043, + "learning_rate": 8.278183613489951e-06, + "loss": 0.5569214820861816, + "step": 3278 + }, + { + "epoch": 1.155418502202643, + "grad_norm": 1.5736783771881073, + "learning_rate": 8.27244452614868e-06, + "loss": 0.6276477575302124, + "step": 3279 + }, + { + "epoch": 1.1557709251101322, + "grad_norm": 1.639795393267647, + "learning_rate": 8.266706025258727e-06, + "loss": 0.5752792954444885, + "step": 3280 + }, + { + "epoch": 1.1561233480176212, + "grad_norm": 1.8007170708068962, + "learning_rate": 8.260968112768137e-06, + "loss": 0.6149388551712036, + "step": 3281 + }, + { + "epoch": 1.1564757709251101, + "grad_norm": 1.8241425629966381, + "learning_rate": 8.255230790624755e-06, + "loss": 0.6399196982383728, + "step": 3282 + }, + { + "epoch": 1.156828193832599, + "grad_norm": 1.8065599712551461, + "learning_rate": 8.249494060776215e-06, + "loss": 0.6927458047866821, + "step": 3283 + }, + { + "epoch": 1.1571806167400882, + "grad_norm": 1.5535864037785454, + 
"learning_rate": 8.243757925169968e-06, + "loss": 0.5843946933746338, + "step": 3284 + }, + { + "epoch": 1.1575330396475771, + "grad_norm": 1.7771012211418213, + "learning_rate": 8.238022385753248e-06, + "loss": 0.6469332575798035, + "step": 3285 + }, + { + "epoch": 1.157885462555066, + "grad_norm": 1.5500454202505596, + "learning_rate": 8.23228744447309e-06, + "loss": 0.572630763053894, + "step": 3286 + }, + { + "epoch": 1.158237885462555, + "grad_norm": 1.7219264264044976, + "learning_rate": 8.226553103276335e-06, + "loss": 0.6872239112854004, + "step": 3287 + }, + { + "epoch": 1.158590308370044, + "grad_norm": 1.7206454172461807, + "learning_rate": 8.220819364109607e-06, + "loss": 0.5116995573043823, + "step": 3288 + }, + { + "epoch": 1.1589427312775331, + "grad_norm": 1.7411708693012447, + "learning_rate": 8.215086228919336e-06, + "loss": 0.6179347038269043, + "step": 3289 + }, + { + "epoch": 1.159295154185022, + "grad_norm": 1.5098995111565061, + "learning_rate": 8.209353699651745e-06, + "loss": 0.573688805103302, + "step": 3290 + }, + { + "epoch": 1.159647577092511, + "grad_norm": 1.6209084651188936, + "learning_rate": 8.20362177825284e-06, + "loss": 0.6622583866119385, + "step": 3291 + }, + { + "epoch": 1.16, + "grad_norm": 1.9043199482736668, + "learning_rate": 8.197890466668441e-06, + "loss": 0.4945096969604492, + "step": 3292 + }, + { + "epoch": 1.160352422907489, + "grad_norm": 1.9477760218669748, + "learning_rate": 8.19215976684414e-06, + "loss": 0.5657082796096802, + "step": 3293 + }, + { + "epoch": 1.160704845814978, + "grad_norm": 1.7301213281073105, + "learning_rate": 8.186429680725339e-06, + "loss": 0.5684623122215271, + "step": 3294 + }, + { + "epoch": 1.161057268722467, + "grad_norm": 1.7781389678625354, + "learning_rate": 8.180700210257223e-06, + "loss": 0.567638635635376, + "step": 3295 + }, + { + "epoch": 1.1614096916299559, + "grad_norm": 1.8973989975016394, + "learning_rate": 8.174971357384762e-06, + "loss": 0.7182992696762085, + "step": 
3296 + }, + { + "epoch": 1.1617621145374448, + "grad_norm": 1.7202032555937063, + "learning_rate": 8.169243124052731e-06, + "loss": 0.7188737392425537, + "step": 3297 + }, + { + "epoch": 1.162114537444934, + "grad_norm": 1.632750713102644, + "learning_rate": 8.163515512205687e-06, + "loss": 0.5532418489456177, + "step": 3298 + }, + { + "epoch": 1.162466960352423, + "grad_norm": 2.2725291479645136, + "learning_rate": 8.157788523787967e-06, + "loss": 0.7167447209358215, + "step": 3299 + }, + { + "epoch": 1.1628193832599119, + "grad_norm": 1.8053860419209504, + "learning_rate": 8.152062160743716e-06, + "loss": 0.633411169052124, + "step": 3300 + }, + { + "epoch": 1.1631718061674008, + "grad_norm": 1.8006555184567121, + "learning_rate": 8.146336425016849e-06, + "loss": 0.6686321496963501, + "step": 3301 + }, + { + "epoch": 1.16352422907489, + "grad_norm": 1.884331587638867, + "learning_rate": 8.140611318551078e-06, + "loss": 0.608701765537262, + "step": 3302 + }, + { + "epoch": 1.1638766519823789, + "grad_norm": 1.6532674404979102, + "learning_rate": 8.1348868432899e-06, + "loss": 0.5607466101646423, + "step": 3303 + }, + { + "epoch": 1.1642290748898678, + "grad_norm": 1.9224536271892947, + "learning_rate": 8.12916300117659e-06, + "loss": 0.6397457122802734, + "step": 3304 + }, + { + "epoch": 1.1645814977973568, + "grad_norm": 1.9075190910370474, + "learning_rate": 8.123439794154223e-06, + "loss": 0.6681507229804993, + "step": 3305 + }, + { + "epoch": 1.1649339207048457, + "grad_norm": 1.7601065273352539, + "learning_rate": 8.117717224165645e-06, + "loss": 0.5549972057342529, + "step": 3306 + }, + { + "epoch": 1.1652863436123349, + "grad_norm": 1.9981914923817063, + "learning_rate": 8.111995293153486e-06, + "loss": 0.7519058585166931, + "step": 3307 + }, + { + "epoch": 1.1656387665198238, + "grad_norm": 1.8817978978557874, + "learning_rate": 8.106274003060172e-06, + "loss": 0.7100121378898621, + "step": 3308 + }, + { + "epoch": 1.1659911894273127, + "grad_norm": 
2.081586750876693, + "learning_rate": 8.100553355827897e-06, + "loss": 0.6297321319580078, + "step": 3309 + }, + { + "epoch": 1.1663436123348019, + "grad_norm": 2.2854313216105635, + "learning_rate": 8.094833353398645e-06, + "loss": 0.6875895857810974, + "step": 3310 + }, + { + "epoch": 1.1666960352422908, + "grad_norm": 1.7297215389141958, + "learning_rate": 8.08911399771418e-06, + "loss": 0.5369099974632263, + "step": 3311 + }, + { + "epoch": 1.1670484581497798, + "grad_norm": 1.7209622601094259, + "learning_rate": 8.083395290716042e-06, + "loss": 0.5598124265670776, + "step": 3312 + }, + { + "epoch": 1.1674008810572687, + "grad_norm": 1.6153396072397332, + "learning_rate": 8.077677234345557e-06, + "loss": 0.6438342332839966, + "step": 3313 + }, + { + "epoch": 1.1677533039647576, + "grad_norm": 1.649767256033485, + "learning_rate": 8.07195983054383e-06, + "loss": 0.5558618307113647, + "step": 3314 + }, + { + "epoch": 1.1681057268722468, + "grad_norm": 1.744681713922102, + "learning_rate": 8.06624308125173e-06, + "loss": 0.5729602575302124, + "step": 3315 + }, + { + "epoch": 1.1684581497797357, + "grad_norm": 2.294706401477936, + "learning_rate": 8.060526988409929e-06, + "loss": 0.5094903707504272, + "step": 3316 + }, + { + "epoch": 1.1688105726872247, + "grad_norm": 1.6352779890455922, + "learning_rate": 8.054811553958853e-06, + "loss": 0.6605818867683411, + "step": 3317 + }, + { + "epoch": 1.1691629955947136, + "grad_norm": 2.240048633930669, + "learning_rate": 8.04909677983872e-06, + "loss": 0.7929576635360718, + "step": 3318 + }, + { + "epoch": 1.1695154185022028, + "grad_norm": 1.7445241989865017, + "learning_rate": 8.043382667989514e-06, + "loss": 0.5915192365646362, + "step": 3319 + }, + { + "epoch": 1.1698678414096917, + "grad_norm": 1.6537456786938194, + "learning_rate": 8.037669220351e-06, + "loss": 0.5923853516578674, + "step": 3320 + }, + { + "epoch": 1.1702202643171806, + "grad_norm": 1.7692219343864357, + "learning_rate": 8.031956438862718e-06, + 
"loss": 0.7034223079681396, + "step": 3321 + }, + { + "epoch": 1.1705726872246696, + "grad_norm": 1.699093684077835, + "learning_rate": 8.026244325463975e-06, + "loss": 0.6093307733535767, + "step": 3322 + }, + { + "epoch": 1.1709251101321585, + "grad_norm": 1.820021264359909, + "learning_rate": 8.020532882093862e-06, + "loss": 0.5709424614906311, + "step": 3323 + }, + { + "epoch": 1.1712775330396477, + "grad_norm": 1.6327248259933085, + "learning_rate": 8.01482211069123e-06, + "loss": 0.5242069959640503, + "step": 3324 + }, + { + "epoch": 1.1716299559471366, + "grad_norm": 1.8755413800206977, + "learning_rate": 8.009112013194707e-06, + "loss": 0.5869580507278442, + "step": 3325 + }, + { + "epoch": 1.1719823788546255, + "grad_norm": 1.927667149386539, + "learning_rate": 8.0034025915427e-06, + "loss": 0.7281460762023926, + "step": 3326 + }, + { + "epoch": 1.1723348017621145, + "grad_norm": 1.8020991914636244, + "learning_rate": 7.997693847673378e-06, + "loss": 0.6877723336219788, + "step": 3327 + }, + { + "epoch": 1.1726872246696036, + "grad_norm": 1.4739994768631006, + "learning_rate": 7.991985783524676e-06, + "loss": 0.6045002937316895, + "step": 3328 + }, + { + "epoch": 1.1730396475770926, + "grad_norm": 1.7637996531853402, + "learning_rate": 7.986278401034315e-06, + "loss": 0.5698690414428711, + "step": 3329 + }, + { + "epoch": 1.1733920704845815, + "grad_norm": 1.879664532548966, + "learning_rate": 7.980571702139759e-06, + "loss": 0.6802438497543335, + "step": 3330 + }, + { + "epoch": 1.1737444933920704, + "grad_norm": 1.9432824884843154, + "learning_rate": 7.974865688778271e-06, + "loss": 0.5840654373168945, + "step": 3331 + }, + { + "epoch": 1.1740969162995594, + "grad_norm": 1.7557288678447098, + "learning_rate": 7.969160362886855e-06, + "loss": 0.5203073024749756, + "step": 3332 + }, + { + "epoch": 1.1744493392070485, + "grad_norm": 1.5188701776399616, + "learning_rate": 7.963455726402292e-06, + "loss": 0.4558306932449341, + "step": 3333 + }, + { + "epoch": 
1.1748017621145375, + "grad_norm": 1.8464169088081481, + "learning_rate": 7.957751781261132e-06, + "loss": 0.6200483441352844, + "step": 3334 + }, + { + "epoch": 1.1751541850220264, + "grad_norm": 1.4009839443781218, + "learning_rate": 7.952048529399686e-06, + "loss": 0.559386670589447, + "step": 3335 + }, + { + "epoch": 1.1755066079295153, + "grad_norm": 1.5776847118393618, + "learning_rate": 7.946345972754026e-06, + "loss": 0.5521356463432312, + "step": 3336 + }, + { + "epoch": 1.1758590308370045, + "grad_norm": 1.6725655120909741, + "learning_rate": 7.940644113260001e-06, + "loss": 0.6235495805740356, + "step": 3337 + }, + { + "epoch": 1.1762114537444934, + "grad_norm": 1.6364629990686756, + "learning_rate": 7.934942952853203e-06, + "loss": 0.5196648836135864, + "step": 3338 + }, + { + "epoch": 1.1765638766519824, + "grad_norm": 1.658819201732712, + "learning_rate": 7.929242493469013e-06, + "loss": 0.5959422588348389, + "step": 3339 + }, + { + "epoch": 1.1769162995594713, + "grad_norm": 1.8867606277211662, + "learning_rate": 7.923542737042549e-06, + "loss": 0.5400167107582092, + "step": 3340 + }, + { + "epoch": 1.1772687224669602, + "grad_norm": 1.8686352871929341, + "learning_rate": 7.917843685508702e-06, + "loss": 0.688996434211731, + "step": 3341 + }, + { + "epoch": 1.1776211453744494, + "grad_norm": 1.844624213320976, + "learning_rate": 7.912145340802127e-06, + "loss": 0.623216450214386, + "step": 3342 + }, + { + "epoch": 1.1779735682819383, + "grad_norm": 1.7951119497780943, + "learning_rate": 7.906447704857233e-06, + "loss": 0.587382435798645, + "step": 3343 + }, + { + "epoch": 1.1783259911894273, + "grad_norm": 1.4508698182802122, + "learning_rate": 7.900750779608187e-06, + "loss": 0.6033053398132324, + "step": 3344 + }, + { + "epoch": 1.1786784140969162, + "grad_norm": 1.5026274052311877, + "learning_rate": 7.895054566988924e-06, + "loss": 0.557671308517456, + "step": 3345 + }, + { + "epoch": 1.1790308370044054, + "grad_norm": 1.6193785911353318, + 
"learning_rate": 7.889359068933122e-06, + "loss": 0.4550681710243225, + "step": 3346 + }, + { + "epoch": 1.1793832599118943, + "grad_norm": 1.7532225132073032, + "learning_rate": 7.883664287374235e-06, + "loss": 0.6417531967163086, + "step": 3347 + }, + { + "epoch": 1.1797356828193832, + "grad_norm": 2.046641045277204, + "learning_rate": 7.877970224245458e-06, + "loss": 0.703549861907959, + "step": 3348 + }, + { + "epoch": 1.1800881057268722, + "grad_norm": 1.9966595548369739, + "learning_rate": 7.87227688147975e-06, + "loss": 0.7438976764678955, + "step": 3349 + }, + { + "epoch": 1.1804405286343613, + "grad_norm": 1.9757665254478705, + "learning_rate": 7.866584261009823e-06, + "loss": 0.5563932657241821, + "step": 3350 + }, + { + "epoch": 1.1807929515418503, + "grad_norm": 1.9705828017858218, + "learning_rate": 7.860892364768145e-06, + "loss": 0.6332740783691406, + "step": 3351 + }, + { + "epoch": 1.1811453744493392, + "grad_norm": 1.6800252042998722, + "learning_rate": 7.855201194686938e-06, + "loss": 0.5207923650741577, + "step": 3352 + }, + { + "epoch": 1.1814977973568281, + "grad_norm": 1.704285155728578, + "learning_rate": 7.849510752698179e-06, + "loss": 0.5930209755897522, + "step": 3353 + }, + { + "epoch": 1.1818502202643173, + "grad_norm": 1.9626347095192314, + "learning_rate": 7.843821040733588e-06, + "loss": 0.6207472085952759, + "step": 3354 + }, + { + "epoch": 1.1822026431718062, + "grad_norm": 1.631891920380694, + "learning_rate": 7.838132060724657e-06, + "loss": 0.5487867593765259, + "step": 3355 + }, + { + "epoch": 1.1825550660792952, + "grad_norm": 1.719446635213068, + "learning_rate": 7.83244381460261e-06, + "loss": 0.5457941889762878, + "step": 3356 + }, + { + "epoch": 1.182907488986784, + "grad_norm": 4.79087339281713, + "learning_rate": 7.826756304298428e-06, + "loss": 0.5203769207000732, + "step": 3357 + }, + { + "epoch": 1.183259911894273, + "grad_norm": 2.2130523974851006, + "learning_rate": 7.821069531742848e-06, + "loss": 
0.7241770029067993, + "step": 3358 + }, + { + "epoch": 1.1836123348017622, + "grad_norm": 1.872241533824603, + "learning_rate": 7.815383498866351e-06, + "loss": 0.5085904598236084, + "step": 3359 + }, + { + "epoch": 1.1839647577092511, + "grad_norm": 1.7457024495825946, + "learning_rate": 7.80969820759917e-06, + "loss": 0.6219276785850525, + "step": 3360 + }, + { + "epoch": 1.18431718061674, + "grad_norm": 1.657619548935653, + "learning_rate": 7.804013659871286e-06, + "loss": 0.5621576309204102, + "step": 3361 + }, + { + "epoch": 1.184669603524229, + "grad_norm": 2.006942738555184, + "learning_rate": 7.798329857612415e-06, + "loss": 0.6862529516220093, + "step": 3362 + }, + { + "epoch": 1.1850220264317182, + "grad_norm": 1.6254700608957282, + "learning_rate": 7.792646802752045e-06, + "loss": 0.5536706447601318, + "step": 3363 + }, + { + "epoch": 1.185374449339207, + "grad_norm": 1.8365676060407183, + "learning_rate": 7.786964497219389e-06, + "loss": 0.7158493995666504, + "step": 3364 + }, + { + "epoch": 1.185726872246696, + "grad_norm": 1.5882377854785632, + "learning_rate": 7.781282942943411e-06, + "loss": 0.6510338187217712, + "step": 3365 + }, + { + "epoch": 1.186079295154185, + "grad_norm": 1.6887309758558333, + "learning_rate": 7.775602141852827e-06, + "loss": 0.4999651312828064, + "step": 3366 + }, + { + "epoch": 1.186431718061674, + "grad_norm": 1.7482854003458987, + "learning_rate": 7.769922095876088e-06, + "loss": 0.566371738910675, + "step": 3367 + }, + { + "epoch": 1.186784140969163, + "grad_norm": 1.8523910267151578, + "learning_rate": 7.764242806941396e-06, + "loss": 0.6424880623817444, + "step": 3368 + }, + { + "epoch": 1.187136563876652, + "grad_norm": 1.7770666290685069, + "learning_rate": 7.758564276976696e-06, + "loss": 0.6731792688369751, + "step": 3369 + }, + { + "epoch": 1.187488986784141, + "grad_norm": 1.8284341736993877, + "learning_rate": 7.752886507909661e-06, + "loss": 0.7350698113441467, + "step": 3370 + }, + { + "epoch": 
1.1878414096916299, + "grad_norm": 1.6211597569244138, + "learning_rate": 7.747209501667729e-06, + "loss": 0.49212586879730225, + "step": 3371 + }, + { + "epoch": 1.188193832599119, + "grad_norm": 1.8399284999038652, + "learning_rate": 7.741533260178058e-06, + "loss": 0.46775591373443604, + "step": 3372 + }, + { + "epoch": 1.188546255506608, + "grad_norm": 1.9173381710912725, + "learning_rate": 7.73585778536756e-06, + "loss": 0.7006367444992065, + "step": 3373 + }, + { + "epoch": 1.188898678414097, + "grad_norm": 1.9011259462553447, + "learning_rate": 7.730183079162882e-06, + "loss": 0.6403789520263672, + "step": 3374 + }, + { + "epoch": 1.1892511013215858, + "grad_norm": 1.7192698764020407, + "learning_rate": 7.724509143490409e-06, + "loss": 0.5788881778717041, + "step": 3375 + }, + { + "epoch": 1.1896035242290748, + "grad_norm": 1.8160886708158774, + "learning_rate": 7.718835980276265e-06, + "loss": 0.5216118693351746, + "step": 3376 + }, + { + "epoch": 1.189955947136564, + "grad_norm": 1.8022868379388808, + "learning_rate": 7.713163591446318e-06, + "loss": 0.5951248407363892, + "step": 3377 + }, + { + "epoch": 1.1903083700440529, + "grad_norm": 1.7460515067285554, + "learning_rate": 7.707491978926157e-06, + "loss": 0.4975050687789917, + "step": 3378 + }, + { + "epoch": 1.1906607929515418, + "grad_norm": 1.770763460120106, + "learning_rate": 7.701821144641127e-06, + "loss": 0.6019243001937866, + "step": 3379 + }, + { + "epoch": 1.1910132158590307, + "grad_norm": 1.7832166509700509, + "learning_rate": 7.696151090516292e-06, + "loss": 0.6395450830459595, + "step": 3380 + }, + { + "epoch": 1.19136563876652, + "grad_norm": 1.6347986183513594, + "learning_rate": 7.690481818476468e-06, + "loss": 0.579787015914917, + "step": 3381 + }, + { + "epoch": 1.1917180616740088, + "grad_norm": 1.4744637046036069, + "learning_rate": 7.684813330446191e-06, + "loss": 0.5136005878448486, + "step": 3382 + }, + { + "epoch": 1.1920704845814978, + "grad_norm": 1.7266158280823927, + 
"learning_rate": 7.679145628349734e-06, + "loss": 0.6639782190322876, + "step": 3383 + }, + { + "epoch": 1.1924229074889867, + "grad_norm": 1.8900727159770023, + "learning_rate": 7.673478714111111e-06, + "loss": 0.5575984716415405, + "step": 3384 + }, + { + "epoch": 1.1927753303964757, + "grad_norm": 2.0885094289190658, + "learning_rate": 7.667812589654062e-06, + "loss": 0.6456045508384705, + "step": 3385 + }, + { + "epoch": 1.1931277533039648, + "grad_norm": 1.9286041654650978, + "learning_rate": 7.662147256902055e-06, + "loss": 0.6936196088790894, + "step": 3386 + }, + { + "epoch": 1.1934801762114537, + "grad_norm": 1.758654368664718, + "learning_rate": 7.656482717778299e-06, + "loss": 0.5490384697914124, + "step": 3387 + }, + { + "epoch": 1.1938325991189427, + "grad_norm": 1.9621511017976598, + "learning_rate": 7.650818974205727e-06, + "loss": 0.6973621845245361, + "step": 3388 + }, + { + "epoch": 1.1941850220264318, + "grad_norm": 1.835769632858156, + "learning_rate": 7.645156028107005e-06, + "loss": 0.7471047639846802, + "step": 3389 + }, + { + "epoch": 1.1945374449339208, + "grad_norm": 1.7902415027725214, + "learning_rate": 7.639493881404526e-06, + "loss": 0.6205108165740967, + "step": 3390 + }, + { + "epoch": 1.1948898678414097, + "grad_norm": 1.6920866725907067, + "learning_rate": 7.63383253602041e-06, + "loss": 0.747038722038269, + "step": 3391 + }, + { + "epoch": 1.1952422907488987, + "grad_norm": 1.5771320255200836, + "learning_rate": 7.628171993876514e-06, + "loss": 0.5185794830322266, + "step": 3392 + }, + { + "epoch": 1.1955947136563876, + "grad_norm": 1.6878325344643712, + "learning_rate": 7.6225122568944124e-06, + "loss": 0.6059385538101196, + "step": 3393 + }, + { + "epoch": 1.1959471365638767, + "grad_norm": 1.6275144870635614, + "learning_rate": 7.6168533269954045e-06, + "loss": 0.5154507160186768, + "step": 3394 + }, + { + "epoch": 1.1962995594713657, + "grad_norm": 1.8584269669132367, + "learning_rate": 7.611195206100529e-06, + "loss": 
0.684306263923645, + "step": 3395 + }, + { + "epoch": 1.1966519823788546, + "grad_norm": 1.60676147024925, + "learning_rate": 7.605537896130537e-06, + "loss": 0.5637205839157104, + "step": 3396 + }, + { + "epoch": 1.1970044052863436, + "grad_norm": 2.099988274984523, + "learning_rate": 7.599881399005913e-06, + "loss": 0.700809121131897, + "step": 3397 + }, + { + "epoch": 1.1973568281938327, + "grad_norm": 1.8285381374549698, + "learning_rate": 7.594225716646859e-06, + "loss": 0.45139041543006897, + "step": 3398 + }, + { + "epoch": 1.1977092511013216, + "grad_norm": 1.9616153744225684, + "learning_rate": 7.588570850973301e-06, + "loss": 0.6623016595840454, + "step": 3399 + }, + { + "epoch": 1.1980616740088106, + "grad_norm": 1.5510325285611402, + "learning_rate": 7.582916803904899e-06, + "loss": 0.47430598735809326, + "step": 3400 + }, + { + "epoch": 1.1984140969162995, + "grad_norm": 1.7180906175268718, + "learning_rate": 7.57726357736101e-06, + "loss": 0.7190637588500977, + "step": 3401 + }, + { + "epoch": 1.1987665198237885, + "grad_norm": 1.4703339836450204, + "learning_rate": 7.571611173260747e-06, + "loss": 0.552079439163208, + "step": 3402 + }, + { + "epoch": 1.1991189427312776, + "grad_norm": 1.665813020849203, + "learning_rate": 7.565959593522914e-06, + "loss": 0.5499744415283203, + "step": 3403 + }, + { + "epoch": 1.1994713656387666, + "grad_norm": 1.6507149154277247, + "learning_rate": 7.560308840066046e-06, + "loss": 0.6013774871826172, + "step": 3404 + }, + { + "epoch": 1.1998237885462555, + "grad_norm": 1.5847999964914972, + "learning_rate": 7.554658914808404e-06, + "loss": 0.5489538908004761, + "step": 3405 + }, + { + "epoch": 1.2001762114537444, + "grad_norm": 1.72263968265959, + "learning_rate": 7.549009819667956e-06, + "loss": 0.6124382615089417, + "step": 3406 + }, + { + "epoch": 1.2005286343612336, + "grad_norm": 2.1073738195754594, + "learning_rate": 7.543361556562397e-06, + "loss": 0.6895862817764282, + "step": 3407 + }, + { + "epoch": 
1.2008810572687225, + "grad_norm": 2.063900978481081, + "learning_rate": 7.537714127409139e-06, + "loss": 0.6632197499275208, + "step": 3408 + }, + { + "epoch": 1.2012334801762115, + "grad_norm": 1.6352648722318401, + "learning_rate": 7.5320675341253e-06, + "loss": 0.5940145254135132, + "step": 3409 + }, + { + "epoch": 1.2015859030837004, + "grad_norm": 1.884013328310988, + "learning_rate": 7.526421778627735e-06, + "loss": 0.646323561668396, + "step": 3410 + }, + { + "epoch": 1.2019383259911893, + "grad_norm": 1.7070941231545174, + "learning_rate": 7.520776862832993e-06, + "loss": 0.6173659563064575, + "step": 3411 + }, + { + "epoch": 1.2022907488986785, + "grad_norm": 1.8582208465763577, + "learning_rate": 7.515132788657347e-06, + "loss": 0.574191689491272, + "step": 3412 + }, + { + "epoch": 1.2026431718061674, + "grad_norm": 1.9220370982111243, + "learning_rate": 7.50948955801679e-06, + "loss": 0.6243089437484741, + "step": 3413 + }, + { + "epoch": 1.2029955947136564, + "grad_norm": 1.7949632694678572, + "learning_rate": 7.503847172827022e-06, + "loss": 0.692270040512085, + "step": 3414 + }, + { + "epoch": 1.2033480176211453, + "grad_norm": 1.6803082040464332, + "learning_rate": 7.498205635003451e-06, + "loss": 0.5929970145225525, + "step": 3415 + }, + { + "epoch": 1.2037004405286345, + "grad_norm": 1.6077232593078599, + "learning_rate": 7.4925649464612126e-06, + "loss": 0.5479272603988647, + "step": 3416 + }, + { + "epoch": 1.2040528634361234, + "grad_norm": 1.5415384890909907, + "learning_rate": 7.486925109115135e-06, + "loss": 0.5923635363578796, + "step": 3417 + }, + { + "epoch": 1.2044052863436123, + "grad_norm": 1.7506756122488851, + "learning_rate": 7.48128612487978e-06, + "loss": 0.6530192494392395, + "step": 3418 + }, + { + "epoch": 1.2047577092511013, + "grad_norm": 1.533550542452438, + "learning_rate": 7.475647995669397e-06, + "loss": 0.5104716420173645, + "step": 3419 + }, + { + "epoch": 1.2051101321585902, + "grad_norm": 1.8415327152950194, + 
"learning_rate": 7.470010723397958e-06, + "loss": 0.6526790261268616, + "step": 3420 + }, + { + "epoch": 1.2054625550660794, + "grad_norm": 1.746747219195987, + "learning_rate": 7.464374309979143e-06, + "loss": 0.5985254645347595, + "step": 3421 + }, + { + "epoch": 1.2058149779735683, + "grad_norm": 1.9679342498420438, + "learning_rate": 7.458738757326336e-06, + "loss": 0.6575271487236023, + "step": 3422 + }, + { + "epoch": 1.2061674008810572, + "grad_norm": 1.7353179250025277, + "learning_rate": 7.453104067352637e-06, + "loss": 0.5906708836555481, + "step": 3423 + }, + { + "epoch": 1.2065198237885462, + "grad_norm": 1.7518769855954601, + "learning_rate": 7.4474702419708465e-06, + "loss": 0.7992517352104187, + "step": 3424 + }, + { + "epoch": 1.2068722466960353, + "grad_norm": 1.7067520122752557, + "learning_rate": 7.4418372830934645e-06, + "loss": 0.5935543179512024, + "step": 3425 + }, + { + "epoch": 1.2072246696035243, + "grad_norm": 1.877304862966978, + "learning_rate": 7.436205192632719e-06, + "loss": 0.7166613340377808, + "step": 3426 + }, + { + "epoch": 1.2075770925110132, + "grad_norm": 1.7575954983917004, + "learning_rate": 7.430573972500519e-06, + "loss": 0.5254578590393066, + "step": 3427 + }, + { + "epoch": 1.2079295154185021, + "grad_norm": 1.7449214411247376, + "learning_rate": 7.42494362460849e-06, + "loss": 0.6586379408836365, + "step": 3428 + }, + { + "epoch": 1.208281938325991, + "grad_norm": 1.7864206478373184, + "learning_rate": 7.419314150867964e-06, + "loss": 0.6960606575012207, + "step": 3429 + }, + { + "epoch": 1.2086343612334802, + "grad_norm": 1.7557785377406303, + "learning_rate": 7.413685553189969e-06, + "loss": 0.6107728481292725, + "step": 3430 + }, + { + "epoch": 1.2089867841409692, + "grad_norm": 1.624755754090177, + "learning_rate": 7.408057833485241e-06, + "loss": 0.6446499824523926, + "step": 3431 + }, + { + "epoch": 1.209339207048458, + "grad_norm": 1.9153166988080477, + "learning_rate": 7.402430993664216e-06, + "loss": 
0.7070472240447998, + "step": 3432 + }, + { + "epoch": 1.2096916299559473, + "grad_norm": 2.004011228140917, + "learning_rate": 7.396805035637023e-06, + "loss": 0.5919365882873535, + "step": 3433 + }, + { + "epoch": 1.2100440528634362, + "grad_norm": 1.7861550041093852, + "learning_rate": 7.391179961313512e-06, + "loss": 0.5975243449211121, + "step": 3434 + }, + { + "epoch": 1.2103964757709251, + "grad_norm": 1.6863010997131964, + "learning_rate": 7.385555772603212e-06, + "loss": 0.5772840976715088, + "step": 3435 + }, + { + "epoch": 1.210748898678414, + "grad_norm": 1.8451401620227157, + "learning_rate": 7.379932471415362e-06, + "loss": 0.7335072755813599, + "step": 3436 + }, + { + "epoch": 1.211101321585903, + "grad_norm": 2.0255796426124877, + "learning_rate": 7.3743100596589e-06, + "loss": 0.6214553713798523, + "step": 3437 + }, + { + "epoch": 1.2114537444933922, + "grad_norm": 1.8204785128516552, + "learning_rate": 7.368688539242457e-06, + "loss": 0.6515316963195801, + "step": 3438 + }, + { + "epoch": 1.211806167400881, + "grad_norm": 1.778475729690813, + "learning_rate": 7.3630679120743665e-06, + "loss": 0.6479551196098328, + "step": 3439 + }, + { + "epoch": 1.21215859030837, + "grad_norm": 1.8992442060407408, + "learning_rate": 7.357448180062657e-06, + "loss": 0.6195069551467896, + "step": 3440 + }, + { + "epoch": 1.212511013215859, + "grad_norm": 1.8044588174946172, + "learning_rate": 7.351829345115047e-06, + "loss": 0.5939193964004517, + "step": 3441 + }, + { + "epoch": 1.2128634361233481, + "grad_norm": 1.7404213735338998, + "learning_rate": 7.346211409138964e-06, + "loss": 0.6346434354782104, + "step": 3442 + }, + { + "epoch": 1.213215859030837, + "grad_norm": 1.7854241859310716, + "learning_rate": 7.340594374041516e-06, + "loss": 0.5924171209335327, + "step": 3443 + }, + { + "epoch": 1.213568281938326, + "grad_norm": 1.4550427635518266, + "learning_rate": 7.334978241729514e-06, + "loss": 0.48560285568237305, + "step": 3444 + }, + { + "epoch": 
1.213920704845815, + "grad_norm": 2.0456790867838865, + "learning_rate": 7.329363014109463e-06, + "loss": 0.643998384475708, + "step": 3445 + }, + { + "epoch": 1.2142731277533039, + "grad_norm": 1.9340204732587762, + "learning_rate": 7.323748693087551e-06, + "loss": 0.6041159629821777, + "step": 3446 + }, + { + "epoch": 1.214625550660793, + "grad_norm": 1.991943883280592, + "learning_rate": 7.318135280569674e-06, + "loss": 0.7143498659133911, + "step": 3447 + }, + { + "epoch": 1.214977973568282, + "grad_norm": 1.910490525820005, + "learning_rate": 7.312522778461409e-06, + "loss": 0.5821564197540283, + "step": 3448 + }, + { + "epoch": 1.215330396475771, + "grad_norm": 1.9609409525419488, + "learning_rate": 7.3069111886680166e-06, + "loss": 0.5786745548248291, + "step": 3449 + }, + { + "epoch": 1.2156828193832598, + "grad_norm": 1.7004659993753848, + "learning_rate": 7.3013005130944666e-06, + "loss": 0.6740534901618958, + "step": 3450 + }, + { + "epoch": 1.216035242290749, + "grad_norm": 1.9264837774532027, + "learning_rate": 7.2956907536454045e-06, + "loss": 0.6353983879089355, + "step": 3451 + }, + { + "epoch": 1.216387665198238, + "grad_norm": 1.6467978200520468, + "learning_rate": 7.290081912225172e-06, + "loss": 0.6890027523040771, + "step": 3452 + }, + { + "epoch": 1.2167400881057269, + "grad_norm": 2.194089687314607, + "learning_rate": 7.284473990737795e-06, + "loss": 0.6485118269920349, + "step": 3453 + }, + { + "epoch": 1.2170925110132158, + "grad_norm": 1.8020323615419078, + "learning_rate": 7.2788669910869845e-06, + "loss": 0.5364162921905518, + "step": 3454 + }, + { + "epoch": 1.2174449339207047, + "grad_norm": 1.8770204171846867, + "learning_rate": 7.27326091517615e-06, + "loss": 0.6625754833221436, + "step": 3455 + }, + { + "epoch": 1.217797356828194, + "grad_norm": 1.9138778572255513, + "learning_rate": 7.267655764908374e-06, + "loss": 0.7090050578117371, + "step": 3456 + }, + { + "epoch": 1.2181497797356828, + "grad_norm": 1.7151154871040917, + 
"learning_rate": 7.26205154218643e-06, + "loss": 0.6556301116943359, + "step": 3457 + }, + { + "epoch": 1.2185022026431718, + "grad_norm": 2.12213118759585, + "learning_rate": 7.2564482489127815e-06, + "loss": 0.7998625636100769, + "step": 3458 + }, + { + "epoch": 1.2188546255506607, + "grad_norm": 1.8721449700246833, + "learning_rate": 7.250845886989568e-06, + "loss": 0.6336952447891235, + "step": 3459 + }, + { + "epoch": 1.2192070484581499, + "grad_norm": 1.7786932342182031, + "learning_rate": 7.245244458318621e-06, + "loss": 0.5072300434112549, + "step": 3460 + }, + { + "epoch": 1.2195594713656388, + "grad_norm": 1.9350920817100896, + "learning_rate": 7.23964396480145e-06, + "loss": 0.6297830939292908, + "step": 3461 + }, + { + "epoch": 1.2199118942731277, + "grad_norm": 1.7384183002767206, + "learning_rate": 7.234044408339243e-06, + "loss": 0.5560386180877686, + "step": 3462 + }, + { + "epoch": 1.2202643171806167, + "grad_norm": 1.7834281461054429, + "learning_rate": 7.228445790832885e-06, + "loss": 0.5180274844169617, + "step": 3463 + }, + { + "epoch": 1.2206167400881056, + "grad_norm": 1.5903839847735544, + "learning_rate": 7.222848114182926e-06, + "loss": 0.4870688319206238, + "step": 3464 + }, + { + "epoch": 1.2209691629955948, + "grad_norm": 1.5913924611315027, + "learning_rate": 7.217251380289602e-06, + "loss": 0.46914681792259216, + "step": 3465 + }, + { + "epoch": 1.2213215859030837, + "grad_norm": 1.6510218664086935, + "learning_rate": 7.211655591052833e-06, + "loss": 0.5980997085571289, + "step": 3466 + }, + { + "epoch": 1.2216740088105726, + "grad_norm": 2.0761228855668468, + "learning_rate": 7.206060748372212e-06, + "loss": 0.5982732772827148, + "step": 3467 + }, + { + "epoch": 1.2220264317180616, + "grad_norm": 1.5384750193393883, + "learning_rate": 7.200466854147019e-06, + "loss": 0.612629771232605, + "step": 3468 + }, + { + "epoch": 1.2223788546255507, + "grad_norm": 1.6776022561511, + "learning_rate": 7.194873910276205e-06, + "loss": 
0.606558084487915, + "step": 3469 + }, + { + "epoch": 1.2227312775330397, + "grad_norm": 2.093853594654106, + "learning_rate": 7.189281918658396e-06, + "loss": 0.7133803367614746, + "step": 3470 + }, + { + "epoch": 1.2230837004405286, + "grad_norm": 1.737492396211302, + "learning_rate": 7.183690881191908e-06, + "loss": 0.5640908479690552, + "step": 3471 + }, + { + "epoch": 1.2234361233480175, + "grad_norm": 1.9131350962270206, + "learning_rate": 7.178100799774717e-06, + "loss": 0.6376210451126099, + "step": 3472 + }, + { + "epoch": 1.2237885462555067, + "grad_norm": 1.7418892302924867, + "learning_rate": 7.172511676304481e-06, + "loss": 0.6207184791564941, + "step": 3473 + }, + { + "epoch": 1.2241409691629956, + "grad_norm": 2.0136397077316133, + "learning_rate": 7.166923512678538e-06, + "loss": 0.47848421335220337, + "step": 3474 + }, + { + "epoch": 1.2244933920704846, + "grad_norm": 1.89946756738985, + "learning_rate": 7.161336310793894e-06, + "loss": 0.6052829027175903, + "step": 3475 + }, + { + "epoch": 1.2248458149779735, + "grad_norm": 1.968672987503914, + "learning_rate": 7.155750072547229e-06, + "loss": 0.6050940155982971, + "step": 3476 + }, + { + "epoch": 1.2251982378854627, + "grad_norm": 2.566995671782078, + "learning_rate": 7.150164799834902e-06, + "loss": 0.6121659278869629, + "step": 3477 + }, + { + "epoch": 1.2255506607929516, + "grad_norm": 1.9679344001124786, + "learning_rate": 7.144580494552929e-06, + "loss": 0.6886739730834961, + "step": 3478 + }, + { + "epoch": 1.2259030837004405, + "grad_norm": 1.5760234299307694, + "learning_rate": 7.13899715859702e-06, + "loss": 0.5001103281974792, + "step": 3479 + }, + { + "epoch": 1.2262555066079295, + "grad_norm": 2.1260048612910216, + "learning_rate": 7.133414793862532e-06, + "loss": 0.5948734283447266, + "step": 3480 + }, + { + "epoch": 1.2266079295154184, + "grad_norm": 2.593831579740968, + "learning_rate": 7.127833402244515e-06, + "loss": 0.6179298162460327, + "step": 3481 + }, + { + "epoch": 
1.2269603524229076, + "grad_norm": 1.6926296837265904, + "learning_rate": 7.122252985637672e-06, + "loss": 0.5543676614761353, + "step": 3482 + }, + { + "epoch": 1.2273127753303965, + "grad_norm": 1.6008632106545562, + "learning_rate": 7.116673545936379e-06, + "loss": 0.6279658079147339, + "step": 3483 + }, + { + "epoch": 1.2276651982378854, + "grad_norm": 1.5383086530060461, + "learning_rate": 7.111095085034687e-06, + "loss": 0.6692230701446533, + "step": 3484 + }, + { + "epoch": 1.2280176211453744, + "grad_norm": 1.7218507243355061, + "learning_rate": 7.1055176048263085e-06, + "loss": 0.6124502420425415, + "step": 3485 + }, + { + "epoch": 1.2283700440528635, + "grad_norm": 2.0325469007846007, + "learning_rate": 7.09994110720462e-06, + "loss": 0.6241810321807861, + "step": 3486 + }, + { + "epoch": 1.2287224669603525, + "grad_norm": 1.7620353767255947, + "learning_rate": 7.094365594062675e-06, + "loss": 0.6556589603424072, + "step": 3487 + }, + { + "epoch": 1.2290748898678414, + "grad_norm": 1.660185756567605, + "learning_rate": 7.0887910672931815e-06, + "loss": 0.480433851480484, + "step": 3488 + }, + { + "epoch": 1.2294273127753303, + "grad_norm": 1.7666817554476708, + "learning_rate": 7.083217528788524e-06, + "loss": 0.6198803782463074, + "step": 3489 + }, + { + "epoch": 1.2297797356828193, + "grad_norm": 1.7945939958355666, + "learning_rate": 7.077644980440741e-06, + "loss": 0.6368751525878906, + "step": 3490 + }, + { + "epoch": 1.2301321585903084, + "grad_norm": 1.904999974616483, + "learning_rate": 7.072073424141538e-06, + "loss": 0.5992522239685059, + "step": 3491 + }, + { + "epoch": 1.2304845814977974, + "grad_norm": 1.6441410368294835, + "learning_rate": 7.066502861782289e-06, + "loss": 0.5917885303497314, + "step": 3492 + }, + { + "epoch": 1.2308370044052863, + "grad_norm": 1.9090985571817867, + "learning_rate": 7.060933295254027e-06, + "loss": 0.615925669670105, + "step": 3493 + }, + { + "epoch": 1.2311894273127753, + "grad_norm": 1.5510149338562214, + 
"learning_rate": 7.055364726447437e-06, + "loss": 0.4408820867538452, + "step": 3494 + }, + { + "epoch": 1.2315418502202644, + "grad_norm": 1.706805010144051, + "learning_rate": 7.049797157252889e-06, + "loss": 0.4918386936187744, + "step": 3495 + }, + { + "epoch": 1.2318942731277533, + "grad_norm": 2.0047166519470965, + "learning_rate": 7.0442305895603844e-06, + "loss": 0.6964970827102661, + "step": 3496 + }, + { + "epoch": 1.2322466960352423, + "grad_norm": 1.993882373770559, + "learning_rate": 7.038665025259615e-06, + "loss": 0.5269606113433838, + "step": 3497 + }, + { + "epoch": 1.2325991189427312, + "grad_norm": 1.7338430673292662, + "learning_rate": 7.033100466239908e-06, + "loss": 0.6146842241287231, + "step": 3498 + }, + { + "epoch": 1.2329515418502202, + "grad_norm": 1.8958783101408965, + "learning_rate": 7.027536914390257e-06, + "loss": 0.7163739800453186, + "step": 3499 + }, + { + "epoch": 1.2333039647577093, + "grad_norm": 1.5575657818438158, + "learning_rate": 7.021974371599318e-06, + "loss": 0.5851477980613708, + "step": 3500 + }, + { + "epoch": 1.2336563876651983, + "grad_norm": 1.3831914970718109, + "learning_rate": 7.0164128397554e-06, + "loss": 0.585768461227417, + "step": 3501 + }, + { + "epoch": 1.2340088105726872, + "grad_norm": 1.651121323438745, + "learning_rate": 7.0108523207464706e-06, + "loss": 0.5467718839645386, + "step": 3502 + }, + { + "epoch": 1.2343612334801761, + "grad_norm": 1.8179588757324485, + "learning_rate": 7.0052928164601564e-06, + "loss": 0.638299822807312, + "step": 3503 + }, + { + "epoch": 1.2347136563876653, + "grad_norm": 1.8158584952636452, + "learning_rate": 6.9997343287837275e-06, + "loss": 0.6737650036811829, + "step": 3504 + }, + { + "epoch": 1.2350660792951542, + "grad_norm": 1.7619528960945736, + "learning_rate": 6.9941768596041224e-06, + "loss": 0.6659837961196899, + "step": 3505 + }, + { + "epoch": 1.2354185022026432, + "grad_norm": 1.9059656133131788, + "learning_rate": 6.988620410807932e-06, + "loss": 
0.6731020212173462, + "step": 3506 + }, + { + "epoch": 1.235770925110132, + "grad_norm": 1.8111638058637756, + "learning_rate": 6.983064984281389e-06, + "loss": 0.6236598491668701, + "step": 3507 + }, + { + "epoch": 1.236123348017621, + "grad_norm": 1.8485171900570894, + "learning_rate": 6.9775105819103985e-06, + "loss": 0.6233193874359131, + "step": 3508 + }, + { + "epoch": 1.2364757709251102, + "grad_norm": 1.7456936175280036, + "learning_rate": 6.971957205580497e-06, + "loss": 0.5914918184280396, + "step": 3509 + }, + { + "epoch": 1.2368281938325991, + "grad_norm": 2.069060854376664, + "learning_rate": 6.966404857176893e-06, + "loss": 0.6576484441757202, + "step": 3510 + }, + { + "epoch": 1.237180616740088, + "grad_norm": 1.6371442891988068, + "learning_rate": 6.960853538584431e-06, + "loss": 0.5609208941459656, + "step": 3511 + }, + { + "epoch": 1.2375330396475772, + "grad_norm": 1.8336206343046235, + "learning_rate": 6.955303251687609e-06, + "loss": 0.6405455470085144, + "step": 3512 + }, + { + "epoch": 1.2378854625550662, + "grad_norm": 1.6981959386126726, + "learning_rate": 6.949753998370579e-06, + "loss": 0.5621844530105591, + "step": 3513 + }, + { + "epoch": 1.238237885462555, + "grad_norm": 1.6040361718583698, + "learning_rate": 6.944205780517138e-06, + "loss": 0.5674207210540771, + "step": 3514 + }, + { + "epoch": 1.238590308370044, + "grad_norm": 1.8089615708578142, + "learning_rate": 6.938658600010734e-06, + "loss": 0.6744752526283264, + "step": 3515 + }, + { + "epoch": 1.238942731277533, + "grad_norm": 1.851260674535246, + "learning_rate": 6.9331124587344655e-06, + "loss": 0.537495493888855, + "step": 3516 + }, + { + "epoch": 1.2392951541850221, + "grad_norm": 1.7599394880527937, + "learning_rate": 6.92756735857107e-06, + "loss": 0.8405104875564575, + "step": 3517 + }, + { + "epoch": 1.239647577092511, + "grad_norm": 1.7838209985249966, + "learning_rate": 6.92202330140294e-06, + "loss": 0.6751723885536194, + "step": 3518 + }, + { + "epoch": 1.24, + 
"grad_norm": 1.8012761946666955, + "learning_rate": 6.9164802891121105e-06, + "loss": 0.5763178467750549, + "step": 3519 + }, + { + "epoch": 1.240352422907489, + "grad_norm": 1.7859481797599979, + "learning_rate": 6.910938323580256e-06, + "loss": 0.7713793516159058, + "step": 3520 + }, + { + "epoch": 1.240704845814978, + "grad_norm": 2.0598557028652356, + "learning_rate": 6.90539740668871e-06, + "loss": 0.6354435682296753, + "step": 3521 + }, + { + "epoch": 1.241057268722467, + "grad_norm": 1.6780280463346202, + "learning_rate": 6.899857540318434e-06, + "loss": 0.5121721625328064, + "step": 3522 + }, + { + "epoch": 1.241409691629956, + "grad_norm": 1.8470903920827393, + "learning_rate": 6.894318726350042e-06, + "loss": 0.586428165435791, + "step": 3523 + }, + { + "epoch": 1.241762114537445, + "grad_norm": 1.690234288859414, + "learning_rate": 6.888780966663792e-06, + "loss": 0.4868311285972595, + "step": 3524 + }, + { + "epoch": 1.2421145374449338, + "grad_norm": 1.7688170320163026, + "learning_rate": 6.883244263139578e-06, + "loss": 0.7057775259017944, + "step": 3525 + }, + { + "epoch": 1.242466960352423, + "grad_norm": 1.630207980484645, + "learning_rate": 6.877708617656942e-06, + "loss": 0.4993360638618469, + "step": 3526 + }, + { + "epoch": 1.242819383259912, + "grad_norm": 1.7093781024880734, + "learning_rate": 6.872174032095061e-06, + "loss": 0.6096793413162231, + "step": 3527 + }, + { + "epoch": 1.2431718061674009, + "grad_norm": 1.7005141830755592, + "learning_rate": 6.866640508332751e-06, + "loss": 0.584385871887207, + "step": 3528 + }, + { + "epoch": 1.2435242290748898, + "grad_norm": 1.6033098221924098, + "learning_rate": 6.861108048248477e-06, + "loss": 0.5857449173927307, + "step": 3529 + }, + { + "epoch": 1.243876651982379, + "grad_norm": 1.6447411339873705, + "learning_rate": 6.855576653720333e-06, + "loss": 0.4337875247001648, + "step": 3530 + }, + { + "epoch": 1.244229074889868, + "grad_norm": 1.924557656954366, + "learning_rate": 
6.850046326626058e-06, + "loss": 0.6949163675308228, + "step": 3531 + }, + { + "epoch": 1.2445814977973568, + "grad_norm": 2.029468434582643, + "learning_rate": 6.844517068843025e-06, + "loss": 0.5876098871231079, + "step": 3532 + }, + { + "epoch": 1.2449339207048458, + "grad_norm": 2.0143379278356153, + "learning_rate": 6.838988882248243e-06, + "loss": 0.5460488796234131, + "step": 3533 + }, + { + "epoch": 1.2452863436123347, + "grad_norm": 2.284896657447092, + "learning_rate": 6.833461768718365e-06, + "loss": 0.6500875949859619, + "step": 3534 + }, + { + "epoch": 1.2456387665198239, + "grad_norm": 1.9702281980181484, + "learning_rate": 6.82793573012967e-06, + "loss": 0.6504626274108887, + "step": 3535 + }, + { + "epoch": 1.2459911894273128, + "grad_norm": 1.8635901517060365, + "learning_rate": 6.822410768358072e-06, + "loss": 0.6881722211837769, + "step": 3536 + }, + { + "epoch": 1.2463436123348017, + "grad_norm": 1.7111090644899583, + "learning_rate": 6.816886885279132e-06, + "loss": 0.6747599840164185, + "step": 3537 + }, + { + "epoch": 1.2466960352422907, + "grad_norm": 2.61809094535544, + "learning_rate": 6.811364082768028e-06, + "loss": 0.5987570285797119, + "step": 3538 + }, + { + "epoch": 1.2470484581497798, + "grad_norm": 1.8641726073707956, + "learning_rate": 6.8058423626995885e-06, + "loss": 0.6614603996276855, + "step": 3539 + }, + { + "epoch": 1.2474008810572688, + "grad_norm": 1.5529990518062367, + "learning_rate": 6.80032172694826e-06, + "loss": 0.542367696762085, + "step": 3540 + }, + { + "epoch": 1.2477533039647577, + "grad_norm": 1.7771584963866378, + "learning_rate": 6.7948021773881235e-06, + "loss": 0.6200593709945679, + "step": 3541 + }, + { + "epoch": 1.2481057268722466, + "grad_norm": 1.896811225090905, + "learning_rate": 6.789283715892905e-06, + "loss": 0.6425306797027588, + "step": 3542 + }, + { + "epoch": 1.2484581497797356, + "grad_norm": 1.4798584901842344, + "learning_rate": 6.78376634433594e-06, + "loss": 0.5277592539787292, + "step": 
3543 + }, + { + "epoch": 1.2488105726872247, + "grad_norm": 1.8357663435279958, + "learning_rate": 6.778250064590206e-06, + "loss": 0.6120523810386658, + "step": 3544 + }, + { + "epoch": 1.2491629955947137, + "grad_norm": 2.0042129559914654, + "learning_rate": 6.772734878528313e-06, + "loss": 0.538428544998169, + "step": 3545 + }, + { + "epoch": 1.2495154185022026, + "grad_norm": 1.7456851140249008, + "learning_rate": 6.76722078802249e-06, + "loss": 0.6439732909202576, + "step": 3546 + }, + { + "epoch": 1.2498678414096915, + "grad_norm": 1.5580174742798336, + "learning_rate": 6.761707794944605e-06, + "loss": 0.5951697826385498, + "step": 3547 + }, + { + "epoch": 1.2502202643171807, + "grad_norm": 1.5461650468928614, + "learning_rate": 6.7561959011661456e-06, + "loss": 0.5548606514930725, + "step": 3548 + }, + { + "epoch": 1.2505726872246696, + "grad_norm": 1.936721806656616, + "learning_rate": 6.750685108558221e-06, + "loss": 0.4768974781036377, + "step": 3549 + }, + { + "epoch": 1.2509251101321586, + "grad_norm": 1.6130866640641843, + "learning_rate": 6.745175418991585e-06, + "loss": 0.6629552245140076, + "step": 3550 + }, + { + "epoch": 1.2512775330396475, + "grad_norm": 1.8826604922139925, + "learning_rate": 6.739666834336599e-06, + "loss": 0.6550329923629761, + "step": 3551 + }, + { + "epoch": 1.2516299559471364, + "grad_norm": 1.7091222991512534, + "learning_rate": 6.734159356463254e-06, + "loss": 0.37340015172958374, + "step": 3552 + }, + { + "epoch": 1.2519823788546256, + "grad_norm": 2.0454082069330424, + "learning_rate": 6.728652987241175e-06, + "loss": 0.6343201398849487, + "step": 3553 + }, + { + "epoch": 1.2523348017621145, + "grad_norm": 1.8938201811077042, + "learning_rate": 6.723147728539596e-06, + "loss": 0.7555221319198608, + "step": 3554 + }, + { + "epoch": 1.2526872246696035, + "grad_norm": 1.7356069524639768, + "learning_rate": 6.717643582227384e-06, + "loss": 0.5944523215293884, + "step": 3555 + }, + { + "epoch": 1.2530396475770926, + 
"grad_norm": 1.627279375354834, + "learning_rate": 6.71214055017303e-06, + "loss": 0.5686212778091431, + "step": 3556 + }, + { + "epoch": 1.2533920704845816, + "grad_norm": 1.697482530075543, + "learning_rate": 6.706638634244629e-06, + "loss": 0.6401857137680054, + "step": 3557 + }, + { + "epoch": 1.2537444933920705, + "grad_norm": 1.5933991655989903, + "learning_rate": 6.701137836309926e-06, + "loss": 0.4571516513824463, + "step": 3558 + }, + { + "epoch": 1.2540969162995594, + "grad_norm": 1.7606001647916119, + "learning_rate": 6.695638158236255e-06, + "loss": 0.5857570171356201, + "step": 3559 + }, + { + "epoch": 1.2544493392070484, + "grad_norm": 1.7187772621235449, + "learning_rate": 6.690139601890601e-06, + "loss": 0.6981472969055176, + "step": 3560 + }, + { + "epoch": 1.2548017621145373, + "grad_norm": 1.685629147285753, + "learning_rate": 6.684642169139544e-06, + "loss": 0.5120254755020142, + "step": 3561 + }, + { + "epoch": 1.2551541850220265, + "grad_norm": 2.043587366608814, + "learning_rate": 6.67914586184929e-06, + "loss": 0.6975923776626587, + "step": 3562 + }, + { + "epoch": 1.2555066079295154, + "grad_norm": 2.1694224742588233, + "learning_rate": 6.673650681885668e-06, + "loss": 0.5825072526931763, + "step": 3563 + }, + { + "epoch": 1.2558590308370043, + "grad_norm": 1.9388578444875513, + "learning_rate": 6.668156631114124e-06, + "loss": 0.5701749324798584, + "step": 3564 + }, + { + "epoch": 1.2562114537444935, + "grad_norm": 1.6715281124187895, + "learning_rate": 6.662663711399705e-06, + "loss": 0.5230482220649719, + "step": 3565 + }, + { + "epoch": 1.2565638766519824, + "grad_norm": 1.7540798103539514, + "learning_rate": 6.657171924607102e-06, + "loss": 0.6680361032485962, + "step": 3566 + }, + { + "epoch": 1.2569162995594714, + "grad_norm": 1.7792330481880054, + "learning_rate": 6.651681272600592e-06, + "loss": 0.6745159029960632, + "step": 3567 + }, + { + "epoch": 1.2572687224669603, + "grad_norm": 1.5777367956881352, + "learning_rate": 
6.646191757244089e-06, + "loss": 0.587162971496582, + "step": 3568 + }, + { + "epoch": 1.2576211453744492, + "grad_norm": 2.0091715660610183, + "learning_rate": 6.640703380401111e-06, + "loss": 0.6170785427093506, + "step": 3569 + }, + { + "epoch": 1.2579735682819384, + "grad_norm": 1.8496931248102404, + "learning_rate": 6.6352161439347875e-06, + "loss": 0.4955494999885559, + "step": 3570 + }, + { + "epoch": 1.2583259911894273, + "grad_norm": 1.8039519732213443, + "learning_rate": 6.62973004970787e-06, + "loss": 0.7183424234390259, + "step": 3571 + }, + { + "epoch": 1.2586784140969163, + "grad_norm": 1.6920151696252388, + "learning_rate": 6.624245099582713e-06, + "loss": 0.6266030669212341, + "step": 3572 + }, + { + "epoch": 1.2590308370044052, + "grad_norm": 1.8260182971737482, + "learning_rate": 6.6187612954212845e-06, + "loss": 0.5234469175338745, + "step": 3573 + }, + { + "epoch": 1.2593832599118944, + "grad_norm": 2.0762206956902234, + "learning_rate": 6.6132786390851725e-06, + "loss": 0.7066231966018677, + "step": 3574 + }, + { + "epoch": 1.2597356828193833, + "grad_norm": 1.8486791061565373, + "learning_rate": 6.60779713243556e-06, + "loss": 0.622086226940155, + "step": 3575 + }, + { + "epoch": 1.2600881057268722, + "grad_norm": 2.003110770323092, + "learning_rate": 6.6023167773332554e-06, + "loss": 0.6607370376586914, + "step": 3576 + }, + { + "epoch": 1.2604405286343612, + "grad_norm": 1.9512971078148649, + "learning_rate": 6.596837575638663e-06, + "loss": 0.6846165657043457, + "step": 3577 + }, + { + "epoch": 1.2607929515418501, + "grad_norm": 2.1137757907106574, + "learning_rate": 6.5913595292118024e-06, + "loss": 0.6329103708267212, + "step": 3578 + }, + { + "epoch": 1.2611453744493393, + "grad_norm": 1.7067433363159659, + "learning_rate": 6.585882639912302e-06, + "loss": 0.7942261695861816, + "step": 3579 + }, + { + "epoch": 1.2614977973568282, + "grad_norm": 1.923592126322299, + "learning_rate": 6.580406909599393e-06, + "loss": 0.5446548461914062, + 
"step": 3580 + }, + { + "epoch": 1.2618502202643171, + "grad_norm": 2.584270827853736, + "learning_rate": 6.574932340131917e-06, + "loss": 0.581193208694458, + "step": 3581 + }, + { + "epoch": 1.2622026431718063, + "grad_norm": 1.789761494779322, + "learning_rate": 6.569458933368323e-06, + "loss": 0.6099729537963867, + "step": 3582 + }, + { + "epoch": 1.2625550660792952, + "grad_norm": 1.7689292642576144, + "learning_rate": 6.563986691166655e-06, + "loss": 0.45215970277786255, + "step": 3583 + }, + { + "epoch": 1.2629074889867842, + "grad_norm": 1.9037008934232844, + "learning_rate": 6.558515615384573e-06, + "loss": 0.6674731969833374, + "step": 3584 + }, + { + "epoch": 1.2632599118942731, + "grad_norm": 1.4782940862298068, + "learning_rate": 6.553045707879338e-06, + "loss": 0.4951098561286926, + "step": 3585 + }, + { + "epoch": 1.263612334801762, + "grad_norm": 1.7852149202748289, + "learning_rate": 6.54757697050781e-06, + "loss": 0.5853816270828247, + "step": 3586 + }, + { + "epoch": 1.263964757709251, + "grad_norm": 1.5907197274079232, + "learning_rate": 6.5421094051264575e-06, + "loss": 0.5236951112747192, + "step": 3587 + }, + { + "epoch": 1.2643171806167401, + "grad_norm": 1.733068587169355, + "learning_rate": 6.536643013591347e-06, + "loss": 0.5717612504959106, + "step": 3588 + }, + { + "epoch": 1.264669603524229, + "grad_norm": 2.033496211612474, + "learning_rate": 6.531177797758155e-06, + "loss": 0.6144098043441772, + "step": 3589 + }, + { + "epoch": 1.265022026431718, + "grad_norm": 1.6355266077439052, + "learning_rate": 6.525713759482144e-06, + "loss": 0.5634705424308777, + "step": 3590 + }, + { + "epoch": 1.2653744493392072, + "grad_norm": 1.7147225194337798, + "learning_rate": 6.520250900618186e-06, + "loss": 0.582956075668335, + "step": 3591 + }, + { + "epoch": 1.265726872246696, + "grad_norm": 1.843768096592032, + "learning_rate": 6.514789223020754e-06, + "loss": 0.7649297714233398, + "step": 3592 + }, + { + "epoch": 1.266079295154185, + "grad_norm": 
1.6261733555902604, + "learning_rate": 6.509328728543918e-06, + "loss": 0.6035098433494568, + "step": 3593 + }, + { + "epoch": 1.266431718061674, + "grad_norm": 1.8493319579504743, + "learning_rate": 6.503869419041344e-06, + "loss": 0.6405705809593201, + "step": 3594 + }, + { + "epoch": 1.266784140969163, + "grad_norm": 2.26304309310324, + "learning_rate": 6.498411296366299e-06, + "loss": 0.674353301525116, + "step": 3595 + }, + { + "epoch": 1.2671365638766519, + "grad_norm": 1.7621656180677492, + "learning_rate": 6.492954362371644e-06, + "loss": 0.6018465757369995, + "step": 3596 + }, + { + "epoch": 1.267488986784141, + "grad_norm": 2.127137234030612, + "learning_rate": 6.487498618909845e-06, + "loss": 0.6491270065307617, + "step": 3597 + }, + { + "epoch": 1.26784140969163, + "grad_norm": 1.6636292273445474, + "learning_rate": 6.4820440678329474e-06, + "loss": 0.5126988887786865, + "step": 3598 + }, + { + "epoch": 1.2681938325991189, + "grad_norm": 1.7884980833676332, + "learning_rate": 6.476590710992605e-06, + "loss": 0.5931694507598877, + "step": 3599 + }, + { + "epoch": 1.268546255506608, + "grad_norm": 1.9386898901162777, + "learning_rate": 6.471138550240066e-06, + "loss": 0.5455423593521118, + "step": 3600 + }, + { + "epoch": 1.268898678414097, + "grad_norm": 1.6361281925349132, + "learning_rate": 6.465687587426166e-06, + "loss": 0.4870053231716156, + "step": 3601 + }, + { + "epoch": 1.269251101321586, + "grad_norm": 1.9069149245463006, + "learning_rate": 6.460237824401337e-06, + "loss": 0.6434903144836426, + "step": 3602 + }, + { + "epoch": 1.2696035242290749, + "grad_norm": 1.676899060774639, + "learning_rate": 6.454789263015609e-06, + "loss": 0.6256476640701294, + "step": 3603 + }, + { + "epoch": 1.2699559471365638, + "grad_norm": 1.8004511475353204, + "learning_rate": 6.449341905118589e-06, + "loss": 0.6304135322570801, + "step": 3604 + }, + { + "epoch": 1.2703083700440527, + "grad_norm": 1.9009929525157667, + "learning_rate": 6.443895752559498e-06, + 
"loss": 0.5315194725990295, + "step": 3605 + }, + { + "epoch": 1.2706607929515419, + "grad_norm": 1.4321615697348329, + "learning_rate": 6.438450807187127e-06, + "loss": 0.5232852697372437, + "step": 3606 + }, + { + "epoch": 1.2710132158590308, + "grad_norm": 1.6584356511216338, + "learning_rate": 6.433007070849863e-06, + "loss": 0.4462543725967407, + "step": 3607 + }, + { + "epoch": 1.2713656387665198, + "grad_norm": 1.6730765460300174, + "learning_rate": 6.4275645453956945e-06, + "loss": 0.6347709894180298, + "step": 3608 + }, + { + "epoch": 1.271718061674009, + "grad_norm": 1.625329738549371, + "learning_rate": 6.422123232672182e-06, + "loss": 0.5277259349822998, + "step": 3609 + }, + { + "epoch": 1.2720704845814979, + "grad_norm": 1.7954090025098361, + "learning_rate": 6.416683134526486e-06, + "loss": 0.6297650933265686, + "step": 3610 + }, + { + "epoch": 1.2724229074889868, + "grad_norm": 1.7743916636003476, + "learning_rate": 6.411244252805351e-06, + "loss": 0.503609836101532, + "step": 3611 + }, + { + "epoch": 1.2727753303964757, + "grad_norm": 1.7300375262211753, + "learning_rate": 6.405806589355099e-06, + "loss": 0.6026735305786133, + "step": 3612 + }, + { + "epoch": 1.2731277533039647, + "grad_norm": 1.543883502597784, + "learning_rate": 6.400370146021662e-06, + "loss": 0.4918368458747864, + "step": 3613 + }, + { + "epoch": 1.2734801762114538, + "grad_norm": 2.125830682883153, + "learning_rate": 6.394934924650532e-06, + "loss": 0.6215550899505615, + "step": 3614 + }, + { + "epoch": 1.2738325991189428, + "grad_norm": 2.1843858701221563, + "learning_rate": 6.389500927086801e-06, + "loss": 0.6979820728302002, + "step": 3615 + }, + { + "epoch": 1.2741850220264317, + "grad_norm": 1.9168565956279218, + "learning_rate": 6.384068155175143e-06, + "loss": 0.5661836266517639, + "step": 3616 + }, + { + "epoch": 1.2745374449339206, + "grad_norm": 2.2497484972303896, + "learning_rate": 6.378636610759812e-06, + "loss": 0.699792742729187, + "step": 3617 + }, + { + 
"epoch": 1.2748898678414098, + "grad_norm": 2.1298001613626765, + "learning_rate": 6.373206295684653e-06, + "loss": 0.6418631076812744, + "step": 3618 + }, + { + "epoch": 1.2752422907488987, + "grad_norm": 1.639324838954067, + "learning_rate": 6.3677772117930895e-06, + "loss": 0.4975489675998688, + "step": 3619 + }, + { + "epoch": 1.2755947136563877, + "grad_norm": 1.6787243090627195, + "learning_rate": 6.362349360928117e-06, + "loss": 0.5621567964553833, + "step": 3620 + }, + { + "epoch": 1.2759471365638766, + "grad_norm": 1.9441609125211634, + "learning_rate": 6.356922744932335e-06, + "loss": 0.538573682308197, + "step": 3621 + }, + { + "epoch": 1.2762995594713655, + "grad_norm": 1.8099521315485383, + "learning_rate": 6.351497365647903e-06, + "loss": 0.5726763010025024, + "step": 3622 + }, + { + "epoch": 1.2766519823788547, + "grad_norm": 1.509968688666824, + "learning_rate": 6.346073224916565e-06, + "loss": 0.5911343097686768, + "step": 3623 + }, + { + "epoch": 1.2770044052863436, + "grad_norm": 1.8960352229890238, + "learning_rate": 6.340650324579658e-06, + "loss": 0.6181383728981018, + "step": 3624 + }, + { + "epoch": 1.2773568281938326, + "grad_norm": 1.8065087463718459, + "learning_rate": 6.3352286664780785e-06, + "loss": 0.5941140651702881, + "step": 3625 + }, + { + "epoch": 1.2777092511013217, + "grad_norm": 1.980034412220703, + "learning_rate": 6.329808252452316e-06, + "loss": 0.7604472637176514, + "step": 3626 + }, + { + "epoch": 1.2780616740088107, + "grad_norm": 1.7265138262893938, + "learning_rate": 6.324389084342435e-06, + "loss": 0.6063867211341858, + "step": 3627 + }, + { + "epoch": 1.2784140969162996, + "grad_norm": 1.8844241099487, + "learning_rate": 6.3189711639880644e-06, + "loss": 0.7202302813529968, + "step": 3628 + }, + { + "epoch": 1.2787665198237885, + "grad_norm": 1.7295127580755116, + "learning_rate": 6.313554493228431e-06, + "loss": 0.5934856534004211, + "step": 3629 + }, + { + "epoch": 1.2791189427312775, + "grad_norm": 
1.7905829637835577, + "learning_rate": 6.3081390739023175e-06, + "loss": 0.6403088569641113, + "step": 3630 + }, + { + "epoch": 1.2794713656387664, + "grad_norm": 1.9400757232043577, + "learning_rate": 6.302724907848096e-06, + "loss": 0.6679831743240356, + "step": 3631 + }, + { + "epoch": 1.2798237885462556, + "grad_norm": 1.9107919043768602, + "learning_rate": 6.297311996903703e-06, + "loss": 0.6914902329444885, + "step": 3632 + }, + { + "epoch": 1.2801762114537445, + "grad_norm": 1.4865016000129294, + "learning_rate": 6.2919003429066535e-06, + "loss": 0.5391600131988525, + "step": 3633 + }, + { + "epoch": 1.2805286343612334, + "grad_norm": 1.7774288854868727, + "learning_rate": 6.286489947694041e-06, + "loss": 0.5740962028503418, + "step": 3634 + }, + { + "epoch": 1.2808810572687226, + "grad_norm": 1.9144175178404335, + "learning_rate": 6.281080813102523e-06, + "loss": 0.6497045159339905, + "step": 3635 + }, + { + "epoch": 1.2812334801762115, + "grad_norm": 1.6649274023798961, + "learning_rate": 6.275672940968326e-06, + "loss": 0.5481048226356506, + "step": 3636 + }, + { + "epoch": 1.2815859030837005, + "grad_norm": 1.6547388155087517, + "learning_rate": 6.270266333127266e-06, + "loss": 0.5412508249282837, + "step": 3637 + }, + { + "epoch": 1.2819383259911894, + "grad_norm": 1.8289845737684471, + "learning_rate": 6.264860991414709e-06, + "loss": 0.5055446624755859, + "step": 3638 + }, + { + "epoch": 1.2822907488986783, + "grad_norm": 1.9772143213144648, + "learning_rate": 6.259456917665605e-06, + "loss": 0.6073929071426392, + "step": 3639 + }, + { + "epoch": 1.2826431718061673, + "grad_norm": 1.6297327309789957, + "learning_rate": 6.254054113714467e-06, + "loss": 0.5277928113937378, + "step": 3640 + }, + { + "epoch": 1.2829955947136564, + "grad_norm": 1.7440990717646376, + "learning_rate": 6.248652581395378e-06, + "loss": 0.5106299519538879, + "step": 3641 + }, + { + "epoch": 1.2833480176211454, + "grad_norm": 1.612143250274434, + "learning_rate": 
6.243252322541993e-06, + "loss": 0.485049843788147, + "step": 3642 + }, + { + "epoch": 1.2837004405286343, + "grad_norm": 2.0115453178937894, + "learning_rate": 6.237853338987532e-06, + "loss": 0.5899066925048828, + "step": 3643 + }, + { + "epoch": 1.2840528634361235, + "grad_norm": 1.6956228425038977, + "learning_rate": 6.2324556325647745e-06, + "loss": 0.5761981010437012, + "step": 3644 + }, + { + "epoch": 1.2844052863436124, + "grad_norm": 1.732932337254408, + "learning_rate": 6.227059205106085e-06, + "loss": 0.6288208961486816, + "step": 3645 + }, + { + "epoch": 1.2847577092511013, + "grad_norm": 1.7671756166643349, + "learning_rate": 6.2216640584433726e-06, + "loss": 0.6122645139694214, + "step": 3646 + }, + { + "epoch": 1.2851101321585903, + "grad_norm": 1.8312838317562172, + "learning_rate": 6.2162701944081295e-06, + "loss": 0.5838489532470703, + "step": 3647 + }, + { + "epoch": 1.2854625550660792, + "grad_norm": 1.5533740438356287, + "learning_rate": 6.2108776148314005e-06, + "loss": 0.6020689606666565, + "step": 3648 + }, + { + "epoch": 1.2858149779735684, + "grad_norm": 1.9453055966993607, + "learning_rate": 6.205486321543798e-06, + "loss": 0.5852698683738708, + "step": 3649 + }, + { + "epoch": 1.2861674008810573, + "grad_norm": 1.7649785944212673, + "learning_rate": 6.2000963163755015e-06, + "loss": 0.560903012752533, + "step": 3650 + }, + { + "epoch": 1.2865198237885462, + "grad_norm": 2.053972717306982, + "learning_rate": 6.194707601156249e-06, + "loss": 0.7750356197357178, + "step": 3651 + }, + { + "epoch": 1.2868722466960352, + "grad_norm": 1.7842589241914402, + "learning_rate": 6.189320177715338e-06, + "loss": 0.5503605604171753, + "step": 3652 + }, + { + "epoch": 1.2872246696035243, + "grad_norm": 1.8162609150425584, + "learning_rate": 6.183934047881636e-06, + "loss": 0.6910672187805176, + "step": 3653 + }, + { + "epoch": 1.2875770925110133, + "grad_norm": 1.6952370527492193, + "learning_rate": 6.1785492134835626e-06, + "loss": 0.7773069739341736, 
+ "step": 3654 + }, + { + "epoch": 1.2879295154185022, + "grad_norm": 1.7765631560225321, + "learning_rate": 6.173165676349103e-06, + "loss": 0.6777454018592834, + "step": 3655 + }, + { + "epoch": 1.2882819383259911, + "grad_norm": 1.6097825614884171, + "learning_rate": 6.167783438305803e-06, + "loss": 0.6103118658065796, + "step": 3656 + }, + { + "epoch": 1.28863436123348, + "grad_norm": 2.4016366240266454, + "learning_rate": 6.1624025011807595e-06, + "loss": 0.593717634677887, + "step": 3657 + }, + { + "epoch": 1.2889867841409692, + "grad_norm": 1.700445284940488, + "learning_rate": 6.1570228668006395e-06, + "loss": 0.5822824835777283, + "step": 3658 + }, + { + "epoch": 1.2893392070484582, + "grad_norm": 1.7095957018221146, + "learning_rate": 6.151644536991656e-06, + "loss": 0.5180603861808777, + "step": 3659 + }, + { + "epoch": 1.289691629955947, + "grad_norm": 1.799926440179644, + "learning_rate": 6.14626751357959e-06, + "loss": 0.6283069849014282, + "step": 3660 + }, + { + "epoch": 1.290044052863436, + "grad_norm": 2.2706339647511613, + "learning_rate": 6.14089179838977e-06, + "loss": 0.7590633630752563, + "step": 3661 + }, + { + "epoch": 1.2903964757709252, + "grad_norm": 1.4238309589699358, + "learning_rate": 6.135517393247081e-06, + "loss": 0.6044079661369324, + "step": 3662 + }, + { + "epoch": 1.2907488986784141, + "grad_norm": 2.078820338247561, + "learning_rate": 6.130144299975973e-06, + "loss": 0.603421688079834, + "step": 3663 + }, + { + "epoch": 1.291101321585903, + "grad_norm": 1.9398452395479244, + "learning_rate": 6.1247725204004395e-06, + "loss": 0.577094554901123, + "step": 3664 + }, + { + "epoch": 1.291453744493392, + "grad_norm": 1.7780187513951604, + "learning_rate": 6.119402056344033e-06, + "loss": 0.5752004981040955, + "step": 3665 + }, + { + "epoch": 1.291806167400881, + "grad_norm": 1.6979532493457608, + "learning_rate": 6.114032909629863e-06, + "loss": 0.730962872505188, + "step": 3666 + }, + { + "epoch": 1.29215859030837, + "grad_norm": 
2.0386068832784465, + "learning_rate": 6.108665082080578e-06, + "loss": 0.5361749529838562, + "step": 3667 + }, + { + "epoch": 1.292511013215859, + "grad_norm": 1.470729033877409, + "learning_rate": 6.103298575518401e-06, + "loss": 0.4841603636741638, + "step": 3668 + }, + { + "epoch": 1.292863436123348, + "grad_norm": 1.706501413292354, + "learning_rate": 6.097933391765087e-06, + "loss": 0.6614999771118164, + "step": 3669 + }, + { + "epoch": 1.2932158590308371, + "grad_norm": 1.6930402108862321, + "learning_rate": 6.092569532641947e-06, + "loss": 0.6088405847549438, + "step": 3670 + }, + { + "epoch": 1.293568281938326, + "grad_norm": 1.9173247230823398, + "learning_rate": 6.087206999969848e-06, + "loss": 0.601859986782074, + "step": 3671 + }, + { + "epoch": 1.293920704845815, + "grad_norm": 1.8019332247534052, + "learning_rate": 6.081845795569204e-06, + "loss": 0.5724194049835205, + "step": 3672 + }, + { + "epoch": 1.294273127753304, + "grad_norm": 1.7101141845528827, + "learning_rate": 6.07648592125997e-06, + "loss": 0.7899144887924194, + "step": 3673 + }, + { + "epoch": 1.2946255506607929, + "grad_norm": 1.8438581079047975, + "learning_rate": 6.071127378861667e-06, + "loss": 0.5778594017028809, + "step": 3674 + }, + { + "epoch": 1.2949779735682818, + "grad_norm": 1.6768623613769682, + "learning_rate": 6.065770170193342e-06, + "loss": 0.6357566118240356, + "step": 3675 + }, + { + "epoch": 1.295330396475771, + "grad_norm": 1.5951400768860937, + "learning_rate": 6.0604142970736115e-06, + "loss": 0.511436939239502, + "step": 3676 + }, + { + "epoch": 1.29568281938326, + "grad_norm": 1.883542435313207, + "learning_rate": 6.0550597613206205e-06, + "loss": 0.6469998955726624, + "step": 3677 + }, + { + "epoch": 1.2960352422907488, + "grad_norm": 1.5730405198836903, + "learning_rate": 6.049706564752069e-06, + "loss": 0.5724819898605347, + "step": 3678 + }, + { + "epoch": 1.296387665198238, + "grad_norm": 1.5360587172523898, + "learning_rate": 6.044354709185203e-06, + 
"loss": 0.6567148566246033, + "step": 3679 + }, + { + "epoch": 1.296740088105727, + "grad_norm": 1.8931575903206552, + "learning_rate": 6.039004196436807e-06, + "loss": 0.6694033145904541, + "step": 3680 + }, + { + "epoch": 1.2970925110132159, + "grad_norm": 1.8190573258877898, + "learning_rate": 6.033655028323215e-06, + "loss": 0.5147275924682617, + "step": 3681 + }, + { + "epoch": 1.2974449339207048, + "grad_norm": 2.0405860057138256, + "learning_rate": 6.0283072066603075e-06, + "loss": 0.5881609320640564, + "step": 3682 + }, + { + "epoch": 1.2977973568281937, + "grad_norm": 1.7248898652229567, + "learning_rate": 6.022960733263493e-06, + "loss": 0.625927209854126, + "step": 3683 + }, + { + "epoch": 1.2981497797356827, + "grad_norm": 1.8738096752650604, + "learning_rate": 6.017615609947747e-06, + "loss": 0.693459153175354, + "step": 3684 + }, + { + "epoch": 1.2985022026431718, + "grad_norm": 1.6745028766810846, + "learning_rate": 6.0122718385275615e-06, + "loss": 0.5185744762420654, + "step": 3685 + }, + { + "epoch": 1.2988546255506608, + "grad_norm": 1.7625922291600025, + "learning_rate": 6.006929420816982e-06, + "loss": 0.5153995752334595, + "step": 3686 + }, + { + "epoch": 1.2992070484581497, + "grad_norm": 1.9617946738772851, + "learning_rate": 6.001588358629598e-06, + "loss": 0.5844067931175232, + "step": 3687 + }, + { + "epoch": 1.2995594713656389, + "grad_norm": 1.7999387557140187, + "learning_rate": 5.996248653778529e-06, + "loss": 0.6021767854690552, + "step": 3688 + }, + { + "epoch": 1.2999118942731278, + "grad_norm": 1.650868828635221, + "learning_rate": 5.990910308076443e-06, + "loss": 0.573150098323822, + "step": 3689 + }, + { + "epoch": 1.3002643171806167, + "grad_norm": 1.8809065032795727, + "learning_rate": 5.985573323335541e-06, + "loss": 0.5125507116317749, + "step": 3690 + }, + { + "epoch": 1.3006167400881057, + "grad_norm": 1.5884199689542184, + "learning_rate": 5.980237701367556e-06, + "loss": 0.541732668876648, + "step": 3691 + }, + { + 
"epoch": 1.3009691629955946, + "grad_norm": 2.0151748973563577, + "learning_rate": 5.974903443983778e-06, + "loss": 0.66359543800354, + "step": 3692 + }, + { + "epoch": 1.3013215859030838, + "grad_norm": 1.8831727632454829, + "learning_rate": 5.969570552995014e-06, + "loss": 0.6986300349235535, + "step": 3693 + }, + { + "epoch": 1.3016740088105727, + "grad_norm": 2.0800644206104195, + "learning_rate": 5.9642390302116125e-06, + "loss": 0.6829022169113159, + "step": 3694 + }, + { + "epoch": 1.3020264317180616, + "grad_norm": 1.9073088749861613, + "learning_rate": 5.9589088774434655e-06, + "loss": 0.5710464715957642, + "step": 3695 + }, + { + "epoch": 1.3023788546255506, + "grad_norm": 1.8154393300824316, + "learning_rate": 5.953580096499989e-06, + "loss": 0.5604938268661499, + "step": 3696 + }, + { + "epoch": 1.3027312775330397, + "grad_norm": 1.755426899711885, + "learning_rate": 5.948252689190141e-06, + "loss": 0.678723931312561, + "step": 3697 + }, + { + "epoch": 1.3030837004405287, + "grad_norm": 1.8845664461665383, + "learning_rate": 5.9429266573224145e-06, + "loss": 0.6652591228485107, + "step": 3698 + }, + { + "epoch": 1.3034361233480176, + "grad_norm": 1.8800654237619134, + "learning_rate": 5.937602002704819e-06, + "loss": 0.6141147017478943, + "step": 3699 + }, + { + "epoch": 1.3037885462555066, + "grad_norm": 1.937561336880738, + "learning_rate": 5.932278727144924e-06, + "loss": 0.5260860919952393, + "step": 3700 + }, + { + "epoch": 1.3041409691629955, + "grad_norm": 1.6945627397292862, + "learning_rate": 5.926956832449806e-06, + "loss": 0.464357852935791, + "step": 3701 + }, + { + "epoch": 1.3044933920704846, + "grad_norm": 1.8301641414278105, + "learning_rate": 5.921636320426085e-06, + "loss": 0.6513686180114746, + "step": 3702 + }, + { + "epoch": 1.3048458149779736, + "grad_norm": 1.7297134138158161, + "learning_rate": 5.91631719287991e-06, + "loss": 0.44547855854034424, + "step": 3703 + }, + { + "epoch": 1.3051982378854625, + "grad_norm": 
1.8572950621020996, + "learning_rate": 5.910999451616959e-06, + "loss": 0.714026153087616, + "step": 3704 + }, + { + "epoch": 1.3055506607929517, + "grad_norm": 1.5164059156260825, + "learning_rate": 5.90568309844244e-06, + "loss": 0.48294252157211304, + "step": 3705 + }, + { + "epoch": 1.3059030837004406, + "grad_norm": 2.0148835282111275, + "learning_rate": 5.900368135161093e-06, + "loss": 0.587759256362915, + "step": 3706 + }, + { + "epoch": 1.3062555066079296, + "grad_norm": 1.7833437474608147, + "learning_rate": 5.895054563577172e-06, + "loss": 0.6251810789108276, + "step": 3707 + }, + { + "epoch": 1.3066079295154185, + "grad_norm": 1.98023378159902, + "learning_rate": 5.889742385494481e-06, + "loss": 0.6488438844680786, + "step": 3708 + }, + { + "epoch": 1.3069603524229074, + "grad_norm": 2.3062951128393325, + "learning_rate": 5.8844316027163315e-06, + "loss": 0.6682882308959961, + "step": 3709 + }, + { + "epoch": 1.3073127753303964, + "grad_norm": 1.9459894886811675, + "learning_rate": 5.879122217045573e-06, + "loss": 0.6537875533103943, + "step": 3710 + }, + { + "epoch": 1.3076651982378855, + "grad_norm": 1.994395753049965, + "learning_rate": 5.873814230284576e-06, + "loss": 0.6813541650772095, + "step": 3711 + }, + { + "epoch": 1.3080176211453745, + "grad_norm": 2.002875607232805, + "learning_rate": 5.868507644235233e-06, + "loss": 0.6962395906448364, + "step": 3712 + }, + { + "epoch": 1.3083700440528634, + "grad_norm": 1.8811127927416966, + "learning_rate": 5.863202460698972e-06, + "loss": 0.6872841119766235, + "step": 3713 + }, + { + "epoch": 1.3087224669603525, + "grad_norm": 2.007681646131619, + "learning_rate": 5.857898681476732e-06, + "loss": 0.7200508117675781, + "step": 3714 + }, + { + "epoch": 1.3090748898678415, + "grad_norm": 1.7850989505478374, + "learning_rate": 5.852596308368982e-06, + "loss": 0.6100003719329834, + "step": 3715 + }, + { + "epoch": 1.3094273127753304, + "grad_norm": 1.962305695853223, + "learning_rate": 5.847295343175714e-06, 
+ "loss": 0.7347345352172852, + "step": 3716 + }, + { + "epoch": 1.3097797356828194, + "grad_norm": 1.8094012131106647, + "learning_rate": 5.841995787696438e-06, + "loss": 0.6955733895301819, + "step": 3717 + }, + { + "epoch": 1.3101321585903083, + "grad_norm": 1.6497459626323396, + "learning_rate": 5.836697643730193e-06, + "loss": 0.5266987085342407, + "step": 3718 + }, + { + "epoch": 1.3104845814977972, + "grad_norm": 1.7072540878561502, + "learning_rate": 5.83140091307553e-06, + "loss": 0.5978814363479614, + "step": 3719 + }, + { + "epoch": 1.3108370044052864, + "grad_norm": 1.9008641546548906, + "learning_rate": 5.826105597530526e-06, + "loss": 0.608231782913208, + "step": 3720 + }, + { + "epoch": 1.3111894273127753, + "grad_norm": 1.660571967924875, + "learning_rate": 5.820811698892775e-06, + "loss": 0.5834963321685791, + "step": 3721 + }, + { + "epoch": 1.3115418502202643, + "grad_norm": 1.7715871926900555, + "learning_rate": 5.8155192189593915e-06, + "loss": 0.6675208806991577, + "step": 3722 + }, + { + "epoch": 1.3118942731277534, + "grad_norm": 2.0125396897962156, + "learning_rate": 5.810228159527003e-06, + "loss": 0.655093789100647, + "step": 3723 + }, + { + "epoch": 1.3122466960352424, + "grad_norm": 1.832975656309839, + "learning_rate": 5.804938522391768e-06, + "loss": 0.5658842921257019, + "step": 3724 + }, + { + "epoch": 1.3125991189427313, + "grad_norm": 1.7484570770381627, + "learning_rate": 5.799650309349348e-06, + "loss": 0.4502618610858917, + "step": 3725 + }, + { + "epoch": 1.3129515418502202, + "grad_norm": 1.6150871905896036, + "learning_rate": 5.79436352219493e-06, + "loss": 0.6165845394134521, + "step": 3726 + }, + { + "epoch": 1.3133039647577092, + "grad_norm": 1.6734001609648903, + "learning_rate": 5.7890781627232115e-06, + "loss": 0.6315968036651611, + "step": 3727 + }, + { + "epoch": 1.313656387665198, + "grad_norm": 1.5048326218576167, + "learning_rate": 5.783794232728408e-06, + "loss": 0.58831787109375, + "step": 3728 + }, + { + 
"epoch": 1.3140088105726873, + "grad_norm": 1.7597864288310854, + "learning_rate": 5.778511734004248e-06, + "loss": 0.5056396722793579, + "step": 3729 + }, + { + "epoch": 1.3143612334801762, + "grad_norm": 2.3417954571274753, + "learning_rate": 5.773230668343978e-06, + "loss": 0.5469251871109009, + "step": 3730 + }, + { + "epoch": 1.3147136563876651, + "grad_norm": 1.768855633328091, + "learning_rate": 5.76795103754035e-06, + "loss": 0.7011934518814087, + "step": 3731 + }, + { + "epoch": 1.3150660792951543, + "grad_norm": 1.574817644372446, + "learning_rate": 5.762672843385643e-06, + "loss": 0.7080543041229248, + "step": 3732 + }, + { + "epoch": 1.3154185022026432, + "grad_norm": 1.7812689751161113, + "learning_rate": 5.757396087671634e-06, + "loss": 0.5180330276489258, + "step": 3733 + }, + { + "epoch": 1.3157709251101322, + "grad_norm": 1.6465709022018649, + "learning_rate": 5.75212077218962e-06, + "loss": 0.5282220840454102, + "step": 3734 + }, + { + "epoch": 1.316123348017621, + "grad_norm": 1.9100789844293367, + "learning_rate": 5.746846898730403e-06, + "loss": 0.7174440026283264, + "step": 3735 + }, + { + "epoch": 1.31647577092511, + "grad_norm": 1.7156784573652895, + "learning_rate": 5.7415744690843025e-06, + "loss": 0.537194013595581, + "step": 3736 + }, + { + "epoch": 1.3168281938325992, + "grad_norm": 1.714186482517803, + "learning_rate": 5.7363034850411415e-06, + "loss": 0.7514588832855225, + "step": 3737 + }, + { + "epoch": 1.3171806167400881, + "grad_norm": 1.6138774970176952, + "learning_rate": 5.731033948390252e-06, + "loss": 0.601151704788208, + "step": 3738 + }, + { + "epoch": 1.317533039647577, + "grad_norm": 1.9652638368208295, + "learning_rate": 5.7257658609204865e-06, + "loss": 0.6046192646026611, + "step": 3739 + }, + { + "epoch": 1.317885462555066, + "grad_norm": 1.9909773544544114, + "learning_rate": 5.720499224420196e-06, + "loss": 0.5003835558891296, + "step": 3740 + }, + { + "epoch": 1.3182378854625552, + "grad_norm": 2.7143275056165237, 
+ "learning_rate": 5.715234040677229e-06, + "loss": 0.6251966953277588, + "step": 3741 + }, + { + "epoch": 1.318590308370044, + "grad_norm": 1.9483642954012013, + "learning_rate": 5.709970311478961e-06, + "loss": 0.6681240797042847, + "step": 3742 + }, + { + "epoch": 1.318942731277533, + "grad_norm": 1.6278748497204938, + "learning_rate": 5.704708038612261e-06, + "loss": 0.582561194896698, + "step": 3743 + }, + { + "epoch": 1.319295154185022, + "grad_norm": 1.8550137845260724, + "learning_rate": 5.699447223863508e-06, + "loss": 0.5616302490234375, + "step": 3744 + }, + { + "epoch": 1.319647577092511, + "grad_norm": 1.7452561285826282, + "learning_rate": 5.6941878690185835e-06, + "loss": 0.6131408214569092, + "step": 3745 + }, + { + "epoch": 1.32, + "grad_norm": 1.8334584062109562, + "learning_rate": 5.688929975862873e-06, + "loss": 0.5772547721862793, + "step": 3746 + }, + { + "epoch": 1.320352422907489, + "grad_norm": 1.7519534139582256, + "learning_rate": 5.683673546181274e-06, + "loss": 0.5927203893661499, + "step": 3747 + }, + { + "epoch": 1.320704845814978, + "grad_norm": 1.9849489030223588, + "learning_rate": 5.67841858175818e-06, + "loss": 0.6001334190368652, + "step": 3748 + }, + { + "epoch": 1.321057268722467, + "grad_norm": 1.584893703676267, + "learning_rate": 5.673165084377479e-06, + "loss": 0.4598100781440735, + "step": 3749 + }, + { + "epoch": 1.321409691629956, + "grad_norm": 1.9316178856088813, + "learning_rate": 5.667913055822578e-06, + "loss": 0.6455222368240356, + "step": 3750 + }, + { + "epoch": 1.321762114537445, + "grad_norm": 1.9234057001448424, + "learning_rate": 5.662662497876375e-06, + "loss": 0.6327164173126221, + "step": 3751 + }, + { + "epoch": 1.322114537444934, + "grad_norm": 1.7096288638222439, + "learning_rate": 5.657413412321271e-06, + "loss": 0.6699539422988892, + "step": 3752 + }, + { + "epoch": 1.3224669603524228, + "grad_norm": 2.0694083676949107, + "learning_rate": 5.6521658009391676e-06, + "loss": 0.7507830858230591, + 
"step": 3753 + }, + { + "epoch": 1.3228193832599118, + "grad_norm": 1.7615687866950613, + "learning_rate": 5.646919665511461e-06, + "loss": 0.5164662003517151, + "step": 3754 + }, + { + "epoch": 1.323171806167401, + "grad_norm": 2.267697288539615, + "learning_rate": 5.641675007819058e-06, + "loss": 0.7059702277183533, + "step": 3755 + }, + { + "epoch": 1.3235242290748899, + "grad_norm": 2.1165471311290243, + "learning_rate": 5.636431829642359e-06, + "loss": 0.6535515189170837, + "step": 3756 + }, + { + "epoch": 1.3238766519823788, + "grad_norm": 1.782117402624855, + "learning_rate": 5.631190132761247e-06, + "loss": 0.5912176370620728, + "step": 3757 + }, + { + "epoch": 1.324229074889868, + "grad_norm": 1.6111457739999588, + "learning_rate": 5.625949918955126e-06, + "loss": 0.6527940034866333, + "step": 3758 + }, + { + "epoch": 1.324581497797357, + "grad_norm": 1.9751426120017839, + "learning_rate": 5.620711190002879e-06, + "loss": 0.7236875295639038, + "step": 3759 + }, + { + "epoch": 1.3249339207048458, + "grad_norm": 2.042390900324052, + "learning_rate": 5.6154739476829e-06, + "loss": 0.6823146343231201, + "step": 3760 + }, + { + "epoch": 1.3252863436123348, + "grad_norm": 2.058457581887865, + "learning_rate": 5.610238193773061e-06, + "loss": 0.5795537233352661, + "step": 3761 + }, + { + "epoch": 1.3256387665198237, + "grad_norm": 1.90461931046175, + "learning_rate": 5.605003930050738e-06, + "loss": 0.5530939102172852, + "step": 3762 + }, + { + "epoch": 1.3259911894273126, + "grad_norm": 1.6978922894801083, + "learning_rate": 5.599771158292806e-06, + "loss": 0.5362278819084167, + "step": 3763 + }, + { + "epoch": 1.3263436123348018, + "grad_norm": 1.9521190182519916, + "learning_rate": 5.5945398802756315e-06, + "loss": 0.6136768460273743, + "step": 3764 + }, + { + "epoch": 1.3266960352422907, + "grad_norm": 1.7782753118174626, + "learning_rate": 5.589310097775055e-06, + "loss": 0.5979033708572388, + "step": 3765 + }, + { + "epoch": 1.3270484581497797, + 
"grad_norm": 1.810593191069574, + "learning_rate": 5.584081812566439e-06, + "loss": 0.6750006675720215, + "step": 3766 + }, + { + "epoch": 1.3274008810572688, + "grad_norm": 1.6815578779160076, + "learning_rate": 5.578855026424619e-06, + "loss": 0.6004951000213623, + "step": 3767 + }, + { + "epoch": 1.3277533039647578, + "grad_norm": 1.522422246822047, + "learning_rate": 5.573629741123926e-06, + "loss": 0.570702075958252, + "step": 3768 + }, + { + "epoch": 1.3281057268722467, + "grad_norm": 1.5435622334320813, + "learning_rate": 5.5684059584381826e-06, + "loss": 0.506945788860321, + "step": 3769 + }, + { + "epoch": 1.3284581497797356, + "grad_norm": 1.647967795112189, + "learning_rate": 5.563183680140696e-06, + "loss": 0.5935436487197876, + "step": 3770 + }, + { + "epoch": 1.3288105726872246, + "grad_norm": 2.7715355389110043, + "learning_rate": 5.5579629080042755e-06, + "loss": 0.641446590423584, + "step": 3771 + }, + { + "epoch": 1.3291629955947137, + "grad_norm": 1.7489195207611605, + "learning_rate": 5.552743643801209e-06, + "loss": 0.5816437005996704, + "step": 3772 + }, + { + "epoch": 1.3295154185022027, + "grad_norm": 1.7699530777692443, + "learning_rate": 5.547525889303265e-06, + "loss": 0.666487991809845, + "step": 3773 + }, + { + "epoch": 1.3298678414096916, + "grad_norm": 2.100750588167558, + "learning_rate": 5.542309646281718e-06, + "loss": 0.7961397767066956, + "step": 3774 + }, + { + "epoch": 1.3302202643171805, + "grad_norm": 1.5292695888779975, + "learning_rate": 5.53709491650732e-06, + "loss": 0.4736033082008362, + "step": 3775 + }, + { + "epoch": 1.3305726872246697, + "grad_norm": 1.8004482810288622, + "learning_rate": 5.531881701750304e-06, + "loss": 0.542208194732666, + "step": 3776 + }, + { + "epoch": 1.3309251101321586, + "grad_norm": 1.8151751535940353, + "learning_rate": 5.526670003780399e-06, + "loss": 0.6306429505348206, + "step": 3777 + }, + { + "epoch": 1.3312775330396476, + "grad_norm": 1.7520809852323194, + "learning_rate": 
5.521459824366808e-06, + "loss": 0.531991720199585, + "step": 3778 + }, + { + "epoch": 1.3316299559471365, + "grad_norm": 1.9852873895231067, + "learning_rate": 5.516251165278235e-06, + "loss": 0.688262939453125, + "step": 3779 + }, + { + "epoch": 1.3319823788546254, + "grad_norm": 2.0026356133489416, + "learning_rate": 5.511044028282853e-06, + "loss": 0.7555293440818787, + "step": 3780 + }, + { + "epoch": 1.3323348017621146, + "grad_norm": 1.9387490035628434, + "learning_rate": 5.505838415148317e-06, + "loss": 0.7518796324729919, + "step": 3781 + }, + { + "epoch": 1.3326872246696035, + "grad_norm": 1.859399241253671, + "learning_rate": 5.500634327641777e-06, + "loss": 0.5161253809928894, + "step": 3782 + }, + { + "epoch": 1.3330396475770925, + "grad_norm": 1.5897606830745852, + "learning_rate": 5.4954317675298586e-06, + "loss": 0.5617681741714478, + "step": 3783 + }, + { + "epoch": 1.3333920704845814, + "grad_norm": 1.6894758792140483, + "learning_rate": 5.4902307365786676e-06, + "loss": 0.5707885026931763, + "step": 3784 + }, + { + "epoch": 1.3337444933920706, + "grad_norm": 1.9016603426520955, + "learning_rate": 5.485031236553792e-06, + "loss": 0.5842025876045227, + "step": 3785 + }, + { + "epoch": 1.3340969162995595, + "grad_norm": 2.278549510271659, + "learning_rate": 5.479833269220296e-06, + "loss": 0.7103949785232544, + "step": 3786 + }, + { + "epoch": 1.3344493392070484, + "grad_norm": 1.8432428404869632, + "learning_rate": 5.474636836342737e-06, + "loss": 0.7704740762710571, + "step": 3787 + }, + { + "epoch": 1.3348017621145374, + "grad_norm": 1.808727631247744, + "learning_rate": 5.469441939685137e-06, + "loss": 0.6402652263641357, + "step": 3788 + }, + { + "epoch": 1.3351541850220263, + "grad_norm": 1.892219877227891, + "learning_rate": 5.464248581011002e-06, + "loss": 0.8214348554611206, + "step": 3789 + }, + { + "epoch": 1.3355066079295155, + "grad_norm": 1.9758909531924576, + "learning_rate": 5.459056762083318e-06, + "loss": 0.6372429132461548, + 
"step": 3790 + }, + { + "epoch": 1.3358590308370044, + "grad_norm": 1.849044346394621, + "learning_rate": 5.453866484664543e-06, + "loss": 0.5418422222137451, + "step": 3791 + }, + { + "epoch": 1.3362114537444933, + "grad_norm": 1.7395663492002502, + "learning_rate": 5.448677750516613e-06, + "loss": 0.6574567556381226, + "step": 3792 + }, + { + "epoch": 1.3365638766519825, + "grad_norm": 1.9976311809706857, + "learning_rate": 5.443490561400948e-06, + "loss": 0.5174030661582947, + "step": 3793 + }, + { + "epoch": 1.3369162995594714, + "grad_norm": 1.5627335899600845, + "learning_rate": 5.4383049190784275e-06, + "loss": 0.595477819442749, + "step": 3794 + }, + { + "epoch": 1.3372687224669604, + "grad_norm": 1.845680624563864, + "learning_rate": 5.4331208253094255e-06, + "loss": 0.6177364587783813, + "step": 3795 + }, + { + "epoch": 1.3376211453744493, + "grad_norm": 1.6348460055259042, + "learning_rate": 5.4279382818537774e-06, + "loss": 0.6106897592544556, + "step": 3796 + }, + { + "epoch": 1.3379735682819383, + "grad_norm": 1.8500671496295353, + "learning_rate": 5.422757290470795e-06, + "loss": 0.46700483560562134, + "step": 3797 + }, + { + "epoch": 1.3383259911894272, + "grad_norm": 1.952200717602712, + "learning_rate": 5.417577852919262e-06, + "loss": 0.5408231019973755, + "step": 3798 + }, + { + "epoch": 1.3386784140969163, + "grad_norm": 1.8733329229880296, + "learning_rate": 5.412399970957439e-06, + "loss": 0.6430809497833252, + "step": 3799 + }, + { + "epoch": 1.3390308370044053, + "grad_norm": 1.9515663922431925, + "learning_rate": 5.4072236463430535e-06, + "loss": 0.6817858219146729, + "step": 3800 + }, + { + "epoch": 1.3393832599118942, + "grad_norm": 1.7386331074635664, + "learning_rate": 5.402048880833308e-06, + "loss": 0.5492604970932007, + "step": 3801 + }, + { + "epoch": 1.3397356828193834, + "grad_norm": 1.9883458715986422, + "learning_rate": 5.39687567618487e-06, + "loss": 0.6148543357849121, + "step": 3802 + }, + { + "epoch": 1.3400881057268723, + 
"grad_norm": 1.7245960691315507, + "learning_rate": 5.391704034153894e-06, + "loss": 0.5921820402145386, + "step": 3803 + }, + { + "epoch": 1.3404405286343613, + "grad_norm": 1.8759210914719033, + "learning_rate": 5.386533956495974e-06, + "loss": 0.49728113412857056, + "step": 3804 + }, + { + "epoch": 1.3407929515418502, + "grad_norm": 1.7899218455267007, + "learning_rate": 5.381365444966205e-06, + "loss": 0.5944808125495911, + "step": 3805 + }, + { + "epoch": 1.3411453744493391, + "grad_norm": 1.6022996204023598, + "learning_rate": 5.376198501319128e-06, + "loss": 0.5197580456733704, + "step": 3806 + }, + { + "epoch": 1.341497797356828, + "grad_norm": 1.5953524266203611, + "learning_rate": 5.3710331273087625e-06, + "loss": 0.6229256391525269, + "step": 3807 + }, + { + "epoch": 1.3418502202643172, + "grad_norm": 2.0736813734241073, + "learning_rate": 5.365869324688591e-06, + "loss": 0.5305753946304321, + "step": 3808 + }, + { + "epoch": 1.3422026431718062, + "grad_norm": 1.4520191291543518, + "learning_rate": 5.360707095211566e-06, + "loss": 0.4002259373664856, + "step": 3809 + }, + { + "epoch": 1.342555066079295, + "grad_norm": 1.8821320745162777, + "learning_rate": 5.3555464406300965e-06, + "loss": 0.5211426615715027, + "step": 3810 + }, + { + "epoch": 1.3429074889867842, + "grad_norm": 1.7112007743194535, + "learning_rate": 5.350387362696077e-06, + "loss": 0.5998013019561768, + "step": 3811 + }, + { + "epoch": 1.3432599118942732, + "grad_norm": 1.6128635046491597, + "learning_rate": 5.345229863160839e-06, + "loss": 0.5330953598022461, + "step": 3812 + }, + { + "epoch": 1.3436123348017621, + "grad_norm": 1.6570398271033384, + "learning_rate": 5.340073943775206e-06, + "loss": 0.6999118328094482, + "step": 3813 + }, + { + "epoch": 1.343964757709251, + "grad_norm": 2.060346240780723, + "learning_rate": 5.334919606289446e-06, + "loss": 0.6286367177963257, + "step": 3814 + }, + { + "epoch": 1.34431718061674, + "grad_norm": 1.4130805934733843, + "learning_rate": 
5.329766852453296e-06, + "loss": 0.5793008804321289, + "step": 3815 + }, + { + "epoch": 1.3446696035242292, + "grad_norm": 1.7815340287164039, + "learning_rate": 5.324615684015957e-06, + "loss": 0.5811383128166199, + "step": 3816 + }, + { + "epoch": 1.345022026431718, + "grad_norm": 1.8888368809882845, + "learning_rate": 5.319466102726087e-06, + "loss": 0.7389675378799438, + "step": 3817 + }, + { + "epoch": 1.345374449339207, + "grad_norm": 1.9482215135863048, + "learning_rate": 5.314318110331815e-06, + "loss": 0.6105868220329285, + "step": 3818 + }, + { + "epoch": 1.345726872246696, + "grad_norm": 1.648111237588601, + "learning_rate": 5.3091717085807235e-06, + "loss": 0.5979465842247009, + "step": 3819 + }, + { + "epoch": 1.3460792951541851, + "grad_norm": 2.100772248921902, + "learning_rate": 5.304026899219846e-06, + "loss": 0.6722681522369385, + "step": 3820 + }, + { + "epoch": 1.346431718061674, + "grad_norm": 1.5469717835195365, + "learning_rate": 5.298883683995697e-06, + "loss": 0.4687497913837433, + "step": 3821 + }, + { + "epoch": 1.346784140969163, + "grad_norm": 1.6982574361909266, + "learning_rate": 5.29374206465423e-06, + "loss": 0.563692569732666, + "step": 3822 + }, + { + "epoch": 1.347136563876652, + "grad_norm": 1.7298606992172854, + "learning_rate": 5.2886020429408716e-06, + "loss": 0.604897141456604, + "step": 3823 + }, + { + "epoch": 1.3474889867841409, + "grad_norm": 2.111770720101543, + "learning_rate": 5.283463620600493e-06, + "loss": 0.6270164251327515, + "step": 3824 + }, + { + "epoch": 1.34784140969163, + "grad_norm": 2.1238324371472954, + "learning_rate": 5.278326799377428e-06, + "loss": 0.6487830877304077, + "step": 3825 + }, + { + "epoch": 1.348193832599119, + "grad_norm": 1.58718768900561, + "learning_rate": 5.273191581015474e-06, + "loss": 0.5816935896873474, + "step": 3826 + }, + { + "epoch": 1.348546255506608, + "grad_norm": 1.72099904065486, + "learning_rate": 5.26805796725788e-06, + "loss": 0.6281115412712097, + "step": 3827 + }, + 
{ + "epoch": 1.348898678414097, + "grad_norm": 2.0975447662151288, + "learning_rate": 5.2629259598473335e-06, + "loss": 0.5031973123550415, + "step": 3828 + }, + { + "epoch": 1.349251101321586, + "grad_norm": 1.6391975654545219, + "learning_rate": 5.257795560526005e-06, + "loss": 0.6220165491104126, + "step": 3829 + }, + { + "epoch": 1.349603524229075, + "grad_norm": 1.8177506583957952, + "learning_rate": 5.2526667710354995e-06, + "loss": 0.6451058387756348, + "step": 3830 + }, + { + "epoch": 1.3499559471365639, + "grad_norm": 2.000132155225934, + "learning_rate": 5.247539593116884e-06, + "loss": 0.7524863481521606, + "step": 3831 + }, + { + "epoch": 1.3503083700440528, + "grad_norm": 1.7855711080776688, + "learning_rate": 5.242414028510674e-06, + "loss": 0.6270921230316162, + "step": 3832 + }, + { + "epoch": 1.3506607929515417, + "grad_norm": 1.8779302666662292, + "learning_rate": 5.237290078956836e-06, + "loss": 0.6196550130844116, + "step": 3833 + }, + { + "epoch": 1.351013215859031, + "grad_norm": 1.932517845360487, + "learning_rate": 5.232167746194798e-06, + "loss": 0.8512230515480042, + "step": 3834 + }, + { + "epoch": 1.3513656387665198, + "grad_norm": 1.672868645098828, + "learning_rate": 5.227047031963435e-06, + "loss": 0.5196807980537415, + "step": 3835 + }, + { + "epoch": 1.3517180616740088, + "grad_norm": 1.890472281368116, + "learning_rate": 5.2219279380010565e-06, + "loss": 0.6713111400604248, + "step": 3836 + }, + { + "epoch": 1.352070484581498, + "grad_norm": 1.8891048300322977, + "learning_rate": 5.216810466045448e-06, + "loss": 0.7150874137878418, + "step": 3837 + }, + { + "epoch": 1.3524229074889869, + "grad_norm": 1.9379344809365882, + "learning_rate": 5.211694617833827e-06, + "loss": 0.5812375545501709, + "step": 3838 + }, + { + "epoch": 1.3527753303964758, + "grad_norm": 1.6232111313971074, + "learning_rate": 5.2065803951028675e-06, + "loss": 0.5842182040214539, + "step": 3839 + }, + { + "epoch": 1.3531277533039647, + "grad_norm": 
1.7655789614212678, + "learning_rate": 5.201467799588685e-06, + "loss": 0.5432665348052979, + "step": 3840 + }, + { + "epoch": 1.3534801762114537, + "grad_norm": 1.610757257105171, + "learning_rate": 5.196356833026845e-06, + "loss": 0.551771879196167, + "step": 3841 + }, + { + "epoch": 1.3538325991189426, + "grad_norm": 2.0105503681662076, + "learning_rate": 5.19124749715237e-06, + "loss": 0.6961710453033447, + "step": 3842 + }, + { + "epoch": 1.3541850220264318, + "grad_norm": 1.9510922019810755, + "learning_rate": 5.18613979369972e-06, + "loss": 0.7105714678764343, + "step": 3843 + }, + { + "epoch": 1.3545374449339207, + "grad_norm": 1.9369232024679732, + "learning_rate": 5.181033724402789e-06, + "loss": 0.7100229263305664, + "step": 3844 + }, + { + "epoch": 1.3548898678414096, + "grad_norm": 1.6852711649451124, + "learning_rate": 5.175929290994941e-06, + "loss": 0.651812732219696, + "step": 3845 + }, + { + "epoch": 1.3552422907488988, + "grad_norm": 2.308449923325572, + "learning_rate": 5.170826495208967e-06, + "loss": 0.5194147825241089, + "step": 3846 + }, + { + "epoch": 1.3555947136563877, + "grad_norm": 1.6095794520986102, + "learning_rate": 5.16572533877711e-06, + "loss": 0.5939956307411194, + "step": 3847 + }, + { + "epoch": 1.3559471365638767, + "grad_norm": 1.7731843322868706, + "learning_rate": 5.160625823431051e-06, + "loss": 0.6434104442596436, + "step": 3848 + }, + { + "epoch": 1.3562995594713656, + "grad_norm": 1.9584483919337772, + "learning_rate": 5.155527950901914e-06, + "loss": 0.5256108045578003, + "step": 3849 + }, + { + "epoch": 1.3566519823788545, + "grad_norm": 1.5746637659323357, + "learning_rate": 5.150431722920277e-06, + "loss": 0.5632717609405518, + "step": 3850 + }, + { + "epoch": 1.3570044052863435, + "grad_norm": 1.8450205582439452, + "learning_rate": 5.145337141216149e-06, + "loss": 0.5964382886886597, + "step": 3851 + }, + { + "epoch": 1.3573568281938326, + "grad_norm": 1.9383063853676261, + "learning_rate": 5.140244207518971e-06, 
+ "loss": 0.7268366813659668, + "step": 3852 + }, + { + "epoch": 1.3577092511013216, + "grad_norm": 2.357958765027834, + "learning_rate": 5.135152923557647e-06, + "loss": 0.7376477122306824, + "step": 3853 + }, + { + "epoch": 1.3580616740088105, + "grad_norm": 1.9573550951394243, + "learning_rate": 5.130063291060505e-06, + "loss": 0.50569748878479, + "step": 3854 + }, + { + "epoch": 1.3584140969162997, + "grad_norm": 1.684535591269265, + "learning_rate": 5.12497531175532e-06, + "loss": 0.5639374256134033, + "step": 3855 + }, + { + "epoch": 1.3587665198237886, + "grad_norm": 2.0009335012534146, + "learning_rate": 5.1198889873692994e-06, + "loss": 0.5051915645599365, + "step": 3856 + }, + { + "epoch": 1.3591189427312775, + "grad_norm": 1.979939818228197, + "learning_rate": 5.114804319629088e-06, + "loss": 0.4718795120716095, + "step": 3857 + }, + { + "epoch": 1.3594713656387665, + "grad_norm": 1.7040447839749338, + "learning_rate": 5.109721310260781e-06, + "loss": 0.5684067606925964, + "step": 3858 + }, + { + "epoch": 1.3598237885462554, + "grad_norm": 1.687205926430453, + "learning_rate": 5.104639960989903e-06, + "loss": 0.5757609605789185, + "step": 3859 + }, + { + "epoch": 1.3601762114537446, + "grad_norm": 1.637859976815221, + "learning_rate": 5.099560273541401e-06, + "loss": 0.5971167087554932, + "step": 3860 + }, + { + "epoch": 1.3605286343612335, + "grad_norm": 1.9766573766085018, + "learning_rate": 5.094482249639683e-06, + "loss": 0.6959896683692932, + "step": 3861 + }, + { + "epoch": 1.3608810572687224, + "grad_norm": 1.8397057454745067, + "learning_rate": 5.089405891008574e-06, + "loss": 0.6954548358917236, + "step": 3862 + }, + { + "epoch": 1.3612334801762114, + "grad_norm": 1.5747472561310782, + "learning_rate": 5.084331199371343e-06, + "loss": 0.5659986138343811, + "step": 3863 + }, + { + "epoch": 1.3615859030837005, + "grad_norm": 1.9340659365358734, + "learning_rate": 5.079258176450687e-06, + "loss": 0.5582559108734131, + "step": 3864 + }, + { + 
"epoch": 1.3619383259911895, + "grad_norm": 1.5684621947501252, + "learning_rate": 5.0741868239687395e-06, + "loss": 0.5337075591087341, + "step": 3865 + }, + { + "epoch": 1.3622907488986784, + "grad_norm": 1.8617666338346237, + "learning_rate": 5.069117143647075e-06, + "loss": 0.621441125869751, + "step": 3866 + }, + { + "epoch": 1.3626431718061673, + "grad_norm": 1.7285404952370873, + "learning_rate": 5.064049137206677e-06, + "loss": 0.5476670861244202, + "step": 3867 + }, + { + "epoch": 1.3629955947136563, + "grad_norm": 1.9444577342582248, + "learning_rate": 5.058982806367989e-06, + "loss": 0.5357356071472168, + "step": 3868 + }, + { + "epoch": 1.3633480176211454, + "grad_norm": 2.032867685216442, + "learning_rate": 5.053918152850868e-06, + "loss": 0.5722761750221252, + "step": 3869 + }, + { + "epoch": 1.3637004405286344, + "grad_norm": 1.8019521015311857, + "learning_rate": 5.048855178374606e-06, + "loss": 0.7271207571029663, + "step": 3870 + }, + { + "epoch": 1.3640528634361233, + "grad_norm": 2.149716528128109, + "learning_rate": 5.043793884657926e-06, + "loss": 0.6213557720184326, + "step": 3871 + }, + { + "epoch": 1.3644052863436125, + "grad_norm": 1.9750542918701046, + "learning_rate": 5.03873427341898e-06, + "loss": 0.6509476900100708, + "step": 3872 + }, + { + "epoch": 1.3647577092511014, + "grad_norm": 1.8266690493980986, + "learning_rate": 5.0336763463753425e-06, + "loss": 0.5321642756462097, + "step": 3873 + }, + { + "epoch": 1.3651101321585903, + "grad_norm": 1.8114804761469812, + "learning_rate": 5.028620105244035e-06, + "loss": 0.7237476110458374, + "step": 3874 + }, + { + "epoch": 1.3654625550660793, + "grad_norm": 2.014453779183698, + "learning_rate": 5.0235655517414805e-06, + "loss": 0.6653447151184082, + "step": 3875 + }, + { + "epoch": 1.3658149779735682, + "grad_norm": 1.843622237552059, + "learning_rate": 5.018512687583552e-06, + "loss": 0.6188938617706299, + "step": 3876 + }, + { + "epoch": 1.3661674008810571, + "grad_norm": 
1.8211870806299153, + "learning_rate": 5.013461514485536e-06, + "loss": 0.6341606378555298, + "step": 3877 + }, + { + "epoch": 1.3665198237885463, + "grad_norm": 1.6224290182707664, + "learning_rate": 5.00841203416215e-06, + "loss": 0.6148994565010071, + "step": 3878 + }, + { + "epoch": 1.3668722466960352, + "grad_norm": 1.8692541577175399, + "learning_rate": 5.003364248327533e-06, + "loss": 0.6292222142219543, + "step": 3879 + }, + { + "epoch": 1.3672246696035242, + "grad_norm": 1.618170468267519, + "learning_rate": 4.998318158695255e-06, + "loss": 0.6648836135864258, + "step": 3880 + }, + { + "epoch": 1.3675770925110133, + "grad_norm": 6.866040476375875, + "learning_rate": 4.993273766978297e-06, + "loss": 0.5175273418426514, + "step": 3881 + }, + { + "epoch": 1.3679295154185023, + "grad_norm": 1.5661461645683938, + "learning_rate": 4.98823107488909e-06, + "loss": 0.5686253309249878, + "step": 3882 + }, + { + "epoch": 1.3682819383259912, + "grad_norm": 1.9697672783538545, + "learning_rate": 4.983190084139452e-06, + "loss": 0.6128156185150146, + "step": 3883 + }, + { + "epoch": 1.3686343612334801, + "grad_norm": 1.9331016188284555, + "learning_rate": 4.978150796440656e-06, + "loss": 0.6849625110626221, + "step": 3884 + }, + { + "epoch": 1.368986784140969, + "grad_norm": 1.5986771035358114, + "learning_rate": 4.973113213503379e-06, + "loss": 0.5735955238342285, + "step": 3885 + }, + { + "epoch": 1.369339207048458, + "grad_norm": 1.6049593584012303, + "learning_rate": 4.968077337037724e-06, + "loss": 0.4584425091743469, + "step": 3886 + }, + { + "epoch": 1.3696916299559472, + "grad_norm": 1.9525312670752564, + "learning_rate": 4.963043168753212e-06, + "loss": 0.547109067440033, + "step": 3887 + }, + { + "epoch": 1.3700440528634361, + "grad_norm": 2.113357180829694, + "learning_rate": 4.9580107103587895e-06, + "loss": 0.6966128349304199, + "step": 3888 + }, + { + "epoch": 1.370396475770925, + "grad_norm": 1.7817002019358994, + "learning_rate": 4.952979963562814e-06, + 
"loss": 0.6275819540023804, + "step": 3889 + }, + { + "epoch": 1.3707488986784142, + "grad_norm": 1.6096829752005641, + "learning_rate": 4.94795093007308e-06, + "loss": 0.5678467750549316, + "step": 3890 + }, + { + "epoch": 1.3711013215859031, + "grad_norm": 1.8874234747665013, + "learning_rate": 4.942923611596772e-06, + "loss": 0.6516115665435791, + "step": 3891 + }, + { + "epoch": 1.371453744493392, + "grad_norm": 1.8638529672264463, + "learning_rate": 4.937898009840518e-06, + "loss": 0.6279621124267578, + "step": 3892 + }, + { + "epoch": 1.371806167400881, + "grad_norm": 1.6187117518672614, + "learning_rate": 4.932874126510353e-06, + "loss": 0.6123322248458862, + "step": 3893 + }, + { + "epoch": 1.37215859030837, + "grad_norm": 1.6259761787603553, + "learning_rate": 4.927851963311726e-06, + "loss": 0.43412432074546814, + "step": 3894 + }, + { + "epoch": 1.372511013215859, + "grad_norm": 1.859998329311036, + "learning_rate": 4.922831521949507e-06, + "loss": 0.6582022905349731, + "step": 3895 + }, + { + "epoch": 1.372863436123348, + "grad_norm": 1.8966645456702385, + "learning_rate": 4.917812804127976e-06, + "loss": 0.6219466328620911, + "step": 3896 + }, + { + "epoch": 1.373215859030837, + "grad_norm": 2.056798959647299, + "learning_rate": 4.9127958115508365e-06, + "loss": 0.5352981090545654, + "step": 3897 + }, + { + "epoch": 1.373568281938326, + "grad_norm": 1.5240218181276974, + "learning_rate": 4.907780545921205e-06, + "loss": 0.47646182775497437, + "step": 3898 + }, + { + "epoch": 1.373920704845815, + "grad_norm": 1.6949945802187276, + "learning_rate": 4.902767008941594e-06, + "loss": 0.5335453748703003, + "step": 3899 + }, + { + "epoch": 1.374273127753304, + "grad_norm": 1.7931951401372748, + "learning_rate": 4.897755202313954e-06, + "loss": 0.576435923576355, + "step": 3900 + }, + { + "epoch": 1.374625550660793, + "grad_norm": 1.6675338707159029, + "learning_rate": 4.8927451277396365e-06, + "loss": 0.533431887626648, + "step": 3901 + }, + { + "epoch": 
1.3749779735682819, + "grad_norm": 1.7439550653197133, + "learning_rate": 4.8877367869194035e-06, + "loss": 0.6892110109329224, + "step": 3902 + }, + { + "epoch": 1.3753303964757708, + "grad_norm": 1.9209875137364842, + "learning_rate": 4.8827301815534335e-06, + "loss": 0.7028052806854248, + "step": 3903 + }, + { + "epoch": 1.37568281938326, + "grad_norm": 1.8413166797931897, + "learning_rate": 4.877725313341306e-06, + "loss": 0.6883414387702942, + "step": 3904 + }, + { + "epoch": 1.376035242290749, + "grad_norm": 2.145518516472349, + "learning_rate": 4.8727221839820285e-06, + "loss": 0.6712944507598877, + "step": 3905 + }, + { + "epoch": 1.3763876651982379, + "grad_norm": 1.6297297090329885, + "learning_rate": 4.867720795174006e-06, + "loss": 0.6139085292816162, + "step": 3906 + }, + { + "epoch": 1.3767400881057268, + "grad_norm": 1.8425831405666082, + "learning_rate": 4.862721148615043e-06, + "loss": 0.6463953256607056, + "step": 3907 + }, + { + "epoch": 1.377092511013216, + "grad_norm": 1.768461759599311, + "learning_rate": 4.857723246002376e-06, + "loss": 0.6790587306022644, + "step": 3908 + }, + { + "epoch": 1.3774449339207049, + "grad_norm": 1.7177146369820009, + "learning_rate": 4.852727089032634e-06, + "loss": 0.4996854066848755, + "step": 3909 + }, + { + "epoch": 1.3777973568281938, + "grad_norm": 1.8098347886488457, + "learning_rate": 4.847732679401855e-06, + "loss": 0.5826590061187744, + "step": 3910 + }, + { + "epoch": 1.3781497797356828, + "grad_norm": 1.8997892974208295, + "learning_rate": 4.842740018805489e-06, + "loss": 0.5044558048248291, + "step": 3911 + }, + { + "epoch": 1.3785022026431717, + "grad_norm": 1.873679943847948, + "learning_rate": 4.837749108938381e-06, + "loss": 0.49022918939590454, + "step": 3912 + }, + { + "epoch": 1.3788546255506609, + "grad_norm": 1.9497488299017371, + "learning_rate": 4.832759951494798e-06, + "loss": 0.7034850120544434, + "step": 3913 + }, + { + "epoch": 1.3792070484581498, + "grad_norm": 1.8582811393472771, + 
"learning_rate": 4.827772548168408e-06, + "loss": 0.5835636854171753, + "step": 3914 + }, + { + "epoch": 1.3795594713656387, + "grad_norm": 1.8615896532434415, + "learning_rate": 4.822786900652262e-06, + "loss": 0.6000608205795288, + "step": 3915 + }, + { + "epoch": 1.3799118942731279, + "grad_norm": 2.003742345218382, + "learning_rate": 4.817803010638847e-06, + "loss": 0.6121091842651367, + "step": 3916 + }, + { + "epoch": 1.3802643171806168, + "grad_norm": 1.80308866184307, + "learning_rate": 4.812820879820034e-06, + "loss": 0.457197904586792, + "step": 3917 + }, + { + "epoch": 1.3806167400881058, + "grad_norm": 1.8962611537179284, + "learning_rate": 4.807840509887102e-06, + "loss": 0.6495843529701233, + "step": 3918 + }, + { + "epoch": 1.3809691629955947, + "grad_norm": 1.9212587769996015, + "learning_rate": 4.80286190253073e-06, + "loss": 0.6245059967041016, + "step": 3919 + }, + { + "epoch": 1.3813215859030836, + "grad_norm": 2.020688644956673, + "learning_rate": 4.797885059440998e-06, + "loss": 0.5648606419563293, + "step": 3920 + }, + { + "epoch": 1.3816740088105726, + "grad_norm": 1.93208096226899, + "learning_rate": 4.7929099823073945e-06, + "loss": 0.6593670845031738, + "step": 3921 + }, + { + "epoch": 1.3820264317180617, + "grad_norm": 1.8973564890389945, + "learning_rate": 4.787936672818807e-06, + "loss": 0.6400346159934998, + "step": 3922 + }, + { + "epoch": 1.3823788546255507, + "grad_norm": 1.8684904083901948, + "learning_rate": 4.782965132663505e-06, + "loss": 0.6042170524597168, + "step": 3923 + }, + { + "epoch": 1.3827312775330396, + "grad_norm": 1.8230700495851246, + "learning_rate": 4.777995363529184e-06, + "loss": 0.6224586963653564, + "step": 3924 + }, + { + "epoch": 1.3830837004405288, + "grad_norm": 2.09797321253942, + "learning_rate": 4.7730273671029235e-06, + "loss": 0.6944444179534912, + "step": 3925 + }, + { + "epoch": 1.3834361233480177, + "grad_norm": 1.976613089140818, + "learning_rate": 4.768061145071201e-06, + "loss": 
0.5871950387954712, + "step": 3926 + }, + { + "epoch": 1.3837885462555066, + "grad_norm": 1.7713632438369786, + "learning_rate": 4.763096699119897e-06, + "loss": 0.6438909769058228, + "step": 3927 + }, + { + "epoch": 1.3841409691629956, + "grad_norm": 1.6141008005869943, + "learning_rate": 4.75813403093428e-06, + "loss": 0.6338443756103516, + "step": 3928 + }, + { + "epoch": 1.3844933920704845, + "grad_norm": 2.2680544531424753, + "learning_rate": 4.753173142199036e-06, + "loss": 0.6343874931335449, + "step": 3929 + }, + { + "epoch": 1.3848458149779734, + "grad_norm": 1.7233771229601555, + "learning_rate": 4.7482140345982174e-06, + "loss": 0.5383629202842712, + "step": 3930 + }, + { + "epoch": 1.3851982378854626, + "grad_norm": 1.8699549247596075, + "learning_rate": 4.743256709815289e-06, + "loss": 0.5365063548088074, + "step": 3931 + }, + { + "epoch": 1.3855506607929515, + "grad_norm": 2.2583515376147694, + "learning_rate": 4.738301169533116e-06, + "loss": 0.6310757398605347, + "step": 3932 + }, + { + "epoch": 1.3859030837004405, + "grad_norm": 2.1022070754037476, + "learning_rate": 4.733347415433946e-06, + "loss": 0.7609038949012756, + "step": 3933 + }, + { + "epoch": 1.3862555066079296, + "grad_norm": 2.174490642392946, + "learning_rate": 4.728395449199423e-06, + "loss": 0.5837516784667969, + "step": 3934 + }, + { + "epoch": 1.3866079295154186, + "grad_norm": 1.719340289699717, + "learning_rate": 4.7234452725105875e-06, + "loss": 0.6075407862663269, + "step": 3935 + }, + { + "epoch": 1.3869603524229075, + "grad_norm": 1.7651152509667416, + "learning_rate": 4.718496887047864e-06, + "loss": 0.5246843099594116, + "step": 3936 + }, + { + "epoch": 1.3873127753303964, + "grad_norm": 1.6874306455639787, + "learning_rate": 4.713550294491091e-06, + "loss": 0.6256884336471558, + "step": 3937 + }, + { + "epoch": 1.3876651982378854, + "grad_norm": 1.632156841956259, + "learning_rate": 4.708605496519467e-06, + "loss": 0.5039727687835693, + "step": 3938 + }, + { + "epoch": 
1.3880176211453745, + "grad_norm": 2.0143508196146196, + "learning_rate": 4.703662494811599e-06, + "loss": 0.5302769541740417, + "step": 3939 + }, + { + "epoch": 1.3883700440528635, + "grad_norm": 1.6358403288542849, + "learning_rate": 4.698721291045491e-06, + "loss": 0.654889702796936, + "step": 3940 + }, + { + "epoch": 1.3887224669603524, + "grad_norm": 1.8724260838054423, + "learning_rate": 4.693781886898521e-06, + "loss": 0.5571156740188599, + "step": 3941 + }, + { + "epoch": 1.3890748898678413, + "grad_norm": 1.8352093678478665, + "learning_rate": 4.688844284047466e-06, + "loss": 0.489155113697052, + "step": 3942 + }, + { + "epoch": 1.3894273127753305, + "grad_norm": 2.3056906716340793, + "learning_rate": 4.683908484168487e-06, + "loss": 0.6422649621963501, + "step": 3943 + }, + { + "epoch": 1.3897797356828194, + "grad_norm": 2.1056674936107345, + "learning_rate": 4.67897448893713e-06, + "loss": 0.6800041794776917, + "step": 3944 + }, + { + "epoch": 1.3901321585903084, + "grad_norm": 1.9512416893069657, + "learning_rate": 4.674042300028345e-06, + "loss": 0.6091655492782593, + "step": 3945 + }, + { + "epoch": 1.3904845814977973, + "grad_norm": 1.5832960247380383, + "learning_rate": 4.669111919116442e-06, + "loss": 0.6217864751815796, + "step": 3946 + }, + { + "epoch": 1.3908370044052862, + "grad_norm": 1.9328669999328483, + "learning_rate": 4.664183347875144e-06, + "loss": 0.6140862703323364, + "step": 3947 + }, + { + "epoch": 1.3911894273127754, + "grad_norm": 1.5467868836495022, + "learning_rate": 4.659256587977542e-06, + "loss": 0.5485835075378418, + "step": 3948 + }, + { + "epoch": 1.3915418502202643, + "grad_norm": 1.9704789330010746, + "learning_rate": 4.654331641096118e-06, + "loss": 0.642849862575531, + "step": 3949 + }, + { + "epoch": 1.3918942731277533, + "grad_norm": 3.421035640959237, + "learning_rate": 4.649408508902739e-06, + "loss": 0.7084407806396484, + "step": 3950 + }, + { + "epoch": 1.3922466960352424, + "grad_norm": 1.780782004302536, + 
"learning_rate": 4.644487193068653e-06, + "loss": 0.4798510670661926, + "step": 3951 + }, + { + "epoch": 1.3925991189427314, + "grad_norm": 2.0571809281532056, + "learning_rate": 4.639567695264493e-06, + "loss": 0.6350974440574646, + "step": 3952 + }, + { + "epoch": 1.3929515418502203, + "grad_norm": 1.6636780012798107, + "learning_rate": 4.634650017160285e-06, + "loss": 0.6046940684318542, + "step": 3953 + }, + { + "epoch": 1.3933039647577092, + "grad_norm": 1.8656342511774384, + "learning_rate": 4.629734160425412e-06, + "loss": 0.5262438058853149, + "step": 3954 + }, + { + "epoch": 1.3936563876651982, + "grad_norm": 1.6602375526420536, + "learning_rate": 4.6248201267286666e-06, + "loss": 0.4836997985839844, + "step": 3955 + }, + { + "epoch": 1.394008810572687, + "grad_norm": 1.8387545975251456, + "learning_rate": 4.619907917738206e-06, + "loss": 0.5491573810577393, + "step": 3956 + }, + { + "epoch": 1.3943612334801763, + "grad_norm": 1.7103638500009937, + "learning_rate": 4.614997535121574e-06, + "loss": 0.5778772830963135, + "step": 3957 + }, + { + "epoch": 1.3947136563876652, + "grad_norm": 1.886204345973439, + "learning_rate": 4.61008898054569e-06, + "loss": 0.6235651969909668, + "step": 3958 + }, + { + "epoch": 1.3950660792951541, + "grad_norm": 1.533461324415723, + "learning_rate": 4.605182255676857e-06, + "loss": 0.5192956924438477, + "step": 3959 + }, + { + "epoch": 1.3954185022026433, + "grad_norm": 1.6490801359766816, + "learning_rate": 4.600277362180753e-06, + "loss": 0.5652563571929932, + "step": 3960 + }, + { + "epoch": 1.3957709251101322, + "grad_norm": 2.0491508628562594, + "learning_rate": 4.595374301722445e-06, + "loss": 0.6451884508132935, + "step": 3961 + }, + { + "epoch": 1.3961233480176212, + "grad_norm": 1.6267669051180629, + "learning_rate": 4.5904730759663555e-06, + "loss": 0.6358006000518799, + "step": 3962 + }, + { + "epoch": 1.39647577092511, + "grad_norm": 1.9868299068304147, + "learning_rate": 4.5855736865763104e-06, + "loss": 
0.6122751832008362, + "step": 3963 + }, + { + "epoch": 1.396828193832599, + "grad_norm": 1.6563994945684704, + "learning_rate": 4.580676135215495e-06, + "loss": 0.5563797354698181, + "step": 3964 + }, + { + "epoch": 1.397180616740088, + "grad_norm": 1.7043306637307543, + "learning_rate": 4.575780423546476e-06, + "loss": 0.5915960669517517, + "step": 3965 + }, + { + "epoch": 1.3975330396475771, + "grad_norm": 2.2793683384994363, + "learning_rate": 4.570886553231196e-06, + "loss": 0.5755159854888916, + "step": 3966 + }, + { + "epoch": 1.397885462555066, + "grad_norm": 1.713166792254198, + "learning_rate": 4.565994525930967e-06, + "loss": 0.7017625570297241, + "step": 3967 + }, + { + "epoch": 1.398237885462555, + "grad_norm": 1.901331269180062, + "learning_rate": 4.5611043433064875e-06, + "loss": 0.7623441815376282, + "step": 3968 + }, + { + "epoch": 1.3985903083700442, + "grad_norm": 1.772343766995311, + "learning_rate": 4.556216007017822e-06, + "loss": 0.5561864376068115, + "step": 3969 + }, + { + "epoch": 1.398942731277533, + "grad_norm": 1.7107369517825557, + "learning_rate": 4.5513295187243975e-06, + "loss": 0.516582727432251, + "step": 3970 + }, + { + "epoch": 1.399295154185022, + "grad_norm": 1.6087287767761917, + "learning_rate": 4.5464448800850366e-06, + "loss": 0.6324976682662964, + "step": 3971 + }, + { + "epoch": 1.399647577092511, + "grad_norm": 1.660721417089598, + "learning_rate": 4.541562092757918e-06, + "loss": 0.5926251411437988, + "step": 3972 + }, + { + "epoch": 1.4, + "grad_norm": 1.7443423550845751, + "learning_rate": 4.536681158400598e-06, + "loss": 0.5677082538604736, + "step": 3973 + }, + { + "epoch": 1.400352422907489, + "grad_norm": 1.791823926745788, + "learning_rate": 4.531802078669997e-06, + "loss": 0.5267887115478516, + "step": 3974 + }, + { + "epoch": 1.400704845814978, + "grad_norm": 2.3840846637544617, + "learning_rate": 4.526924855222411e-06, + "loss": 0.6361796855926514, + "step": 3975 + }, + { + "epoch": 1.401057268722467, + 
"grad_norm": 1.9992656380929168, + "learning_rate": 4.522049489713513e-06, + "loss": 0.5906916856765747, + "step": 3976 + }, + { + "epoch": 1.4014096916299559, + "grad_norm": 1.932616358578933, + "learning_rate": 4.517175983798334e-06, + "loss": 0.647320568561554, + "step": 3977 + }, + { + "epoch": 1.401762114537445, + "grad_norm": 1.7297380971513312, + "learning_rate": 4.512304339131271e-06, + "loss": 0.6129240989685059, + "step": 3978 + }, + { + "epoch": 1.402114537444934, + "grad_norm": 1.8820056515419912, + "learning_rate": 4.507434557366106e-06, + "loss": 0.5550417900085449, + "step": 3979 + }, + { + "epoch": 1.402466960352423, + "grad_norm": 3.410101687197828, + "learning_rate": 4.502566640155972e-06, + "loss": 0.5677829384803772, + "step": 3980 + }, + { + "epoch": 1.4028193832599118, + "grad_norm": 2.037826582552855, + "learning_rate": 4.497700589153379e-06, + "loss": 0.6627114415168762, + "step": 3981 + }, + { + "epoch": 1.4031718061674008, + "grad_norm": 2.0278559165710197, + "learning_rate": 4.492836406010197e-06, + "loss": 0.7225712537765503, + "step": 3982 + }, + { + "epoch": 1.40352422907489, + "grad_norm": 1.6877243893704514, + "learning_rate": 4.487974092377661e-06, + "loss": 0.5259708762168884, + "step": 3983 + }, + { + "epoch": 1.4038766519823789, + "grad_norm": 1.930838228409862, + "learning_rate": 4.4831136499063856e-06, + "loss": 0.5509500503540039, + "step": 3984 + }, + { + "epoch": 1.4042290748898678, + "grad_norm": 1.862328702111506, + "learning_rate": 4.478255080246338e-06, + "loss": 0.5436242818832397, + "step": 3985 + }, + { + "epoch": 1.4045814977973567, + "grad_norm": 1.9252586062101578, + "learning_rate": 4.473398385046839e-06, + "loss": 0.591008186340332, + "step": 3986 + }, + { + "epoch": 1.404933920704846, + "grad_norm": 1.8551590253300663, + "learning_rate": 4.4685435659565975e-06, + "loss": 0.7463438510894775, + "step": 3987 + }, + { + "epoch": 1.4052863436123348, + "grad_norm": 2.7212267236094445, + "learning_rate": 
4.46369062462367e-06, + "loss": 0.5672414898872375, + "step": 3988 + }, + { + "epoch": 1.4056387665198238, + "grad_norm": 1.9023461618951703, + "learning_rate": 4.458839562695481e-06, + "loss": 0.6022762060165405, + "step": 3989 + }, + { + "epoch": 1.4059911894273127, + "grad_norm": 2.975414442801074, + "learning_rate": 4.453990381818811e-06, + "loss": 0.8312792181968689, + "step": 3990 + }, + { + "epoch": 1.4063436123348017, + "grad_norm": 1.5291152049255947, + "learning_rate": 4.4491430836398055e-06, + "loss": 0.475655198097229, + "step": 3991 + }, + { + "epoch": 1.4066960352422908, + "grad_norm": 2.205738960261052, + "learning_rate": 4.444297669803981e-06, + "loss": 0.6317172050476074, + "step": 3992 + }, + { + "epoch": 1.4070484581497797, + "grad_norm": 1.7590033801874774, + "learning_rate": 4.439454141956194e-06, + "loss": 0.5412036180496216, + "step": 3993 + }, + { + "epoch": 1.4074008810572687, + "grad_norm": 1.8534848369039538, + "learning_rate": 4.434612501740671e-06, + "loss": 0.6401170492172241, + "step": 3994 + }, + { + "epoch": 1.4077533039647578, + "grad_norm": 1.6819739888663638, + "learning_rate": 4.429772750801007e-06, + "loss": 0.6175628900527954, + "step": 3995 + }, + { + "epoch": 1.4081057268722468, + "grad_norm": 1.9863542351176011, + "learning_rate": 4.424934890780142e-06, + "loss": 0.6875946521759033, + "step": 3996 + }, + { + "epoch": 1.4084581497797357, + "grad_norm": 1.6357928529424866, + "learning_rate": 4.420098923320378e-06, + "loss": 0.6404017210006714, + "step": 3997 + }, + { + "epoch": 1.4088105726872246, + "grad_norm": 2.096371594852834, + "learning_rate": 4.415264850063378e-06, + "loss": 0.7569783329963684, + "step": 3998 + }, + { + "epoch": 1.4091629955947136, + "grad_norm": 1.9373448832520324, + "learning_rate": 4.410432672650154e-06, + "loss": 0.6125228404998779, + "step": 3999 + }, + { + "epoch": 1.4095154185022025, + "grad_norm": 1.8206271046178746, + "learning_rate": 4.405602392721091e-06, + "loss": 0.6187582015991211, + 
"step": 4000 + }, + { + "epoch": 1.4098678414096917, + "grad_norm": 1.6622405329305723, + "learning_rate": 4.400774011915907e-06, + "loss": 0.6086148023605347, + "step": 4001 + }, + { + "epoch": 1.4102202643171806, + "grad_norm": 1.4174012456939833, + "learning_rate": 4.3959475318736885e-06, + "loss": 0.4140232801437378, + "step": 4002 + }, + { + "epoch": 1.4105726872246696, + "grad_norm": 1.836512159334361, + "learning_rate": 4.391122954232883e-06, + "loss": 0.5065237879753113, + "step": 4003 + }, + { + "epoch": 1.4109251101321587, + "grad_norm": 1.458932644295331, + "learning_rate": 4.386300280631279e-06, + "loss": 0.4817734658718109, + "step": 4004 + }, + { + "epoch": 1.4112775330396476, + "grad_norm": 1.6662288245729417, + "learning_rate": 4.381479512706025e-06, + "loss": 0.6339706778526306, + "step": 4005 + }, + { + "epoch": 1.4116299559471366, + "grad_norm": 2.1459595089971653, + "learning_rate": 4.376660652093621e-06, + "loss": 0.6581720113754272, + "step": 4006 + }, + { + "epoch": 1.4119823788546255, + "grad_norm": 2.1052256395432503, + "learning_rate": 4.3718437004299174e-06, + "loss": 0.722156286239624, + "step": 4007 + }, + { + "epoch": 1.4123348017621145, + "grad_norm": 2.007137048045836, + "learning_rate": 4.36702865935013e-06, + "loss": 0.5262913703918457, + "step": 4008 + }, + { + "epoch": 1.4126872246696034, + "grad_norm": 1.6239575731802327, + "learning_rate": 4.362215530488805e-06, + "loss": 0.6242132186889648, + "step": 4009 + }, + { + "epoch": 1.4130396475770926, + "grad_norm": 1.6412038783326008, + "learning_rate": 4.35740431547985e-06, + "loss": 0.48776593804359436, + "step": 4010 + }, + { + "epoch": 1.4133920704845815, + "grad_norm": 1.4539922592281447, + "learning_rate": 4.352595015956528e-06, + "loss": 0.5528746843338013, + "step": 4011 + }, + { + "epoch": 1.4137444933920704, + "grad_norm": 1.881555645901769, + "learning_rate": 4.347787633551444e-06, + "loss": 0.6282942295074463, + "step": 4012 + }, + { + "epoch": 1.4140969162995596, + 
"grad_norm": 1.997464157113011, + "learning_rate": 4.342982169896555e-06, + "loss": 0.6113284826278687, + "step": 4013 + }, + { + "epoch": 1.4144493392070485, + "grad_norm": 1.696170493669202, + "learning_rate": 4.3381786266231685e-06, + "loss": 0.5756875872612, + "step": 4014 + }, + { + "epoch": 1.4148017621145375, + "grad_norm": 1.8012350757266906, + "learning_rate": 4.333377005361931e-06, + "loss": 0.6180154085159302, + "step": 4015 + }, + { + "epoch": 1.4151541850220264, + "grad_norm": 2.2454634074572146, + "learning_rate": 4.328577307742855e-06, + "loss": 0.5728827118873596, + "step": 4016 + }, + { + "epoch": 1.4155066079295153, + "grad_norm": 1.7928891595746113, + "learning_rate": 4.323779535395278e-06, + "loss": 0.5248062014579773, + "step": 4017 + }, + { + "epoch": 1.4158590308370045, + "grad_norm": 1.7454680737255013, + "learning_rate": 4.318983689947895e-06, + "loss": 0.5938228368759155, + "step": 4018 + }, + { + "epoch": 1.4162114537444934, + "grad_norm": 1.8931460456480809, + "learning_rate": 4.3141897730287544e-06, + "loss": 0.7085045576095581, + "step": 4019 + }, + { + "epoch": 1.4165638766519824, + "grad_norm": 2.566425134177144, + "learning_rate": 4.309397786265235e-06, + "loss": 0.599969744682312, + "step": 4020 + }, + { + "epoch": 1.4169162995594713, + "grad_norm": 2.186511304730039, + "learning_rate": 4.30460773128407e-06, + "loss": 0.5784738063812256, + "step": 4021 + }, + { + "epoch": 1.4172687224669605, + "grad_norm": 1.8802349185240168, + "learning_rate": 4.299819609711332e-06, + "loss": 0.6492793560028076, + "step": 4022 + }, + { + "epoch": 1.4176211453744494, + "grad_norm": 1.6886854891683005, + "learning_rate": 4.2950334231724375e-06, + "loss": 0.6690749526023865, + "step": 4023 + }, + { + "epoch": 1.4179735682819383, + "grad_norm": 1.8482135160791267, + "learning_rate": 4.290249173292158e-06, + "loss": 0.5919139981269836, + "step": 4024 + }, + { + "epoch": 1.4183259911894273, + "grad_norm": 1.6202611135629348, + "learning_rate": 
4.285466861694583e-06, + "loss": 0.5661630630493164, + "step": 4025 + }, + { + "epoch": 1.4186784140969162, + "grad_norm": 1.7328062744712673, + "learning_rate": 4.280686490003169e-06, + "loss": 0.547730565071106, + "step": 4026 + }, + { + "epoch": 1.4190308370044054, + "grad_norm": 1.7270546788274348, + "learning_rate": 4.2759080598406985e-06, + "loss": 0.6150445938110352, + "step": 4027 + }, + { + "epoch": 1.4193832599118943, + "grad_norm": 2.048539568947664, + "learning_rate": 4.271131572829303e-06, + "loss": 0.6522917747497559, + "step": 4028 + }, + { + "epoch": 1.4197356828193832, + "grad_norm": 1.952118534937186, + "learning_rate": 4.266357030590449e-06, + "loss": 0.8456230163574219, + "step": 4029 + }, + { + "epoch": 1.4200881057268724, + "grad_norm": 1.810792149813479, + "learning_rate": 4.261584434744945e-06, + "loss": 0.6059526801109314, + "step": 4030 + }, + { + "epoch": 1.4204405286343613, + "grad_norm": 1.8213808222910857, + "learning_rate": 4.256813786912937e-06, + "loss": 0.6289907693862915, + "step": 4031 + }, + { + "epoch": 1.4207929515418503, + "grad_norm": 1.5510911353998291, + "learning_rate": 4.252045088713919e-06, + "loss": 0.48954638838768005, + "step": 4032 + }, + { + "epoch": 1.4211453744493392, + "grad_norm": 2.020061779490103, + "learning_rate": 4.2472783417667055e-06, + "loss": 0.6999461054801941, + "step": 4033 + }, + { + "epoch": 1.4214977973568281, + "grad_norm": 1.9629261898681878, + "learning_rate": 4.242513547689466e-06, + "loss": 0.5610899925231934, + "step": 4034 + }, + { + "epoch": 1.421850220264317, + "grad_norm": 1.8415242379631616, + "learning_rate": 4.237750708099697e-06, + "loss": 0.6240172386169434, + "step": 4035 + }, + { + "epoch": 1.4222026431718062, + "grad_norm": 1.887889822972652, + "learning_rate": 4.2329898246142356e-06, + "loss": 0.6368240118026733, + "step": 4036 + }, + { + "epoch": 1.4225550660792952, + "grad_norm": 2.0839652521207483, + "learning_rate": 4.228230898849253e-06, + "loss": 0.6242600679397583, + 
"step": 4037 + }, + { + "epoch": 1.422907488986784, + "grad_norm": 1.7622749957844728, + "learning_rate": 4.223473932420255e-06, + "loss": 0.6446138620376587, + "step": 4038 + }, + { + "epoch": 1.4232599118942733, + "grad_norm": 1.8800444061446486, + "learning_rate": 4.218718926942081e-06, + "loss": 0.7108229398727417, + "step": 4039 + }, + { + "epoch": 1.4236123348017622, + "grad_norm": 1.7917659431298882, + "learning_rate": 4.213965884028919e-06, + "loss": 0.5279660820960999, + "step": 4040 + }, + { + "epoch": 1.4239647577092511, + "grad_norm": 1.7747691200912903, + "learning_rate": 4.209214805294264e-06, + "loss": 0.6422853469848633, + "step": 4041 + }, + { + "epoch": 1.42431718061674, + "grad_norm": 1.848339978168105, + "learning_rate": 4.2044656923509704e-06, + "loss": 0.6355602741241455, + "step": 4042 + }, + { + "epoch": 1.424669603524229, + "grad_norm": 1.7787421175687093, + "learning_rate": 4.19971854681121e-06, + "loss": 0.5351370573043823, + "step": 4043 + }, + { + "epoch": 1.425022026431718, + "grad_norm": 2.0300248809256987, + "learning_rate": 4.194973370286492e-06, + "loss": 0.5573978424072266, + "step": 4044 + }, + { + "epoch": 1.425374449339207, + "grad_norm": 1.9433750628346875, + "learning_rate": 4.1902301643876555e-06, + "loss": 0.5865412950515747, + "step": 4045 + }, + { + "epoch": 1.425726872246696, + "grad_norm": 2.102324249123369, + "learning_rate": 4.185488930724868e-06, + "loss": 0.6231919527053833, + "step": 4046 + }, + { + "epoch": 1.426079295154185, + "grad_norm": 1.7135783989067233, + "learning_rate": 4.180749670907638e-06, + "loss": 0.48964112997055054, + "step": 4047 + }, + { + "epoch": 1.4264317180616741, + "grad_norm": 2.0973459527664686, + "learning_rate": 4.176012386544796e-06, + "loss": 0.6299121975898743, + "step": 4048 + }, + { + "epoch": 1.426784140969163, + "grad_norm": 1.7239115182277114, + "learning_rate": 4.171277079244492e-06, + "loss": 0.5612789392471313, + "step": 4049 + }, + { + "epoch": 1.427136563876652, + 
"grad_norm": 1.7396324571675468, + "learning_rate": 4.166543750614227e-06, + "loss": 0.4315321147441864, + "step": 4050 + }, + { + "epoch": 1.427488986784141, + "grad_norm": 2.0031203112343094, + "learning_rate": 4.1618124022608146e-06, + "loss": 0.6300851702690125, + "step": 4051 + }, + { + "epoch": 1.4278414096916299, + "grad_norm": 1.7808675185736187, + "learning_rate": 4.1570830357904e-06, + "loss": 0.6258795261383057, + "step": 4052 + }, + { + "epoch": 1.4281938325991188, + "grad_norm": 1.9069027085637078, + "learning_rate": 4.152355652808457e-06, + "loss": 0.7364479303359985, + "step": 4053 + }, + { + "epoch": 1.428546255506608, + "grad_norm": 1.8474323145969993, + "learning_rate": 4.147630254919781e-06, + "loss": 0.44845038652420044, + "step": 4054 + }, + { + "epoch": 1.428898678414097, + "grad_norm": 1.6823890398766386, + "learning_rate": 4.142906843728504e-06, + "loss": 0.516815185546875, + "step": 4055 + }, + { + "epoch": 1.4292511013215858, + "grad_norm": 1.6276914964492604, + "learning_rate": 4.138185420838079e-06, + "loss": 0.6296960711479187, + "step": 4056 + }, + { + "epoch": 1.429603524229075, + "grad_norm": 1.728227730408027, + "learning_rate": 4.133465987851269e-06, + "loss": 0.5709103345870972, + "step": 4057 + }, + { + "epoch": 1.429955947136564, + "grad_norm": 1.7709951139170081, + "learning_rate": 4.128748546370186e-06, + "loss": 0.5672547817230225, + "step": 4058 + }, + { + "epoch": 1.4303083700440529, + "grad_norm": 1.8161591736426752, + "learning_rate": 4.124033097996252e-06, + "loss": 0.5927014350891113, + "step": 4059 + }, + { + "epoch": 1.4306607929515418, + "grad_norm": 1.75056683772296, + "learning_rate": 4.119319644330214e-06, + "loss": 0.7021238803863525, + "step": 4060 + }, + { + "epoch": 1.4310132158590307, + "grad_norm": 1.7949933259038664, + "learning_rate": 4.114608186972143e-06, + "loss": 0.5940784215927124, + "step": 4061 + }, + { + "epoch": 1.43136563876652, + "grad_norm": 1.7958424742287702, + "learning_rate": 
4.109898727521429e-06, + "loss": 0.46511122584342957, + "step": 4062 + }, + { + "epoch": 1.4317180616740088, + "grad_norm": 1.7489789285307085, + "learning_rate": 4.105191267576797e-06, + "loss": 0.4710976481437683, + "step": 4063 + }, + { + "epoch": 1.4320704845814978, + "grad_norm": 1.650142742870973, + "learning_rate": 4.100485808736273e-06, + "loss": 0.5947977900505066, + "step": 4064 + }, + { + "epoch": 1.4324229074889867, + "grad_norm": 1.7620222249444284, + "learning_rate": 4.095782352597214e-06, + "loss": 0.6312115788459778, + "step": 4065 + }, + { + "epoch": 1.4327753303964759, + "grad_norm": 1.7689711305484843, + "learning_rate": 4.091080900756303e-06, + "loss": 0.5709977149963379, + "step": 4066 + }, + { + "epoch": 1.4331277533039648, + "grad_norm": 1.8903042666510779, + "learning_rate": 4.086381454809535e-06, + "loss": 0.6183716058731079, + "step": 4067 + }, + { + "epoch": 1.4334801762114537, + "grad_norm": 1.8677159370638265, + "learning_rate": 4.081684016352223e-06, + "loss": 0.5139745473861694, + "step": 4068 + }, + { + "epoch": 1.4338325991189427, + "grad_norm": 1.9141879794373917, + "learning_rate": 4.076988586979004e-06, + "loss": 0.6611173152923584, + "step": 4069 + }, + { + "epoch": 1.4341850220264316, + "grad_norm": 1.7276457807578136, + "learning_rate": 4.072295168283824e-06, + "loss": 0.616943359375, + "step": 4070 + }, + { + "epoch": 1.4345374449339208, + "grad_norm": 2.331740237042665, + "learning_rate": 4.067603761859965e-06, + "loss": 0.5388625264167786, + "step": 4071 + }, + { + "epoch": 1.4348898678414097, + "grad_norm": 1.9571975377572324, + "learning_rate": 4.062914369300002e-06, + "loss": 0.5523884892463684, + "step": 4072 + }, + { + "epoch": 1.4352422907488986, + "grad_norm": 1.8860165198416616, + "learning_rate": 4.058226992195838e-06, + "loss": 0.5610285997390747, + "step": 4073 + }, + { + "epoch": 1.4355947136563878, + "grad_norm": 1.8522832262316333, + "learning_rate": 4.0535416321387e-06, + "loss": 0.583917498588562, + "step": 
4074 + }, + { + "epoch": 1.4359471365638767, + "grad_norm": 1.677482186323321, + "learning_rate": 4.048858290719115e-06, + "loss": 0.6025276184082031, + "step": 4075 + }, + { + "epoch": 1.4362995594713657, + "grad_norm": 1.8037188167117204, + "learning_rate": 4.044176969526936e-06, + "loss": 0.5643888711929321, + "step": 4076 + }, + { + "epoch": 1.4366519823788546, + "grad_norm": 1.709713655992042, + "learning_rate": 4.0394976701513235e-06, + "loss": 0.550167977809906, + "step": 4077 + }, + { + "epoch": 1.4370044052863435, + "grad_norm": 2.1319034629476747, + "learning_rate": 4.034820394180749e-06, + "loss": 0.6182876825332642, + "step": 4078 + }, + { + "epoch": 1.4373568281938325, + "grad_norm": 2.018408244379198, + "learning_rate": 4.030145143203016e-06, + "loss": 0.5197434425354004, + "step": 4079 + }, + { + "epoch": 1.4377092511013216, + "grad_norm": 2.037308833831004, + "learning_rate": 4.025471918805214e-06, + "loss": 0.5351034998893738, + "step": 4080 + }, + { + "epoch": 1.4380616740088106, + "grad_norm": 1.988655670021041, + "learning_rate": 4.020800722573758e-06, + "loss": 0.5576729774475098, + "step": 4081 + }, + { + "epoch": 1.4384140969162995, + "grad_norm": 2.03830396836609, + "learning_rate": 4.016131556094381e-06, + "loss": 0.5557611584663391, + "step": 4082 + }, + { + "epoch": 1.4387665198237887, + "grad_norm": 1.6841558782049018, + "learning_rate": 4.011464420952115e-06, + "loss": 0.5300010442733765, + "step": 4083 + }, + { + "epoch": 1.4391189427312776, + "grad_norm": 2.5196291624747387, + "learning_rate": 4.0067993187313085e-06, + "loss": 0.5254991054534912, + "step": 4084 + }, + { + "epoch": 1.4394713656387665, + "grad_norm": 1.9569129587138865, + "learning_rate": 4.002136251015617e-06, + "loss": 0.5044848322868347, + "step": 4085 + }, + { + "epoch": 1.4398237885462555, + "grad_norm": 1.7587820286029368, + "learning_rate": 3.997475219388005e-06, + "loss": 0.6422302722930908, + "step": 4086 + }, + { + "epoch": 1.4401762114537444, + "grad_norm": 
1.7785161433093049, + "learning_rate": 3.992816225430758e-06, + "loss": 0.5502497553825378, + "step": 4087 + }, + { + "epoch": 1.4405286343612334, + "grad_norm": 1.9272648866171629, + "learning_rate": 3.988159270725448e-06, + "loss": 0.7479537129402161, + "step": 4088 + }, + { + "epoch": 1.4408810572687225, + "grad_norm": 1.8882665464741835, + "learning_rate": 3.983504356852967e-06, + "loss": 0.5418091416358948, + "step": 4089 + }, + { + "epoch": 1.4412334801762114, + "grad_norm": 2.1909054908738805, + "learning_rate": 3.978851485393519e-06, + "loss": 0.5262568593025208, + "step": 4090 + }, + { + "epoch": 1.4415859030837004, + "grad_norm": 1.7855475608149034, + "learning_rate": 3.974200657926607e-06, + "loss": 0.5419692397117615, + "step": 4091 + }, + { + "epoch": 1.4419383259911895, + "grad_norm": 1.84908442821801, + "learning_rate": 3.9695518760310425e-06, + "loss": 0.5202164649963379, + "step": 4092 + }, + { + "epoch": 1.4422907488986785, + "grad_norm": 1.6256093479781946, + "learning_rate": 3.96490514128494e-06, + "loss": 0.7232608795166016, + "step": 4093 + }, + { + "epoch": 1.4426431718061674, + "grad_norm": 3.2107784732452473, + "learning_rate": 3.960260455265721e-06, + "loss": 0.5899156332015991, + "step": 4094 + }, + { + "epoch": 1.4429955947136563, + "grad_norm": 1.9995831956411032, + "learning_rate": 3.95561781955012e-06, + "loss": 0.629068911075592, + "step": 4095 + }, + { + "epoch": 1.4433480176211453, + "grad_norm": 1.9520751138167456, + "learning_rate": 3.950977235714154e-06, + "loss": 0.5584920644760132, + "step": 4096 + }, + { + "epoch": 1.4437004405286344, + "grad_norm": 1.7280125643736322, + "learning_rate": 3.9463387053331685e-06, + "loss": 0.713936984539032, + "step": 4097 + }, + { + "epoch": 1.4440528634361234, + "grad_norm": 2.7226452019662357, + "learning_rate": 3.9417022299817944e-06, + "loss": 0.7157076001167297, + "step": 4098 + }, + { + "epoch": 1.4444052863436123, + "grad_norm": 1.940369638230087, + "learning_rate": 
3.937067811233972e-06, + "loss": 0.6540844440460205, + "step": 4099 + }, + { + "epoch": 1.4447577092511013, + "grad_norm": 1.6342043838390767, + "learning_rate": 3.9324354506629425e-06, + "loss": 0.5350022315979004, + "step": 4100 + }, + { + "epoch": 1.4451101321585904, + "grad_norm": 1.9186113150470587, + "learning_rate": 3.9278051498412475e-06, + "loss": 0.6852695941925049, + "step": 4101 + }, + { + "epoch": 1.4454625550660793, + "grad_norm": 1.8060312138879744, + "learning_rate": 3.923176910340728e-06, + "loss": 0.6059536337852478, + "step": 4102 + }, + { + "epoch": 1.4458149779735683, + "grad_norm": 1.6721278909458728, + "learning_rate": 3.918550733732536e-06, + "loss": 0.5787979364395142, + "step": 4103 + }, + { + "epoch": 1.4461674008810572, + "grad_norm": 1.8059605647431092, + "learning_rate": 3.9139266215871e-06, + "loss": 0.6068835258483887, + "step": 4104 + }, + { + "epoch": 1.4465198237885462, + "grad_norm": 1.7804694224195132, + "learning_rate": 3.909304575474175e-06, + "loss": 0.5123663544654846, + "step": 4105 + }, + { + "epoch": 1.4468722466960353, + "grad_norm": 1.832785857954117, + "learning_rate": 3.9046845969627975e-06, + "loss": 0.6285420656204224, + "step": 4106 + }, + { + "epoch": 1.4472246696035242, + "grad_norm": 1.8029701329975896, + "learning_rate": 3.9000666876213056e-06, + "loss": 0.6186035871505737, + "step": 4107 + }, + { + "epoch": 1.4475770925110132, + "grad_norm": 2.8121411727628174, + "learning_rate": 3.895450849017336e-06, + "loss": 0.6222661733627319, + "step": 4108 + }, + { + "epoch": 1.4479295154185021, + "grad_norm": 1.7965214044078308, + "learning_rate": 3.890837082717822e-06, + "loss": 0.5889515280723572, + "step": 4109 + }, + { + "epoch": 1.4482819383259913, + "grad_norm": 1.8839124618745182, + "learning_rate": 3.8862253902889925e-06, + "loss": 0.6160309314727783, + "step": 4110 + }, + { + "epoch": 1.4486343612334802, + "grad_norm": 1.7651875927016676, + "learning_rate": 3.881615773296381e-06, + "loss": 0.48093074560165405, 
+ "step": 4111 + }, + { + "epoch": 1.4489867841409692, + "grad_norm": 1.8283039880345147, + "learning_rate": 3.877008233304796e-06, + "loss": 0.5851131677627563, + "step": 4112 + }, + { + "epoch": 1.449339207048458, + "grad_norm": 1.7366010221761805, + "learning_rate": 3.872402771878365e-06, + "loss": 0.5322093963623047, + "step": 4113 + }, + { + "epoch": 1.449691629955947, + "grad_norm": 1.7342840660368584, + "learning_rate": 3.8677993905804956e-06, + "loss": 0.652804970741272, + "step": 4114 + }, + { + "epoch": 1.4500440528634362, + "grad_norm": 1.9583669696557284, + "learning_rate": 3.863198090973891e-06, + "loss": 0.5494996309280396, + "step": 4115 + }, + { + "epoch": 1.4503964757709251, + "grad_norm": 1.910811405312081, + "learning_rate": 3.8585988746205505e-06, + "loss": 0.5641331672668457, + "step": 4116 + }, + { + "epoch": 1.450748898678414, + "grad_norm": 1.7616537450177998, + "learning_rate": 3.854001743081764e-06, + "loss": 0.5415998697280884, + "step": 4117 + }, + { + "epoch": 1.4511013215859032, + "grad_norm": 1.599490372210091, + "learning_rate": 3.849406697918113e-06, + "loss": 0.4399813711643219, + "step": 4118 + }, + { + "epoch": 1.4514537444933922, + "grad_norm": 2.0642862733318115, + "learning_rate": 3.84481374068948e-06, + "loss": 0.6228655576705933, + "step": 4119 + }, + { + "epoch": 1.451806167400881, + "grad_norm": 1.650547077673145, + "learning_rate": 3.8402228729550195e-06, + "loss": 0.5575108528137207, + "step": 4120 + }, + { + "epoch": 1.45215859030837, + "grad_norm": 2.4780057667058704, + "learning_rate": 3.835634096273197e-06, + "loss": 0.5705434679985046, + "step": 4121 + }, + { + "epoch": 1.452511013215859, + "grad_norm": 2.1620556917486184, + "learning_rate": 3.831047412201758e-06, + "loss": 0.5649456977844238, + "step": 4122 + }, + { + "epoch": 1.452863436123348, + "grad_norm": 1.9734169166383557, + "learning_rate": 3.826462822297736e-06, + "loss": 0.5656554698944092, + "step": 4123 + }, + { + "epoch": 1.453215859030837, + 
"grad_norm": 1.8883507101257415, + "learning_rate": 3.82188032811746e-06, + "loss": 0.6565591096878052, + "step": 4124 + }, + { + "epoch": 1.453568281938326, + "grad_norm": 1.9823635297408013, + "learning_rate": 3.817299931216537e-06, + "loss": 0.6553423404693604, + "step": 4125 + }, + { + "epoch": 1.453920704845815, + "grad_norm": 1.8362785094722764, + "learning_rate": 3.812721633149883e-06, + "loss": 0.5401671528816223, + "step": 4126 + }, + { + "epoch": 1.454273127753304, + "grad_norm": 2.008049720412482, + "learning_rate": 3.808145435471674e-06, + "loss": 0.7275381088256836, + "step": 4127 + }, + { + "epoch": 1.454625550660793, + "grad_norm": 1.827455905179675, + "learning_rate": 3.80357133973539e-06, + "loss": 0.6384310722351074, + "step": 4128 + }, + { + "epoch": 1.454977973568282, + "grad_norm": 1.986935058055083, + "learning_rate": 3.7989993474937993e-06, + "loss": 0.7783250212669373, + "step": 4129 + }, + { + "epoch": 1.455330396475771, + "grad_norm": 2.1923612655628624, + "learning_rate": 3.7944294602989473e-06, + "loss": 0.752954363822937, + "step": 4130 + }, + { + "epoch": 1.4556828193832598, + "grad_norm": 1.801491937261316, + "learning_rate": 3.789861679702169e-06, + "loss": 0.6099411249160767, + "step": 4131 + }, + { + "epoch": 1.4560352422907488, + "grad_norm": 2.12230143233965, + "learning_rate": 3.7852960072540845e-06, + "loss": 0.6608012914657593, + "step": 4132 + }, + { + "epoch": 1.456387665198238, + "grad_norm": 1.836228759881875, + "learning_rate": 3.7807324445045924e-06, + "loss": 0.5119853615760803, + "step": 4133 + }, + { + "epoch": 1.4567400881057269, + "grad_norm": 2.036719543857632, + "learning_rate": 3.7761709930028923e-06, + "loss": 0.6353520750999451, + "step": 4134 + }, + { + "epoch": 1.4570925110132158, + "grad_norm": 1.9234147822597618, + "learning_rate": 3.7716116542974434e-06, + "loss": 0.6427614688873291, + "step": 4135 + }, + { + "epoch": 1.457444933920705, + "grad_norm": 2.34139645382815, + "learning_rate": 
3.7670544299360003e-06, + "loss": 0.6205203533172607, + "step": 4136 + }, + { + "epoch": 1.457797356828194, + "grad_norm": 1.940401751978381, + "learning_rate": 3.7624993214656046e-06, + "loss": 0.5957762002944946, + "step": 4137 + }, + { + "epoch": 1.4581497797356828, + "grad_norm": 1.8842452122457418, + "learning_rate": 3.7579463304325714e-06, + "loss": 0.6698043346405029, + "step": 4138 + }, + { + "epoch": 1.4585022026431718, + "grad_norm": 1.825534553754035, + "learning_rate": 3.7533954583824982e-06, + "loss": 0.5947796106338501, + "step": 4139 + }, + { + "epoch": 1.4588546255506607, + "grad_norm": 1.7817965501913557, + "learning_rate": 3.7488467068602664e-06, + "loss": 0.5905463695526123, + "step": 4140 + }, + { + "epoch": 1.4592070484581499, + "grad_norm": 1.8530726474927524, + "learning_rate": 3.74430007741003e-06, + "loss": 0.6218722462654114, + "step": 4141 + }, + { + "epoch": 1.4595594713656388, + "grad_norm": 1.9872212615104103, + "learning_rate": 3.739755571575241e-06, + "loss": 0.6124013066291809, + "step": 4142 + }, + { + "epoch": 1.4599118942731277, + "grad_norm": 1.8897226451904012, + "learning_rate": 3.7352131908986046e-06, + "loss": 0.5816842317581177, + "step": 4143 + }, + { + "epoch": 1.4602643171806167, + "grad_norm": 1.780742815029414, + "learning_rate": 3.7306729369221197e-06, + "loss": 0.5225531458854675, + "step": 4144 + }, + { + "epoch": 1.4606167400881058, + "grad_norm": 1.5899946748394236, + "learning_rate": 3.7261348111870663e-06, + "loss": 0.4536696672439575, + "step": 4145 + }, + { + "epoch": 1.4609691629955948, + "grad_norm": 1.6530094281559282, + "learning_rate": 3.7215988152339968e-06, + "loss": 0.5777832269668579, + "step": 4146 + }, + { + "epoch": 1.4613215859030837, + "grad_norm": 2.0042576738233993, + "learning_rate": 3.717064950602737e-06, + "loss": 0.5964622497558594, + "step": 4147 + }, + { + "epoch": 1.4616740088105726, + "grad_norm": 1.634683701176406, + "learning_rate": 3.7125332188323937e-06, + "loss": 
0.47224390506744385, + "step": 4148 + }, + { + "epoch": 1.4620264317180616, + "grad_norm": 2.0219703130043474, + "learning_rate": 3.708003621461347e-06, + "loss": 0.5989271402359009, + "step": 4149 + }, + { + "epoch": 1.4623788546255507, + "grad_norm": 1.7865027495889427, + "learning_rate": 3.7034761600272627e-06, + "loss": 0.6171919107437134, + "step": 4150 + }, + { + "epoch": 1.4627312775330397, + "grad_norm": 1.742831115289917, + "learning_rate": 3.6989508360670647e-06, + "loss": 0.7064418792724609, + "step": 4151 + }, + { + "epoch": 1.4630837004405286, + "grad_norm": 2.236539087690149, + "learning_rate": 3.6944276511169577e-06, + "loss": 0.6055941581726074, + "step": 4152 + }, + { + "epoch": 1.4634361233480178, + "grad_norm": 1.7433765587507288, + "learning_rate": 3.689906606712429e-06, + "loss": 0.4550645351409912, + "step": 4153 + }, + { + "epoch": 1.4637885462555067, + "grad_norm": 1.929810725161399, + "learning_rate": 3.68538770438823e-06, + "loss": 0.5958502292633057, + "step": 4154 + }, + { + "epoch": 1.4641409691629956, + "grad_norm": 2.057185852502653, + "learning_rate": 3.680870945678389e-06, + "loss": 0.5574297904968262, + "step": 4155 + }, + { + "epoch": 1.4644933920704846, + "grad_norm": 1.443537567568116, + "learning_rate": 3.676356332116202e-06, + "loss": 0.46494683623313904, + "step": 4156 + }, + { + "epoch": 1.4648458149779735, + "grad_norm": 1.7341220293452018, + "learning_rate": 3.671843865234238e-06, + "loss": 0.5549977421760559, + "step": 4157 + }, + { + "epoch": 1.4651982378854624, + "grad_norm": 1.7585158502615206, + "learning_rate": 3.6673335465643488e-06, + "loss": 0.5620779395103455, + "step": 4158 + }, + { + "epoch": 1.4655506607929516, + "grad_norm": 1.845448976603358, + "learning_rate": 3.662825377637638e-06, + "loss": 0.5945389270782471, + "step": 4159 + }, + { + "epoch": 1.4659030837004405, + "grad_norm": 1.9218401758762256, + "learning_rate": 3.6583193599844867e-06, + "loss": 0.6923668384552002, + "step": 4160 + }, + { + "epoch": 
1.4662555066079295, + "grad_norm": 2.16834694145402, + "learning_rate": 3.653815495134557e-06, + "loss": 0.6848515868186951, + "step": 4161 + }, + { + "epoch": 1.4666079295154186, + "grad_norm": 1.922504159473904, + "learning_rate": 3.649313784616765e-06, + "loss": 0.640125036239624, + "step": 4162 + }, + { + "epoch": 1.4669603524229076, + "grad_norm": 1.816415927402479, + "learning_rate": 3.6448142299593026e-06, + "loss": 0.6879653930664062, + "step": 4163 + }, + { + "epoch": 1.4673127753303965, + "grad_norm": 1.9534619637738762, + "learning_rate": 3.6403168326896286e-06, + "loss": 0.6757794618606567, + "step": 4164 + }, + { + "epoch": 1.4676651982378854, + "grad_norm": 1.7476054801499117, + "learning_rate": 3.6358215943344664e-06, + "loss": 0.6405826807022095, + "step": 4165 + }, + { + "epoch": 1.4680176211453744, + "grad_norm": 2.1448885390527064, + "learning_rate": 3.6313285164198187e-06, + "loss": 0.692136287689209, + "step": 4166 + }, + { + "epoch": 1.4683700440528633, + "grad_norm": 1.8449983835752888, + "learning_rate": 3.626837600470935e-06, + "loss": 0.6305568218231201, + "step": 4167 + }, + { + "epoch": 1.4687224669603525, + "grad_norm": 2.1026866185280264, + "learning_rate": 3.6223488480123427e-06, + "loss": 0.7040449380874634, + "step": 4168 + }, + { + "epoch": 1.4690748898678414, + "grad_norm": 1.5463095765444386, + "learning_rate": 3.6178622605678403e-06, + "loss": 0.5064427852630615, + "step": 4169 + }, + { + "epoch": 1.4694273127753303, + "grad_norm": 1.6549157120829303, + "learning_rate": 3.6133778396604813e-06, + "loss": 0.46597155928611755, + "step": 4170 + }, + { + "epoch": 1.4697797356828195, + "grad_norm": 1.9774014610728103, + "learning_rate": 3.6088955868125864e-06, + "loss": 0.5764908790588379, + "step": 4171 + }, + { + "epoch": 1.4701321585903084, + "grad_norm": 1.9347279402338318, + "learning_rate": 3.6044155035457405e-06, + "loss": 0.5808656215667725, + "step": 4172 + }, + { + "epoch": 1.4704845814977974, + "grad_norm": 
2.0116811142135202, + "learning_rate": 3.599937591380791e-06, + "loss": 0.5439287424087524, + "step": 4173 + }, + { + "epoch": 1.4708370044052863, + "grad_norm": 1.5674669602592264, + "learning_rate": 3.595461851837857e-06, + "loss": 0.5943965911865234, + "step": 4174 + }, + { + "epoch": 1.4711894273127752, + "grad_norm": 1.8847509954427386, + "learning_rate": 3.590988286436302e-06, + "loss": 0.631833016872406, + "step": 4175 + }, + { + "epoch": 1.4715418502202642, + "grad_norm": 1.9232774716266652, + "learning_rate": 3.5865168966947718e-06, + "loss": 0.514176070690155, + "step": 4176 + }, + { + "epoch": 1.4718942731277533, + "grad_norm": 1.7211351925277203, + "learning_rate": 3.582047684131159e-06, + "loss": 0.584772527217865, + "step": 4177 + }, + { + "epoch": 1.4722466960352423, + "grad_norm": 1.7726013207799318, + "learning_rate": 3.5775806502626244e-06, + "loss": 0.5085974931716919, + "step": 4178 + }, + { + "epoch": 1.4725991189427312, + "grad_norm": 2.1422494719737464, + "learning_rate": 3.573115796605584e-06, + "loss": 0.62562495470047, + "step": 4179 + }, + { + "epoch": 1.4729515418502204, + "grad_norm": 1.9507950967896, + "learning_rate": 3.5686531246757206e-06, + "loss": 0.5815824270248413, + "step": 4180 + }, + { + "epoch": 1.4733039647577093, + "grad_norm": 1.8811159721586839, + "learning_rate": 3.5641926359879663e-06, + "loss": 0.6639705300331116, + "step": 4181 + }, + { + "epoch": 1.4736563876651982, + "grad_norm": 1.8978737039698366, + "learning_rate": 3.5597343320565293e-06, + "loss": 0.6265684962272644, + "step": 4182 + }, + { + "epoch": 1.4740088105726872, + "grad_norm": 1.877895350809495, + "learning_rate": 3.5552782143948504e-06, + "loss": 0.6113626956939697, + "step": 4183 + }, + { + "epoch": 1.4743612334801761, + "grad_norm": 1.8492974346484832, + "learning_rate": 3.550824284515655e-06, + "loss": 0.5247244834899902, + "step": 4184 + }, + { + "epoch": 1.4747136563876653, + "grad_norm": 1.871370335191458, + "learning_rate": 
3.5463725439309083e-06, + "loss": 0.5524521470069885, + "step": 4185 + }, + { + "epoch": 1.4750660792951542, + "grad_norm": 1.9955136913094453, + "learning_rate": 3.5419229941518384e-06, + "loss": 0.5462251305580139, + "step": 4186 + }, + { + "epoch": 1.4754185022026431, + "grad_norm": 1.6609337480864497, + "learning_rate": 3.5374756366889297e-06, + "loss": 0.6500638723373413, + "step": 4187 + }, + { + "epoch": 1.475770925110132, + "grad_norm": 2.0744035602538586, + "learning_rate": 3.5330304730519216e-06, + "loss": 0.4445904791355133, + "step": 4188 + }, + { + "epoch": 1.4761233480176212, + "grad_norm": 1.7788816335434026, + "learning_rate": 3.5285875047498075e-06, + "loss": 0.6068017482757568, + "step": 4189 + }, + { + "epoch": 1.4764757709251102, + "grad_norm": 1.683605461123042, + "learning_rate": 3.5241467332908384e-06, + "loss": 0.5577334761619568, + "step": 4190 + }, + { + "epoch": 1.4768281938325991, + "grad_norm": 1.9605228698426533, + "learning_rate": 3.5197081601825135e-06, + "loss": 0.6596503257751465, + "step": 4191 + }, + { + "epoch": 1.477180616740088, + "grad_norm": 1.9912955738456768, + "learning_rate": 3.5152717869315965e-06, + "loss": 0.6260303258895874, + "step": 4192 + }, + { + "epoch": 1.477533039647577, + "grad_norm": 2.010278739994815, + "learning_rate": 3.510837615044097e-06, + "loss": 0.5467355251312256, + "step": 4193 + }, + { + "epoch": 1.4778854625550661, + "grad_norm": 2.516516212561754, + "learning_rate": 3.506405646025276e-06, + "loss": 0.5306693911552429, + "step": 4194 + }, + { + "epoch": 1.478237885462555, + "grad_norm": 1.7497505820795882, + "learning_rate": 3.5019758813796513e-06, + "loss": 0.6130725741386414, + "step": 4195 + }, + { + "epoch": 1.478590308370044, + "grad_norm": 2.2199157894914143, + "learning_rate": 3.4975483226109874e-06, + "loss": 0.6656272411346436, + "step": 4196 + }, + { + "epoch": 1.4789427312775332, + "grad_norm": 1.8654097488268417, + "learning_rate": 3.4931229712223047e-06, + "loss": 0.6018439531326294, 
+ "step": 4197 + }, + { + "epoch": 1.479295154185022, + "grad_norm": 2.0982915779378137, + "learning_rate": 3.488699828715871e-06, + "loss": 0.6635257005691528, + "step": 4198 + }, + { + "epoch": 1.479647577092511, + "grad_norm": 1.8412514150393455, + "learning_rate": 3.4842788965932038e-06, + "loss": 0.5760075449943542, + "step": 4199 + }, + { + "epoch": 1.48, + "grad_norm": 1.7009881043074442, + "learning_rate": 3.4798601763550778e-06, + "loss": 0.6951982975006104, + "step": 4200 + }, + { + "epoch": 1.480352422907489, + "grad_norm": 1.880170776358824, + "learning_rate": 3.475443669501508e-06, + "loss": 0.6574405431747437, + "step": 4201 + }, + { + "epoch": 1.4807048458149779, + "grad_norm": 1.8075997179509888, + "learning_rate": 3.4710293775317593e-06, + "loss": 0.5912263989448547, + "step": 4202 + }, + { + "epoch": 1.481057268722467, + "grad_norm": 1.7703606198961421, + "learning_rate": 3.4666173019443485e-06, + "loss": 0.5169661045074463, + "step": 4203 + }, + { + "epoch": 1.481409691629956, + "grad_norm": 1.6923587460137135, + "learning_rate": 3.4622074442370345e-06, + "loss": 0.5707660913467407, + "step": 4204 + }, + { + "epoch": 1.4817621145374449, + "grad_norm": 1.7929036165873167, + "learning_rate": 3.4577998059068354e-06, + "loss": 0.5856584310531616, + "step": 4205 + }, + { + "epoch": 1.482114537444934, + "grad_norm": 2.0144464412272636, + "learning_rate": 3.4533943884499975e-06, + "loss": 0.6306010484695435, + "step": 4206 + }, + { + "epoch": 1.482466960352423, + "grad_norm": 1.9708292107625427, + "learning_rate": 3.4489911933620245e-06, + "loss": 0.6177140474319458, + "step": 4207 + }, + { + "epoch": 1.482819383259912, + "grad_norm": 1.6187910458828605, + "learning_rate": 3.4445902221376694e-06, + "loss": 0.5527759790420532, + "step": 4208 + }, + { + "epoch": 1.4831718061674009, + "grad_norm": 1.792049785406371, + "learning_rate": 3.440191476270922e-06, + "loss": 0.6838431358337402, + "step": 4209 + }, + { + "epoch": 1.4835242290748898, + "grad_norm": 
2.0640892173970933, + "learning_rate": 3.4357949572550196e-06, + "loss": 0.4876987636089325, + "step": 4210 + }, + { + "epoch": 1.4838766519823787, + "grad_norm": 1.66358265635652, + "learning_rate": 3.4314006665824427e-06, + "loss": 0.5639296770095825, + "step": 4211 + }, + { + "epoch": 1.4842290748898679, + "grad_norm": 1.9264745517709694, + "learning_rate": 3.427008605744915e-06, + "loss": 0.4189109802246094, + "step": 4212 + }, + { + "epoch": 1.4845814977973568, + "grad_norm": 1.7041726412059042, + "learning_rate": 3.422618776233413e-06, + "loss": 0.6602882146835327, + "step": 4213 + }, + { + "epoch": 1.4849339207048458, + "grad_norm": 2.105857994769297, + "learning_rate": 3.4182311795381373e-06, + "loss": 0.7642478942871094, + "step": 4214 + }, + { + "epoch": 1.485286343612335, + "grad_norm": 1.703090383184888, + "learning_rate": 3.41384581714854e-06, + "loss": 0.5550031065940857, + "step": 4215 + }, + { + "epoch": 1.4856387665198239, + "grad_norm": 1.956165427853548, + "learning_rate": 3.4094626905533223e-06, + "loss": 0.7036092281341553, + "step": 4216 + }, + { + "epoch": 1.4859911894273128, + "grad_norm": 1.9055824872661757, + "learning_rate": 3.4050818012404165e-06, + "loss": 0.693780779838562, + "step": 4217 + }, + { + "epoch": 1.4863436123348017, + "grad_norm": 1.750544621689218, + "learning_rate": 3.4007031506969977e-06, + "loss": 0.6315299868583679, + "step": 4218 + }, + { + "epoch": 1.4866960352422907, + "grad_norm": 2.0036955114247355, + "learning_rate": 3.396326740409481e-06, + "loss": 0.47849225997924805, + "step": 4219 + }, + { + "epoch": 1.4870484581497798, + "grad_norm": 1.9433930854735686, + "learning_rate": 3.3919525718635195e-06, + "loss": 0.6200336217880249, + "step": 4220 + }, + { + "epoch": 1.4874008810572688, + "grad_norm": 1.7540152253976415, + "learning_rate": 3.3875806465440152e-06, + "loss": 0.7594903707504272, + "step": 4221 + }, + { + "epoch": 1.4877533039647577, + "grad_norm": 1.8336468945254887, + "learning_rate": 
3.383210965935093e-06, + "loss": 0.47159409523010254, + "step": 4222 + }, + { + "epoch": 1.4881057268722466, + "grad_norm": 1.6169320059495966, + "learning_rate": 3.3788435315201216e-06, + "loss": 0.5272495150566101, + "step": 4223 + }, + { + "epoch": 1.4884581497797358, + "grad_norm": 1.8268891947791475, + "learning_rate": 3.3744783447817177e-06, + "loss": 0.43847334384918213, + "step": 4224 + }, + { + "epoch": 1.4888105726872247, + "grad_norm": 1.7778298915864024, + "learning_rate": 3.370115407201724e-06, + "loss": 0.656914234161377, + "step": 4225 + }, + { + "epoch": 1.4891629955947137, + "grad_norm": 1.9329427480605288, + "learning_rate": 3.3657547202612128e-06, + "loss": 0.6379527449607849, + "step": 4226 + }, + { + "epoch": 1.4895154185022026, + "grad_norm": 2.0864597290427103, + "learning_rate": 3.3613962854405136e-06, + "loss": 0.6254120469093323, + "step": 4227 + }, + { + "epoch": 1.4898678414096915, + "grad_norm": 1.9356514762449182, + "learning_rate": 3.35704010421917e-06, + "loss": 0.6567566990852356, + "step": 4228 + }, + { + "epoch": 1.4902202643171807, + "grad_norm": 1.7537675986626187, + "learning_rate": 3.352686178075981e-06, + "loss": 0.5121499300003052, + "step": 4229 + }, + { + "epoch": 1.4905726872246696, + "grad_norm": 1.5674669685831402, + "learning_rate": 3.3483345084889595e-06, + "loss": 0.5727466344833374, + "step": 4230 + }, + { + "epoch": 1.4909251101321586, + "grad_norm": 2.005473410378335, + "learning_rate": 3.3439850969353614e-06, + "loss": 0.6013318300247192, + "step": 4231 + }, + { + "epoch": 1.4912775330396475, + "grad_norm": 1.8391832358116647, + "learning_rate": 3.3396379448916836e-06, + "loss": 0.6350653767585754, + "step": 4232 + }, + { + "epoch": 1.4916299559471367, + "grad_norm": 1.8543607360516363, + "learning_rate": 3.335293053833645e-06, + "loss": 0.7072123885154724, + "step": 4233 + }, + { + "epoch": 1.4919823788546256, + "grad_norm": 1.7154205163032374, + "learning_rate": 3.330950425236201e-06, + "loss": 
0.6208378076553345, + "step": 4234 + }, + { + "epoch": 1.4923348017621145, + "grad_norm": 2.0942315596519667, + "learning_rate": 3.3266100605735397e-06, + "loss": 0.656146764755249, + "step": 4235 + }, + { + "epoch": 1.4926872246696035, + "grad_norm": 1.953931788636606, + "learning_rate": 3.322271961319076e-06, + "loss": 0.6145347952842712, + "step": 4236 + }, + { + "epoch": 1.4930396475770924, + "grad_norm": 1.7528969029549952, + "learning_rate": 3.3179361289454694e-06, + "loss": 0.5876312255859375, + "step": 4237 + }, + { + "epoch": 1.4933920704845816, + "grad_norm": 1.601290490679199, + "learning_rate": 3.3136025649245897e-06, + "loss": 0.48365384340286255, + "step": 4238 + }, + { + "epoch": 1.4937444933920705, + "grad_norm": 2.189978154300805, + "learning_rate": 3.3092712707275467e-06, + "loss": 0.566576361656189, + "step": 4239 + }, + { + "epoch": 1.4940969162995594, + "grad_norm": 1.9878421762040837, + "learning_rate": 3.3049422478246886e-06, + "loss": 0.6982032060623169, + "step": 4240 + }, + { + "epoch": 1.4944493392070486, + "grad_norm": 1.9039317781349454, + "learning_rate": 3.3006154976855787e-06, + "loss": 0.5802686214447021, + "step": 4241 + }, + { + "epoch": 1.4948017621145375, + "grad_norm": 1.8292762393050834, + "learning_rate": 3.296291021779016e-06, + "loss": 0.6656724214553833, + "step": 4242 + }, + { + "epoch": 1.4951541850220265, + "grad_norm": 1.8194685949700777, + "learning_rate": 3.2919688215730227e-06, + "loss": 0.5081402063369751, + "step": 4243 + }, + { + "epoch": 1.4955066079295154, + "grad_norm": 1.858251792062496, + "learning_rate": 3.28764889853485e-06, + "loss": 0.6963785290718079, + "step": 4244 + }, + { + "epoch": 1.4958590308370043, + "grad_norm": 1.6076782907375928, + "learning_rate": 3.283331254130987e-06, + "loss": 0.4953869581222534, + "step": 4245 + }, + { + "epoch": 1.4962114537444933, + "grad_norm": 1.7009256372822803, + "learning_rate": 3.2790158898271283e-06, + "loss": 0.5495179295539856, + "step": 4246 + }, + { + 
"epoch": 1.4965638766519824, + "grad_norm": 1.5282320768079813, + "learning_rate": 3.274702807088208e-06, + "loss": 0.6238610148429871, + "step": 4247 + }, + { + "epoch": 1.4969162995594714, + "grad_norm": 1.7145305152154042, + "learning_rate": 3.270392007378389e-06, + "loss": 0.557083249092102, + "step": 4248 + }, + { + "epoch": 1.4972687224669603, + "grad_norm": 1.8458971793579602, + "learning_rate": 3.2660834921610495e-06, + "loss": 0.6317561864852905, + "step": 4249 + }, + { + "epoch": 1.4976211453744495, + "grad_norm": 2.025909664851984, + "learning_rate": 3.2617772628987974e-06, + "loss": 0.5957529544830322, + "step": 4250 + }, + { + "epoch": 1.4979735682819384, + "grad_norm": 1.8950835934769208, + "learning_rate": 3.2574733210534637e-06, + "loss": 0.7661205530166626, + "step": 4251 + }, + { + "epoch": 1.4983259911894273, + "grad_norm": 1.6920068443310292, + "learning_rate": 3.2531716680861024e-06, + "loss": 0.5616782903671265, + "step": 4252 + }, + { + "epoch": 1.4986784140969163, + "grad_norm": 1.770897830706882, + "learning_rate": 3.2488723054569905e-06, + "loss": 0.5679990649223328, + "step": 4253 + }, + { + "epoch": 1.4990308370044052, + "grad_norm": 1.8387315322882807, + "learning_rate": 3.2445752346256244e-06, + "loss": 0.6355923414230347, + "step": 4254 + }, + { + "epoch": 1.4993832599118941, + "grad_norm": 1.5734033631685667, + "learning_rate": 3.2402804570507316e-06, + "loss": 0.5050745010375977, + "step": 4255 + }, + { + "epoch": 1.4997356828193833, + "grad_norm": 1.9535070131295427, + "learning_rate": 3.2359879741902557e-06, + "loss": 0.6585286855697632, + "step": 4256 + }, + { + "epoch": 1.5000881057268722, + "grad_norm": 1.9369843836982625, + "learning_rate": 3.2316977875013567e-06, + "loss": 0.5108245611190796, + "step": 4257 + }, + { + "epoch": 1.5004405286343614, + "grad_norm": 1.7460361732263896, + "learning_rate": 3.2274098984404223e-06, + "loss": 0.5270702838897705, + "step": 4258 + }, + { + "epoch": 1.5007929515418503, + "grad_norm": 
2.09582870313145, + "learning_rate": 3.223124308463057e-06, + "loss": 0.6421051025390625, + "step": 4259 + }, + { + "epoch": 1.5011453744493393, + "grad_norm": 2.0173715825527454, + "learning_rate": 3.218841019024084e-06, + "loss": 0.6040945053100586, + "step": 4260 + }, + { + "epoch": 1.5014977973568282, + "grad_norm": 3.5488261180155023, + "learning_rate": 3.214560031577548e-06, + "loss": 0.6389988660812378, + "step": 4261 + }, + { + "epoch": 1.5018502202643171, + "grad_norm": 2.1449229280338096, + "learning_rate": 3.210281347576707e-06, + "loss": 0.6474273800849915, + "step": 4262 + }, + { + "epoch": 1.502202643171806, + "grad_norm": 2.0726789637634666, + "learning_rate": 3.206004968474048e-06, + "loss": 0.7020560503005981, + "step": 4263 + }, + { + "epoch": 1.502555066079295, + "grad_norm": 1.7677587583992656, + "learning_rate": 3.2017308957212644e-06, + "loss": 0.574647068977356, + "step": 4264 + }, + { + "epoch": 1.5029074889867842, + "grad_norm": 1.8152121117445819, + "learning_rate": 3.1974591307692724e-06, + "loss": 0.6912944316864014, + "step": 4265 + }, + { + "epoch": 1.503259911894273, + "grad_norm": 1.7825438750387144, + "learning_rate": 3.1931896750682036e-06, + "loss": 0.7738592028617859, + "step": 4266 + }, + { + "epoch": 1.5036123348017623, + "grad_norm": 1.7835054391965142, + "learning_rate": 3.188922530067402e-06, + "loss": 0.6418012380599976, + "step": 4267 + }, + { + "epoch": 1.5039647577092512, + "grad_norm": 2.0481798246782628, + "learning_rate": 3.1846576972154343e-06, + "loss": 0.639055609703064, + "step": 4268 + }, + { + "epoch": 1.5043171806167401, + "grad_norm": 1.8365579809471801, + "learning_rate": 3.1803951779600774e-06, + "loss": 0.5512406229972839, + "step": 4269 + }, + { + "epoch": 1.504669603524229, + "grad_norm": 1.7182877192220278, + "learning_rate": 3.1761349737483194e-06, + "loss": 0.5838354229927063, + "step": 4270 + }, + { + "epoch": 1.505022026431718, + "grad_norm": 1.5090233544437164, + "learning_rate": 
3.1718770860263747e-06, + "loss": 0.5903568267822266, + "step": 4271 + }, + { + "epoch": 1.505374449339207, + "grad_norm": 1.761348463041135, + "learning_rate": 3.1676215162396604e-06, + "loss": 0.5610073804855347, + "step": 4272 + }, + { + "epoch": 1.5057268722466959, + "grad_norm": 1.9899291186285208, + "learning_rate": 3.163368265832809e-06, + "loss": 0.6543136835098267, + "step": 4273 + }, + { + "epoch": 1.506079295154185, + "grad_norm": 1.9484911821126696, + "learning_rate": 3.1591173362496686e-06, + "loss": 0.6586440801620483, + "step": 4274 + }, + { + "epoch": 1.506431718061674, + "grad_norm": 1.7389367867721892, + "learning_rate": 3.1548687289332958e-06, + "loss": 0.5360713601112366, + "step": 4275 + }, + { + "epoch": 1.5067841409691631, + "grad_norm": 2.1157677007043243, + "learning_rate": 3.1506224453259615e-06, + "loss": 0.6695356369018555, + "step": 4276 + }, + { + "epoch": 1.507136563876652, + "grad_norm": 1.7594436585853632, + "learning_rate": 3.146378486869146e-06, + "loss": 0.5708016753196716, + "step": 4277 + }, + { + "epoch": 1.507488986784141, + "grad_norm": 1.997964983412431, + "learning_rate": 3.142136855003538e-06, + "loss": 0.5412342548370361, + "step": 4278 + }, + { + "epoch": 1.50784140969163, + "grad_norm": 1.645092688511499, + "learning_rate": 3.1378975511690468e-06, + "loss": 0.5392874479293823, + "step": 4279 + }, + { + "epoch": 1.5081938325991189, + "grad_norm": 2.1591157791946256, + "learning_rate": 3.133660576804781e-06, + "loss": 0.6559237241744995, + "step": 4280 + }, + { + "epoch": 1.5085462555066078, + "grad_norm": 1.6625372644532221, + "learning_rate": 3.1294259333490597e-06, + "loss": 0.49973511695861816, + "step": 4281 + }, + { + "epoch": 1.5088986784140968, + "grad_norm": 1.9292311285357981, + "learning_rate": 3.1251936222394152e-06, + "loss": 0.5458316206932068, + "step": 4282 + }, + { + "epoch": 1.509251101321586, + "grad_norm": 1.7771232071244591, + "learning_rate": 3.120963644912579e-06, + "loss": 0.628986656665802, + 
"step": 4283 + }, + { + "epoch": 1.5096035242290748, + "grad_norm": 1.956059007614116, + "learning_rate": 3.1167360028045103e-06, + "loss": 0.6234235167503357, + "step": 4284 + }, + { + "epoch": 1.509955947136564, + "grad_norm": 1.832929038299875, + "learning_rate": 3.112510697350348e-06, + "loss": 0.49892476201057434, + "step": 4285 + }, + { + "epoch": 1.510308370044053, + "grad_norm": 1.859590789761001, + "learning_rate": 3.1082877299844562e-06, + "loss": 0.42951709032058716, + "step": 4286 + }, + { + "epoch": 1.5106607929515419, + "grad_norm": 1.9658176092994974, + "learning_rate": 3.1040671021404045e-06, + "loss": 0.6392263770103455, + "step": 4287 + }, + { + "epoch": 1.5110132158590308, + "grad_norm": 1.9240075529588605, + "learning_rate": 3.099848815250964e-06, + "loss": 0.6198933124542236, + "step": 4288 + }, + { + "epoch": 1.5113656387665197, + "grad_norm": 2.6550374581713436, + "learning_rate": 3.0956328707481055e-06, + "loss": 0.7626048922538757, + "step": 4289 + }, + { + "epoch": 1.5117180616740087, + "grad_norm": 2.057470135822257, + "learning_rate": 3.0914192700630175e-06, + "loss": 0.5245747566223145, + "step": 4290 + }, + { + "epoch": 1.5120704845814978, + "grad_norm": 2.016409834872785, + "learning_rate": 3.0872080146260818e-06, + "loss": 0.6788556575775146, + "step": 4291 + }, + { + "epoch": 1.5124229074889868, + "grad_norm": 1.8970717527543317, + "learning_rate": 3.082999105866897e-06, + "loss": 0.6224241852760315, + "step": 4292 + }, + { + "epoch": 1.512775330396476, + "grad_norm": 1.8828342237083628, + "learning_rate": 3.0787925452142477e-06, + "loss": 0.706061840057373, + "step": 4293 + }, + { + "epoch": 1.5131277533039649, + "grad_norm": 1.8530285911040203, + "learning_rate": 3.07458833409613e-06, + "loss": 0.7075262665748596, + "step": 4294 + }, + { + "epoch": 1.5134801762114538, + "grad_norm": 1.8075779914700747, + "learning_rate": 3.0703864739397494e-06, + "loss": 0.4912101626396179, + "step": 4295 + }, + { + "epoch": 1.5138325991189427, + 
"grad_norm": 1.908543179959353, + "learning_rate": 3.066186966171507e-06, + "loss": 0.6530265808105469, + "step": 4296 + }, + { + "epoch": 1.5141850220264317, + "grad_norm": 2.12821134565194, + "learning_rate": 3.0619898122169946e-06, + "loss": 0.4905887246131897, + "step": 4297 + }, + { + "epoch": 1.5145374449339206, + "grad_norm": 1.626400447189927, + "learning_rate": 3.057795013501025e-06, + "loss": 0.5025225281715393, + "step": 4298 + }, + { + "epoch": 1.5148898678414096, + "grad_norm": 1.945132129374327, + "learning_rate": 3.0536025714475946e-06, + "loss": 0.5769479274749756, + "step": 4299 + }, + { + "epoch": 1.5152422907488987, + "grad_norm": 2.0281621255217526, + "learning_rate": 3.049412487479919e-06, + "loss": 0.6275384426116943, + "step": 4300 + }, + { + "epoch": 1.5155947136563876, + "grad_norm": 1.6860684781531563, + "learning_rate": 3.04522476302039e-06, + "loss": 0.5555096864700317, + "step": 4301 + }, + { + "epoch": 1.5159471365638768, + "grad_norm": 1.9260394424858205, + "learning_rate": 3.0410393994906096e-06, + "loss": 0.5605635643005371, + "step": 4302 + }, + { + "epoch": 1.5162995594713657, + "grad_norm": 2.127824876873509, + "learning_rate": 3.0368563983113864e-06, + "loss": 0.6006621718406677, + "step": 4303 + }, + { + "epoch": 1.5166519823788547, + "grad_norm": 2.012171091410243, + "learning_rate": 3.0326757609027147e-06, + "loss": 0.5288259983062744, + "step": 4304 + }, + { + "epoch": 1.5170044052863436, + "grad_norm": 1.7034257525965926, + "learning_rate": 3.0284974886837903e-06, + "loss": 0.5671676993370056, + "step": 4305 + }, + { + "epoch": 1.5173568281938326, + "grad_norm": 2.496889571382279, + "learning_rate": 3.0243215830730075e-06, + "loss": 0.6072134971618652, + "step": 4306 + }, + { + "epoch": 1.5177092511013215, + "grad_norm": 1.726261889224961, + "learning_rate": 3.020148045487953e-06, + "loss": 0.6010481119155884, + "step": 4307 + }, + { + "epoch": 1.5180616740088104, + "grad_norm": 1.6250908189476003, + "learning_rate": 
3.0159768773454225e-06, + "loss": 0.6126751899719238, + "step": 4308 + }, + { + "epoch": 1.5184140969162996, + "grad_norm": 1.6123380534859018, + "learning_rate": 3.011808080061387e-06, + "loss": 0.5408819317817688, + "step": 4309 + }, + { + "epoch": 1.5187665198237885, + "grad_norm": 1.6792977324898095, + "learning_rate": 3.0076416550510255e-06, + "loss": 0.6528562307357788, + "step": 4310 + }, + { + "epoch": 1.5191189427312777, + "grad_norm": 1.6431948485087644, + "learning_rate": 3.003477603728715e-06, + "loss": 0.6355241537094116, + "step": 4311 + }, + { + "epoch": 1.5194713656387666, + "grad_norm": 1.7630338655444058, + "learning_rate": 2.9993159275080174e-06, + "loss": 0.5511878728866577, + "step": 4312 + }, + { + "epoch": 1.5198237885462555, + "grad_norm": 1.9093354982688662, + "learning_rate": 2.9951566278016943e-06, + "loss": 0.5066816806793213, + "step": 4313 + }, + { + "epoch": 1.5201762114537445, + "grad_norm": 1.676344611272679, + "learning_rate": 2.9909997060216966e-06, + "loss": 0.5636533498764038, + "step": 4314 + }, + { + "epoch": 1.5205286343612334, + "grad_norm": 1.8885420705538216, + "learning_rate": 2.9868451635791706e-06, + "loss": 0.49742352962493896, + "step": 4315 + }, + { + "epoch": 1.5208810572687224, + "grad_norm": 2.013877525146858, + "learning_rate": 2.9826930018844533e-06, + "loss": 0.7264617681503296, + "step": 4316 + }, + { + "epoch": 1.5212334801762113, + "grad_norm": 1.8792043539230026, + "learning_rate": 2.978543222347076e-06, + "loss": 0.5342350006103516, + "step": 4317 + }, + { + "epoch": 1.5215859030837005, + "grad_norm": 1.7569176385310192, + "learning_rate": 2.9743958263757554e-06, + "loss": 0.4324883818626404, + "step": 4318 + }, + { + "epoch": 1.5219383259911894, + "grad_norm": 1.8546496052344164, + "learning_rate": 2.970250815378409e-06, + "loss": 0.5867510437965393, + "step": 4319 + }, + { + "epoch": 1.5222907488986785, + "grad_norm": 1.8415605839915816, + "learning_rate": 2.966108190762138e-06, + "loss": 
0.7176594734191895, + "step": 4320 + }, + { + "epoch": 1.5226431718061675, + "grad_norm": 1.967906535494615, + "learning_rate": 2.9619679539332337e-06, + "loss": 0.5810995101928711, + "step": 4321 + }, + { + "epoch": 1.5229955947136564, + "grad_norm": 2.478705006420029, + "learning_rate": 2.957830106297177e-06, + "loss": 0.6262675523757935, + "step": 4322 + }, + { + "epoch": 1.5233480176211454, + "grad_norm": 2.1743670559442245, + "learning_rate": 2.9536946492586383e-06, + "loss": 0.7743325233459473, + "step": 4323 + }, + { + "epoch": 1.5237004405286343, + "grad_norm": 1.7129659102014092, + "learning_rate": 2.9495615842214776e-06, + "loss": 0.7706553936004639, + "step": 4324 + }, + { + "epoch": 1.5240528634361232, + "grad_norm": 1.6835245148440698, + "learning_rate": 2.9454309125887405e-06, + "loss": 0.5982425808906555, + "step": 4325 + }, + { + "epoch": 1.5244052863436124, + "grad_norm": 1.8547174799711497, + "learning_rate": 2.9413026357626596e-06, + "loss": 0.5580830574035645, + "step": 4326 + }, + { + "epoch": 1.5247577092511013, + "grad_norm": 1.8954299514318398, + "learning_rate": 2.937176755144662e-06, + "loss": 0.5316063165664673, + "step": 4327 + }, + { + "epoch": 1.5251101321585903, + "grad_norm": 1.7578719545795178, + "learning_rate": 2.9330532721353523e-06, + "loss": 0.574161171913147, + "step": 4328 + }, + { + "epoch": 1.5254625550660794, + "grad_norm": 1.7055567103896054, + "learning_rate": 2.9289321881345257e-06, + "loss": 0.5339558720588684, + "step": 4329 + }, + { + "epoch": 1.5258149779735684, + "grad_norm": 1.7071106155323514, + "learning_rate": 2.9248135045411607e-06, + "loss": 0.594109296798706, + "step": 4330 + }, + { + "epoch": 1.5261674008810573, + "grad_norm": 2.0022142230843873, + "learning_rate": 2.9206972227534237e-06, + "loss": 0.5953024625778198, + "step": 4331 + }, + { + "epoch": 1.5265198237885462, + "grad_norm": 1.6369885387081085, + "learning_rate": 2.916583344168663e-06, + "loss": 0.5142296552658081, + "step": 4332 + }, + { + 
"epoch": 1.5268722466960352, + "grad_norm": 1.7205930689665365, + "learning_rate": 2.912471870183411e-06, + "loss": 0.5796314477920532, + "step": 4333 + }, + { + "epoch": 1.527224669603524, + "grad_norm": 1.984086822092815, + "learning_rate": 2.9083628021933886e-06, + "loss": 0.7202566862106323, + "step": 4334 + }, + { + "epoch": 1.5275770925110133, + "grad_norm": 2.2205082372485756, + "learning_rate": 2.9042561415934956e-06, + "loss": 0.6684188842773438, + "step": 4335 + }, + { + "epoch": 1.5279295154185022, + "grad_norm": 1.7000543577524454, + "learning_rate": 2.9001518897778147e-06, + "loss": 0.5377634763717651, + "step": 4336 + }, + { + "epoch": 1.5282819383259914, + "grad_norm": 1.7985805373418047, + "learning_rate": 2.8960500481396115e-06, + "loss": 0.5780486464500427, + "step": 4337 + }, + { + "epoch": 1.5286343612334803, + "grad_norm": 1.7528900086241466, + "learning_rate": 2.891950618071333e-06, + "loss": 0.6020476818084717, + "step": 4338 + }, + { + "epoch": 1.5289867841409692, + "grad_norm": 2.1939247460310303, + "learning_rate": 2.8878536009646106e-06, + "loss": 0.6076337099075317, + "step": 4339 + }, + { + "epoch": 1.5293392070484582, + "grad_norm": 1.9795227787355654, + "learning_rate": 2.883758998210251e-06, + "loss": 0.6370673179626465, + "step": 4340 + }, + { + "epoch": 1.529691629955947, + "grad_norm": 1.8686242611734982, + "learning_rate": 2.879666811198244e-06, + "loss": 0.41594892740249634, + "step": 4341 + }, + { + "epoch": 1.530044052863436, + "grad_norm": 1.9135398095116771, + "learning_rate": 2.8755770413177632e-06, + "loss": 0.4506857693195343, + "step": 4342 + }, + { + "epoch": 1.530396475770925, + "grad_norm": 1.730478313082556, + "learning_rate": 2.8714896899571575e-06, + "loss": 0.5883188247680664, + "step": 4343 + }, + { + "epoch": 1.5307488986784141, + "grad_norm": 1.9672614741507624, + "learning_rate": 2.8674047585039545e-06, + "loss": 0.6327757239341736, + "step": 4344 + }, + { + "epoch": 1.531101321585903, + "grad_norm": 
1.958310227558085, + "learning_rate": 2.863322248344862e-06, + "loss": 0.6241307258605957, + "step": 4345 + }, + { + "epoch": 1.5314537444933922, + "grad_norm": 1.9476460174005816, + "learning_rate": 2.859242160865764e-06, + "loss": 0.6982603669166565, + "step": 4346 + }, + { + "epoch": 1.5318061674008812, + "grad_norm": 1.8365437525119523, + "learning_rate": 2.8551644974517236e-06, + "loss": 0.6293624639511108, + "step": 4347 + }, + { + "epoch": 1.53215859030837, + "grad_norm": 1.8744498682554205, + "learning_rate": 2.85108925948698e-06, + "loss": 0.5630898475646973, + "step": 4348 + }, + { + "epoch": 1.532511013215859, + "grad_norm": 1.9156047998547734, + "learning_rate": 2.847016448354948e-06, + "loss": 0.5300726294517517, + "step": 4349 + }, + { + "epoch": 1.532863436123348, + "grad_norm": 1.763805411540431, + "learning_rate": 2.8429460654382257e-06, + "loss": 0.6302311420440674, + "step": 4350 + }, + { + "epoch": 1.533215859030837, + "grad_norm": 2.0169994553834467, + "learning_rate": 2.8388781121185815e-06, + "loss": 0.5063371658325195, + "step": 4351 + }, + { + "epoch": 1.5335682819383258, + "grad_norm": 1.995201540637565, + "learning_rate": 2.8348125897769496e-06, + "loss": 0.6116877198219299, + "step": 4352 + }, + { + "epoch": 1.533920704845815, + "grad_norm": 1.7792408625607217, + "learning_rate": 2.830749499793458e-06, + "loss": 0.5671982169151306, + "step": 4353 + }, + { + "epoch": 1.534273127753304, + "grad_norm": 1.927509688688397, + "learning_rate": 2.826688843547395e-06, + "loss": 0.6537752747535706, + "step": 4354 + }, + { + "epoch": 1.534625550660793, + "grad_norm": 2.0558781996543805, + "learning_rate": 2.8226306224172283e-06, + "loss": 0.6608545780181885, + "step": 4355 + }, + { + "epoch": 1.534977973568282, + "grad_norm": 2.2266474146630655, + "learning_rate": 2.8185748377805977e-06, + "loss": 0.7038587331771851, + "step": 4356 + }, + { + "epoch": 1.535330396475771, + "grad_norm": 2.084521400671401, + "learning_rate": 2.8145214910143128e-06, + 
"loss": 0.7422336339950562, + "step": 4357 + }, + { + "epoch": 1.53568281938326, + "grad_norm": 1.7545985325343467, + "learning_rate": 2.8104705834943625e-06, + "loss": 0.5739270448684692, + "step": 4358 + }, + { + "epoch": 1.5360352422907488, + "grad_norm": 1.8063052751864486, + "learning_rate": 2.8064221165959073e-06, + "loss": 0.6429908275604248, + "step": 4359 + }, + { + "epoch": 1.5363876651982378, + "grad_norm": 2.071223390835828, + "learning_rate": 2.802376091693264e-06, + "loss": 0.5660578012466431, + "step": 4360 + }, + { + "epoch": 1.5367400881057267, + "grad_norm": 1.7313945668822706, + "learning_rate": 2.798332510159942e-06, + "loss": 0.4507398009300232, + "step": 4361 + }, + { + "epoch": 1.5370925110132159, + "grad_norm": 1.638677595892734, + "learning_rate": 2.7942913733686063e-06, + "loss": 0.5107634663581848, + "step": 4362 + }, + { + "epoch": 1.5374449339207048, + "grad_norm": 1.8351099655043759, + "learning_rate": 2.790252682691106e-06, + "loss": 0.505529522895813, + "step": 4363 + }, + { + "epoch": 1.537797356828194, + "grad_norm": 1.7171378177734038, + "learning_rate": 2.7862164394984405e-06, + "loss": 0.459098219871521, + "step": 4364 + }, + { + "epoch": 1.538149779735683, + "grad_norm": 1.6256824509042396, + "learning_rate": 2.782182645160789e-06, + "loss": 0.5200169086456299, + "step": 4365 + }, + { + "epoch": 1.5385022026431718, + "grad_norm": 2.1593491644665908, + "learning_rate": 2.778151301047506e-06, + "loss": 0.6723796725273132, + "step": 4366 + }, + { + "epoch": 1.5388546255506608, + "grad_norm": 1.7552269414614434, + "learning_rate": 2.7741224085271067e-06, + "loss": 0.5385584831237793, + "step": 4367 + }, + { + "epoch": 1.5392070484581497, + "grad_norm": 1.6358174263890735, + "learning_rate": 2.770095968967267e-06, + "loss": 0.5766934156417847, + "step": 4368 + }, + { + "epoch": 1.5395594713656386, + "grad_norm": 1.6116842273066272, + "learning_rate": 2.766071983734845e-06, + "loss": 0.6303011178970337, + "step": 4369 + }, + { + 
"epoch": 1.5399118942731278, + "grad_norm": 2.0294439046284736, + "learning_rate": 2.7620504541958525e-06, + "loss": 0.6192827224731445, + "step": 4370 + }, + { + "epoch": 1.5402643171806167, + "grad_norm": 1.9731545400175885, + "learning_rate": 2.758031381715485e-06, + "loss": 0.543215811252594, + "step": 4371 + }, + { + "epoch": 1.5406167400881057, + "grad_norm": 1.8102023756492311, + "learning_rate": 2.7540147676580808e-06, + "loss": 0.6364312171936035, + "step": 4372 + }, + { + "epoch": 1.5409691629955948, + "grad_norm": 1.7440307883728075, + "learning_rate": 2.750000613387157e-06, + "loss": 0.5625254511833191, + "step": 4373 + }, + { + "epoch": 1.5413215859030838, + "grad_norm": 2.1646055145888377, + "learning_rate": 2.7459889202654e-06, + "loss": 0.7304128408432007, + "step": 4374 + }, + { + "epoch": 1.5416740088105727, + "grad_norm": 2.1627384337401296, + "learning_rate": 2.7419796896546536e-06, + "loss": 0.676097571849823, + "step": 4375 + }, + { + "epoch": 1.5420264317180616, + "grad_norm": 1.9373952441867042, + "learning_rate": 2.7379729229159193e-06, + "loss": 0.7024539709091187, + "step": 4376 + }, + { + "epoch": 1.5423788546255506, + "grad_norm": 1.6778176206961017, + "learning_rate": 2.7339686214093774e-06, + "loss": 0.6357964277267456, + "step": 4377 + }, + { + "epoch": 1.5427312775330395, + "grad_norm": 1.8606991682829432, + "learning_rate": 2.729966786494361e-06, + "loss": 0.5254555940628052, + "step": 4378 + }, + { + "epoch": 1.5430837004405287, + "grad_norm": 1.527570009912515, + "learning_rate": 2.7259674195293697e-06, + "loss": 0.4899883270263672, + "step": 4379 + }, + { + "epoch": 1.5434361233480176, + "grad_norm": 1.725531709071361, + "learning_rate": 2.721970521872063e-06, + "loss": 0.5750056505203247, + "step": 4380 + }, + { + "epoch": 1.5437885462555068, + "grad_norm": 1.8900737960638598, + "learning_rate": 2.71797609487926e-06, + "loss": 0.5852059125900269, + "step": 4381 + }, + { + "epoch": 1.5441409691629957, + "grad_norm": 
1.8258629839457563, + "learning_rate": 2.71398413990695e-06, + "loss": 0.6360914707183838, + "step": 4382 + }, + { + "epoch": 1.5444933920704846, + "grad_norm": 1.7586915096989222, + "learning_rate": 2.7099946583102764e-06, + "loss": 0.5120062828063965, + "step": 4383 + }, + { + "epoch": 1.5448458149779736, + "grad_norm": 2.068877348919367, + "learning_rate": 2.706007651443543e-06, + "loss": 0.5798901319503784, + "step": 4384 + }, + { + "epoch": 1.5451982378854625, + "grad_norm": 2.014366735127449, + "learning_rate": 2.702023120660213e-06, + "loss": 0.5112065076828003, + "step": 4385 + }, + { + "epoch": 1.5455506607929514, + "grad_norm": 1.9281653354114374, + "learning_rate": 2.6980410673129133e-06, + "loss": 0.6136611700057983, + "step": 4386 + }, + { + "epoch": 1.5459030837004404, + "grad_norm": 1.6841076662412324, + "learning_rate": 2.694061492753426e-06, + "loss": 0.5944457054138184, + "step": 4387 + }, + { + "epoch": 1.5462555066079295, + "grad_norm": 1.9404009079173157, + "learning_rate": 2.690084398332692e-06, + "loss": 0.5931667685508728, + "step": 4388 + }, + { + "epoch": 1.5466079295154185, + "grad_norm": 1.607840859056915, + "learning_rate": 2.686109785400809e-06, + "loss": 0.6112217307090759, + "step": 4389 + }, + { + "epoch": 1.5469603524229076, + "grad_norm": 1.9090904865448288, + "learning_rate": 2.68213765530704e-06, + "loss": 0.549437940120697, + "step": 4390 + }, + { + "epoch": 1.5473127753303966, + "grad_norm": 1.9826888565576624, + "learning_rate": 2.6781680093997965e-06, + "loss": 0.674758791923523, + "step": 4391 + }, + { + "epoch": 1.5476651982378855, + "grad_norm": 1.874293916028551, + "learning_rate": 2.6742008490266504e-06, + "loss": 0.6015446186065674, + "step": 4392 + }, + { + "epoch": 1.5480176211453744, + "grad_norm": 2.2556212033260223, + "learning_rate": 2.6702361755343278e-06, + "loss": 0.5512514710426331, + "step": 4393 + }, + { + "epoch": 1.5483700440528634, + "grad_norm": 2.607818594949077, + "learning_rate": 
2.666273990268713e-06, + "loss": 0.6443158984184265, + "step": 4394 + }, + { + "epoch": 1.5487224669603523, + "grad_norm": 1.840692212890546, + "learning_rate": 2.6623142945748447e-06, + "loss": 0.5682512521743774, + "step": 4395 + }, + { + "epoch": 1.5490748898678413, + "grad_norm": 2.173690182254911, + "learning_rate": 2.658357089796917e-06, + "loss": 0.5544074773788452, + "step": 4396 + }, + { + "epoch": 1.5494273127753304, + "grad_norm": 1.726846155573174, + "learning_rate": 2.6544023772782736e-06, + "loss": 0.5811636447906494, + "step": 4397 + }, + { + "epoch": 1.5497797356828193, + "grad_norm": 1.790573455353959, + "learning_rate": 2.650450158361422e-06, + "loss": 0.4696553647518158, + "step": 4398 + }, + { + "epoch": 1.5501321585903085, + "grad_norm": 1.918353319441468, + "learning_rate": 2.6465004343880153e-06, + "loss": 0.6897521615028381, + "step": 4399 + }, + { + "epoch": 1.5504845814977974, + "grad_norm": 1.9780672696205217, + "learning_rate": 2.6425532066988613e-06, + "loss": 0.6154924631118774, + "step": 4400 + }, + { + "epoch": 1.5508370044052864, + "grad_norm": 2.0803038103367815, + "learning_rate": 2.6386084766339214e-06, + "loss": 0.5333596467971802, + "step": 4401 + }, + { + "epoch": 1.5511894273127753, + "grad_norm": 1.8190921046801005, + "learning_rate": 2.634666245532309e-06, + "loss": 0.6633985042572021, + "step": 4402 + }, + { + "epoch": 1.5515418502202643, + "grad_norm": 1.6722563074159322, + "learning_rate": 2.630726514732289e-06, + "loss": 0.7913509607315063, + "step": 4403 + }, + { + "epoch": 1.5518942731277532, + "grad_norm": 1.8118598393520884, + "learning_rate": 2.6267892855712763e-06, + "loss": 0.5776455402374268, + "step": 4404 + }, + { + "epoch": 1.5522466960352423, + "grad_norm": 1.68862603841886, + "learning_rate": 2.6228545593858357e-06, + "loss": 0.5912357568740845, + "step": 4405 + }, + { + "epoch": 1.5525991189427313, + "grad_norm": 2.1281645633634274, + "learning_rate": 2.618922337511689e-06, + "loss": 0.49319127202033997, + 
"step": 4406 + }, + { + "epoch": 1.5529515418502202, + "grad_norm": 1.9165140700223777, + "learning_rate": 2.6149926212837016e-06, + "loss": 0.5805023908615112, + "step": 4407 + }, + { + "epoch": 1.5533039647577094, + "grad_norm": 1.8889646736612442, + "learning_rate": 2.6110654120358902e-06, + "loss": 0.5635806918144226, + "step": 4408 + }, + { + "epoch": 1.5536563876651983, + "grad_norm": 1.6927734818193383, + "learning_rate": 2.6071407111014178e-06, + "loss": 0.5006709694862366, + "step": 4409 + }, + { + "epoch": 1.5540088105726872, + "grad_norm": 2.1152391301202695, + "learning_rate": 2.6032185198126005e-06, + "loss": 0.6035311818122864, + "step": 4410 + }, + { + "epoch": 1.5543612334801762, + "grad_norm": 1.7863466908276826, + "learning_rate": 2.599298839500899e-06, + "loss": 0.5978977680206299, + "step": 4411 + }, + { + "epoch": 1.5547136563876651, + "grad_norm": 1.8741259739913476, + "learning_rate": 2.5953816714969194e-06, + "loss": 0.6330617070198059, + "step": 4412 + }, + { + "epoch": 1.555066079295154, + "grad_norm": 2.090756152665107, + "learning_rate": 2.591467017130426e-06, + "loss": 0.6541750431060791, + "step": 4413 + }, + { + "epoch": 1.5554185022026432, + "grad_norm": 2.229850729984303, + "learning_rate": 2.5875548777303204e-06, + "loss": 0.5503655076026917, + "step": 4414 + }, + { + "epoch": 1.5557709251101322, + "grad_norm": 1.7715926792210983, + "learning_rate": 2.583645254624645e-06, + "loss": 0.5117509365081787, + "step": 4415 + }, + { + "epoch": 1.5561233480176213, + "grad_norm": 2.2945620883910953, + "learning_rate": 2.5797381491406027e-06, + "loss": 0.6699894070625305, + "step": 4416 + }, + { + "epoch": 1.5564757709251102, + "grad_norm": 2.0695182526571765, + "learning_rate": 2.5758335626045308e-06, + "loss": 0.6870071291923523, + "step": 4417 + }, + { + "epoch": 1.5568281938325992, + "grad_norm": 1.7821456199762375, + "learning_rate": 2.571931496341916e-06, + "loss": 0.7680954933166504, + "step": 4418 + }, + { + "epoch": 
1.5571806167400881, + "grad_norm": 2.2345593449058203, + "learning_rate": 2.568031951677389e-06, + "loss": 0.6504727602005005, + "step": 4419 + }, + { + "epoch": 1.557533039647577, + "grad_norm": 1.6796846625470907, + "learning_rate": 2.5641349299347196e-06, + "loss": 0.7101249098777771, + "step": 4420 + }, + { + "epoch": 1.557885462555066, + "grad_norm": 1.6800594114237326, + "learning_rate": 2.560240432436831e-06, + "loss": 0.5734864473342896, + "step": 4421 + }, + { + "epoch": 1.558237885462555, + "grad_norm": 2.065356657851052, + "learning_rate": 2.5563484605057854e-06, + "loss": 0.48660311102867126, + "step": 4422 + }, + { + "epoch": 1.558590308370044, + "grad_norm": 2.0678483817870847, + "learning_rate": 2.552459015462776e-06, + "loss": 0.6442986726760864, + "step": 4423 + }, + { + "epoch": 1.558942731277533, + "grad_norm": 1.721146259770593, + "learning_rate": 2.548572098628158e-06, + "loss": 0.5871995091438293, + "step": 4424 + }, + { + "epoch": 1.5592951541850222, + "grad_norm": 1.8900651182173844, + "learning_rate": 2.544687711321415e-06, + "loss": 0.5899579524993896, + "step": 4425 + }, + { + "epoch": 1.5596475770925111, + "grad_norm": 1.589021195095579, + "learning_rate": 2.540805854861177e-06, + "loss": 0.571341872215271, + "step": 4426 + }, + { + "epoch": 1.56, + "grad_norm": 2.074587814486514, + "learning_rate": 2.5369265305652112e-06, + "loss": 0.6297308206558228, + "step": 4427 + }, + { + "epoch": 1.560352422907489, + "grad_norm": 1.957815284803115, + "learning_rate": 2.5330497397504274e-06, + "loss": 0.6277692317962646, + "step": 4428 + }, + { + "epoch": 1.560704845814978, + "grad_norm": 1.8075270549654299, + "learning_rate": 2.5291754837328787e-06, + "loss": 0.5124595165252686, + "step": 4429 + }, + { + "epoch": 1.5610572687224669, + "grad_norm": 2.027466093132035, + "learning_rate": 2.5253037638277557e-06, + "loss": 0.6777669191360474, + "step": 4430 + }, + { + "epoch": 1.5614096916299558, + "grad_norm": 2.299371691906574, + "learning_rate": 
2.521434581349378e-06, + "loss": 0.7380247116088867, + "step": 4431 + }, + { + "epoch": 1.561762114537445, + "grad_norm": 2.0566157739817825, + "learning_rate": 2.5175679376112206e-06, + "loss": 0.6605849266052246, + "step": 4432 + }, + { + "epoch": 1.562114537444934, + "grad_norm": 1.7899790415054606, + "learning_rate": 2.5137038339258837e-06, + "loss": 0.5688329935073853, + "step": 4433 + }, + { + "epoch": 1.562466960352423, + "grad_norm": 2.1227992795896258, + "learning_rate": 2.5098422716051197e-06, + "loss": 0.6731508374214172, + "step": 4434 + }, + { + "epoch": 1.562819383259912, + "grad_norm": 1.766889438914358, + "learning_rate": 2.505983251959798e-06, + "loss": 0.5177330374717712, + "step": 4435 + }, + { + "epoch": 1.563171806167401, + "grad_norm": 1.793841264632356, + "learning_rate": 2.502126776299938e-06, + "loss": 0.5307918787002563, + "step": 4436 + }, + { + "epoch": 1.5635242290748899, + "grad_norm": 1.8402321267228738, + "learning_rate": 2.4982728459346974e-06, + "loss": 0.59647536277771, + "step": 4437 + }, + { + "epoch": 1.5638766519823788, + "grad_norm": 2.049156650890273, + "learning_rate": 2.494421462172365e-06, + "loss": 0.6215553283691406, + "step": 4438 + }, + { + "epoch": 1.5642290748898677, + "grad_norm": 1.7976631043220852, + "learning_rate": 2.490572626320359e-06, + "loss": 0.49461615085601807, + "step": 4439 + }, + { + "epoch": 1.5645814977973567, + "grad_norm": 2.4138380625358757, + "learning_rate": 2.486726339685247e-06, + "loss": 0.6625338196754456, + "step": 4440 + }, + { + "epoch": 1.5649339207048458, + "grad_norm": 1.5979739892152505, + "learning_rate": 2.4828826035727214e-06, + "loss": 0.4059983193874359, + "step": 4441 + }, + { + "epoch": 1.5652863436123348, + "grad_norm": 1.7298713789472393, + "learning_rate": 2.47904141928761e-06, + "loss": 0.6234895586967468, + "step": 4442 + }, + { + "epoch": 1.565638766519824, + "grad_norm": 1.8282339040044808, + "learning_rate": 2.4752027881338757e-06, + "loss": 0.513421893119812, + 
"step": 4443 + }, + { + "epoch": 1.5659911894273129, + "grad_norm": 2.0213648562049693, + "learning_rate": 2.4713667114146123e-06, + "loss": 0.6168510913848877, + "step": 4444 + }, + { + "epoch": 1.5663436123348018, + "grad_norm": 1.8904853102151467, + "learning_rate": 2.4675331904320533e-06, + "loss": 0.5474672317504883, + "step": 4445 + }, + { + "epoch": 1.5666960352422907, + "grad_norm": 2.020157324166176, + "learning_rate": 2.46370222648756e-06, + "loss": 0.7464281916618347, + "step": 4446 + }, + { + "epoch": 1.5670484581497797, + "grad_norm": 1.8187430699226648, + "learning_rate": 2.4598738208816155e-06, + "loss": 0.5890274047851562, + "step": 4447 + }, + { + "epoch": 1.5674008810572686, + "grad_norm": 2.0160604417207293, + "learning_rate": 2.4560479749138554e-06, + "loss": 0.7577700018882751, + "step": 4448 + }, + { + "epoch": 1.5677533039647578, + "grad_norm": 1.6711759350664435, + "learning_rate": 2.4522246898830302e-06, + "loss": 0.5374037027359009, + "step": 4449 + }, + { + "epoch": 1.5681057268722467, + "grad_norm": 1.7947512315133625, + "learning_rate": 2.4484039670870286e-06, + "loss": 0.44840407371520996, + "step": 4450 + }, + { + "epoch": 1.5684581497797356, + "grad_norm": 1.8087906354095658, + "learning_rate": 2.4445858078228647e-06, + "loss": 0.5144427418708801, + "step": 4451 + }, + { + "epoch": 1.5688105726872248, + "grad_norm": 1.7889124821216469, + "learning_rate": 2.440770213386684e-06, + "loss": 0.39119952917099, + "step": 4452 + }, + { + "epoch": 1.5691629955947137, + "grad_norm": 1.6376212389282347, + "learning_rate": 2.436957185073766e-06, + "loss": 0.5287434458732605, + "step": 4453 + }, + { + "epoch": 1.5695154185022027, + "grad_norm": 2.2578778571267315, + "learning_rate": 2.4331467241785157e-06, + "loss": 0.568587064743042, + "step": 4454 + }, + { + "epoch": 1.5698678414096916, + "grad_norm": 3.2977149916111608, + "learning_rate": 2.429338831994458e-06, + "loss": 0.5522792339324951, + "step": 4455 + }, + { + "epoch": 
1.5702202643171805, + "grad_norm": 1.7594156491061212, + "learning_rate": 2.425533509814262e-06, + "loss": 0.48070845007896423, + "step": 4456 + }, + { + "epoch": 1.5705726872246695, + "grad_norm": 1.631888097687176, + "learning_rate": 2.4217307589297135e-06, + "loss": 0.44293439388275146, + "step": 4457 + }, + { + "epoch": 1.5709251101321586, + "grad_norm": 1.933449446432769, + "learning_rate": 2.4179305806317266e-06, + "loss": 0.5753301382064819, + "step": 4458 + }, + { + "epoch": 1.5712775330396476, + "grad_norm": 1.9958241636570169, + "learning_rate": 2.414132976210346e-06, + "loss": 0.5873000025749207, + "step": 4459 + }, + { + "epoch": 1.5716299559471367, + "grad_norm": 2.2068877987049955, + "learning_rate": 2.410337946954736e-06, + "loss": 0.6084823608398438, + "step": 4460 + }, + { + "epoch": 1.5719823788546257, + "grad_norm": 1.743876311662913, + "learning_rate": 2.4065454941531963e-06, + "loss": 0.541124165058136, + "step": 4461 + }, + { + "epoch": 1.5723348017621146, + "grad_norm": 1.8080812306830252, + "learning_rate": 2.4027556190931446e-06, + "loss": 0.5170080661773682, + "step": 4462 + }, + { + "epoch": 1.5726872246696035, + "grad_norm": 1.817245899938438, + "learning_rate": 2.398968323061125e-06, + "loss": 0.5613514184951782, + "step": 4463 + }, + { + "epoch": 1.5730396475770925, + "grad_norm": 1.7097401781842303, + "learning_rate": 2.395183607342807e-06, + "loss": 0.6645728349685669, + "step": 4464 + }, + { + "epoch": 1.5733920704845814, + "grad_norm": 1.8730205237982336, + "learning_rate": 2.391401473222983e-06, + "loss": 0.7077093124389648, + "step": 4465 + }, + { + "epoch": 1.5737444933920703, + "grad_norm": 1.7460518248753176, + "learning_rate": 2.387621921985571e-06, + "loss": 0.5687523484230042, + "step": 4466 + }, + { + "epoch": 1.5740969162995595, + "grad_norm": 1.9850945169232843, + "learning_rate": 2.38384495491361e-06, + "loss": 0.5837362408638, + "step": 4467 + }, + { + "epoch": 1.5744493392070484, + "grad_norm": 2.051593268912329, + 
"learning_rate": 2.3800705732892615e-06, + "loss": 0.5552037358283997, + "step": 4468 + }, + { + "epoch": 1.5748017621145376, + "grad_norm": 1.8128967121473578, + "learning_rate": 2.376298778393814e-06, + "loss": 0.5502952337265015, + "step": 4469 + }, + { + "epoch": 1.5751541850220265, + "grad_norm": 1.958629504700592, + "learning_rate": 2.3725295715076734e-06, + "loss": 0.5621509552001953, + "step": 4470 + }, + { + "epoch": 1.5755066079295155, + "grad_norm": 2.20917213599842, + "learning_rate": 2.3687629539103676e-06, + "loss": 0.6703782081604004, + "step": 4471 + }, + { + "epoch": 1.5758590308370044, + "grad_norm": 1.6659443121840707, + "learning_rate": 2.3649989268805453e-06, + "loss": 0.5681235194206238, + "step": 4472 + }, + { + "epoch": 1.5762114537444933, + "grad_norm": 1.6009126465101926, + "learning_rate": 2.361237491695978e-06, + "loss": 0.611667811870575, + "step": 4473 + }, + { + "epoch": 1.5765638766519823, + "grad_norm": 1.7200740539010873, + "learning_rate": 2.3574786496335546e-06, + "loss": 0.5758671760559082, + "step": 4474 + }, + { + "epoch": 1.5769162995594712, + "grad_norm": 1.9125579541010735, + "learning_rate": 2.3537224019692863e-06, + "loss": 0.4865596294403076, + "step": 4475 + }, + { + "epoch": 1.5772687224669604, + "grad_norm": 1.8564502689111453, + "learning_rate": 2.3499687499782976e-06, + "loss": 0.6356204152107239, + "step": 4476 + }, + { + "epoch": 1.5776211453744493, + "grad_norm": 2.1421860610476022, + "learning_rate": 2.346217694934847e-06, + "loss": 0.7177166938781738, + "step": 4477 + }, + { + "epoch": 1.5779735682819385, + "grad_norm": 1.5480906826266605, + "learning_rate": 2.3424692381122882e-06, + "loss": 0.5727916955947876, + "step": 4478 + }, + { + "epoch": 1.5783259911894274, + "grad_norm": 1.8719733775312895, + "learning_rate": 2.3387233807831144e-06, + "loss": 0.4904511570930481, + "step": 4479 + }, + { + "epoch": 1.5786784140969163, + "grad_norm": 1.781780296857209, + "learning_rate": 2.3349801242189262e-06, + "loss": 
0.6029622554779053, + "step": 4480 + }, + { + "epoch": 1.5790308370044053, + "grad_norm": 1.7377028122196188, + "learning_rate": 2.3312394696904404e-06, + "loss": 0.6462864875793457, + "step": 4481 + }, + { + "epoch": 1.5793832599118942, + "grad_norm": 2.2050402923740555, + "learning_rate": 2.327501418467495e-06, + "loss": 0.6000367403030396, + "step": 4482 + }, + { + "epoch": 1.5797356828193831, + "grad_norm": 1.8056795992302546, + "learning_rate": 2.3237659718190398e-06, + "loss": 0.5498829483985901, + "step": 4483 + }, + { + "epoch": 1.580088105726872, + "grad_norm": 1.9193344841770834, + "learning_rate": 2.320033131013142e-06, + "loss": 0.5445006489753723, + "step": 4484 + }, + { + "epoch": 1.5804405286343612, + "grad_norm": 1.737360484366453, + "learning_rate": 2.316302897316992e-06, + "loss": 0.4878338575363159, + "step": 4485 + }, + { + "epoch": 1.5807929515418502, + "grad_norm": 1.9395301127212525, + "learning_rate": 2.3125752719968763e-06, + "loss": 0.473583459854126, + "step": 4486 + }, + { + "epoch": 1.5811453744493393, + "grad_norm": 1.885736275905952, + "learning_rate": 2.308850256318218e-06, + "loss": 0.6530570983886719, + "step": 4487 + }, + { + "epoch": 1.5814977973568283, + "grad_norm": 1.9957270393411881, + "learning_rate": 2.30512785154554e-06, + "loss": 0.6925215721130371, + "step": 4488 + }, + { + "epoch": 1.5818502202643172, + "grad_norm": 2.319012517660613, + "learning_rate": 2.3014080589424837e-06, + "loss": 0.6210705637931824, + "step": 4489 + }, + { + "epoch": 1.5822026431718061, + "grad_norm": 1.9814470349632005, + "learning_rate": 2.2976908797718013e-06, + "loss": 0.5843231678009033, + "step": 4490 + }, + { + "epoch": 1.582555066079295, + "grad_norm": 1.8411432529202023, + "learning_rate": 2.2939763152953576e-06, + "loss": 0.7014307379722595, + "step": 4491 + }, + { + "epoch": 1.582907488986784, + "grad_norm": 2.432500927945977, + "learning_rate": 2.2902643667741386e-06, + "loss": 0.563744843006134, + "step": 4492 + }, + { + "epoch": 
1.5832599118942732, + "grad_norm": 2.0467865020897227, + "learning_rate": 2.286555035468233e-06, + "loss": 0.6067275404930115, + "step": 4493 + }, + { + "epoch": 1.5836123348017621, + "grad_norm": 1.4471777617782167, + "learning_rate": 2.282848322636836e-06, + "loss": 0.5471328496932983, + "step": 4494 + }, + { + "epoch": 1.583964757709251, + "grad_norm": 1.8188988721843682, + "learning_rate": 2.2791442295382693e-06, + "loss": 0.4994550943374634, + "step": 4495 + }, + { + "epoch": 1.5843171806167402, + "grad_norm": 1.9672025899108128, + "learning_rate": 2.275442757429954e-06, + "loss": 0.6064262390136719, + "step": 4496 + }, + { + "epoch": 1.5846696035242291, + "grad_norm": 1.8109350365291292, + "learning_rate": 2.2717439075684268e-06, + "loss": 0.5119039416313171, + "step": 4497 + }, + { + "epoch": 1.585022026431718, + "grad_norm": 2.2031235285356883, + "learning_rate": 2.26804768120933e-06, + "loss": 0.7276502251625061, + "step": 4498 + }, + { + "epoch": 1.585374449339207, + "grad_norm": 2.0480046358265827, + "learning_rate": 2.264354079607416e-06, + "loss": 0.6175409555435181, + "step": 4499 + }, + { + "epoch": 1.585726872246696, + "grad_norm": 2.165546737643913, + "learning_rate": 2.2606631040165517e-06, + "loss": 0.6289592981338501, + "step": 4500 + }, + { + "epoch": 1.5860792951541849, + "grad_norm": 1.626913781336784, + "learning_rate": 2.2569747556897103e-06, + "loss": 0.5802761316299438, + "step": 4501 + }, + { + "epoch": 1.586431718061674, + "grad_norm": 1.6717876401169283, + "learning_rate": 2.2532890358789604e-06, + "loss": 0.5883978605270386, + "step": 4502 + }, + { + "epoch": 1.586784140969163, + "grad_norm": 1.7433478934489002, + "learning_rate": 2.2496059458355e-06, + "loss": 0.6915061473846436, + "step": 4503 + }, + { + "epoch": 1.5871365638766521, + "grad_norm": 1.7904879000491816, + "learning_rate": 2.2459254868096194e-06, + "loss": 0.6255539655685425, + "step": 4504 + }, + { + "epoch": 1.587488986784141, + "grad_norm": 2.0290072373401706, + 
"learning_rate": 2.2422476600507203e-06, + "loss": 0.6788307428359985, + "step": 4505 + }, + { + "epoch": 1.58784140969163, + "grad_norm": 1.8646329547804459, + "learning_rate": 2.2385724668073104e-06, + "loss": 0.5651443004608154, + "step": 4506 + }, + { + "epoch": 1.588193832599119, + "grad_norm": 1.6858252262208455, + "learning_rate": 2.2348999083270005e-06, + "loss": 0.5308901071548462, + "step": 4507 + }, + { + "epoch": 1.5885462555066079, + "grad_norm": 2.3264820621642084, + "learning_rate": 2.2312299858565156e-06, + "loss": 0.60570228099823, + "step": 4508 + }, + { + "epoch": 1.5888986784140968, + "grad_norm": 1.8330509972931788, + "learning_rate": 2.22756270064168e-06, + "loss": 0.6544185876846313, + "step": 4509 + }, + { + "epoch": 1.5892511013215858, + "grad_norm": 1.7565673285953047, + "learning_rate": 2.2238980539274156e-06, + "loss": 0.667883038520813, + "step": 4510 + }, + { + "epoch": 1.589603524229075, + "grad_norm": 1.7707733782287267, + "learning_rate": 2.2202360469577622e-06, + "loss": 0.647671103477478, + "step": 4511 + }, + { + "epoch": 1.5899559471365639, + "grad_norm": 1.8031539733499908, + "learning_rate": 2.216576680975856e-06, + "loss": 0.6990867257118225, + "step": 4512 + }, + { + "epoch": 1.590308370044053, + "grad_norm": 1.6913080596921681, + "learning_rate": 2.212919957223938e-06, + "loss": 0.6292023658752441, + "step": 4513 + }, + { + "epoch": 1.590660792951542, + "grad_norm": 2.0512598736304763, + "learning_rate": 2.2092658769433504e-06, + "loss": 0.638721227645874, + "step": 4514 + }, + { + "epoch": 1.5910132158590309, + "grad_norm": 2.0710919586830365, + "learning_rate": 2.2056144413745396e-06, + "loss": 0.5622225403785706, + "step": 4515 + }, + { + "epoch": 1.5913656387665198, + "grad_norm": 1.9225600729192178, + "learning_rate": 2.2019656517570576e-06, + "loss": 0.44093507528305054, + "step": 4516 + }, + { + "epoch": 1.5917180616740088, + "grad_norm": 1.9689195876449703, + "learning_rate": 2.198319509329556e-06, + "loss": 
0.6889619827270508, + "step": 4517 + }, + { + "epoch": 1.5920704845814977, + "grad_norm": 1.8723694409082583, + "learning_rate": 2.1946760153297773e-06, + "loss": 0.5873552560806274, + "step": 4518 + }, + { + "epoch": 1.5924229074889866, + "grad_norm": 2.3733819724747245, + "learning_rate": 2.191035170994584e-06, + "loss": 0.7172325849533081, + "step": 4519 + }, + { + "epoch": 1.5927753303964758, + "grad_norm": 1.5631566998768178, + "learning_rate": 2.187396977559927e-06, + "loss": 0.520845890045166, + "step": 4520 + }, + { + "epoch": 1.5931277533039647, + "grad_norm": 1.5657344992000655, + "learning_rate": 2.1837614362608574e-06, + "loss": 0.5241606831550598, + "step": 4521 + }, + { + "epoch": 1.5934801762114539, + "grad_norm": 2.0290302307971433, + "learning_rate": 2.1801285483315303e-06, + "loss": 0.583808422088623, + "step": 4522 + }, + { + "epoch": 1.5938325991189428, + "grad_norm": 1.829890026298915, + "learning_rate": 2.1764983150051955e-06, + "loss": 0.4648814797401428, + "step": 4523 + }, + { + "epoch": 1.5941850220264318, + "grad_norm": 1.9603824667877958, + "learning_rate": 2.1728707375142087e-06, + "loss": 0.590090274810791, + "step": 4524 + }, + { + "epoch": 1.5945374449339207, + "grad_norm": 2.0292397946897527, + "learning_rate": 2.16924581709002e-06, + "loss": 0.6554102897644043, + "step": 4525 + }, + { + "epoch": 1.5948898678414096, + "grad_norm": 2.011864917811992, + "learning_rate": 2.1656235549631677e-06, + "loss": 0.5880511999130249, + "step": 4526 + }, + { + "epoch": 1.5952422907488986, + "grad_norm": 1.6246832017365502, + "learning_rate": 2.1620039523633074e-06, + "loss": 0.5779908299446106, + "step": 4527 + }, + { + "epoch": 1.5955947136563877, + "grad_norm": 1.9147900218294176, + "learning_rate": 2.1583870105191775e-06, + "loss": 0.5030412673950195, + "step": 4528 + }, + { + "epoch": 1.5959471365638767, + "grad_norm": 1.9632795275127009, + "learning_rate": 2.1547727306586173e-06, + "loss": 0.5667461156845093, + "step": 4529 + }, + { + 
"epoch": 1.5962995594713656, + "grad_norm": 2.3190730605108882, + "learning_rate": 2.151161114008563e-06, + "loss": 0.6820607781410217, + "step": 4530 + }, + { + "epoch": 1.5966519823788548, + "grad_norm": 1.7640709477354637, + "learning_rate": 2.1475521617950425e-06, + "loss": 0.6165209412574768, + "step": 4531 + }, + { + "epoch": 1.5970044052863437, + "grad_norm": 1.897918487033638, + "learning_rate": 2.1439458752431887e-06, + "loss": 0.5987168550491333, + "step": 4532 + }, + { + "epoch": 1.5973568281938326, + "grad_norm": 1.8946893490374197, + "learning_rate": 2.1403422555772226e-06, + "loss": 0.5161086320877075, + "step": 4533 + }, + { + "epoch": 1.5977092511013216, + "grad_norm": 1.817150642667859, + "learning_rate": 2.1367413040204543e-06, + "loss": 0.5216903686523438, + "step": 4534 + }, + { + "epoch": 1.5980616740088105, + "grad_norm": 1.7820775067820096, + "learning_rate": 2.133143021795302e-06, + "loss": 0.5664666891098022, + "step": 4535 + }, + { + "epoch": 1.5984140969162994, + "grad_norm": 1.8205676682468495, + "learning_rate": 2.129547410123268e-06, + "loss": 0.501051127910614, + "step": 4536 + }, + { + "epoch": 1.5987665198237886, + "grad_norm": 1.5799563385798543, + "learning_rate": 2.1259544702249515e-06, + "loss": 0.5466792583465576, + "step": 4537 + }, + { + "epoch": 1.5991189427312775, + "grad_norm": 1.9007615560911546, + "learning_rate": 2.122364203320043e-06, + "loss": 0.5295613408088684, + "step": 4538 + }, + { + "epoch": 1.5994713656387667, + "grad_norm": 1.6670646942482272, + "learning_rate": 2.1187766106273224e-06, + "loss": 0.5406922101974487, + "step": 4539 + }, + { + "epoch": 1.5998237885462556, + "grad_norm": 2.0700620230157125, + "learning_rate": 2.1151916933646764e-06, + "loss": 0.5908178687095642, + "step": 4540 + }, + { + "epoch": 1.6001762114537446, + "grad_norm": 1.8405525752725544, + "learning_rate": 2.1116094527490594e-06, + "loss": 0.6207743883132935, + "step": 4541 + }, + { + "epoch": 1.6005286343612335, + "grad_norm": 
2.7642600887250652, + "learning_rate": 2.1080298899965413e-06, + "loss": 0.5655614137649536, + "step": 4542 + }, + { + "epoch": 1.6008810572687224, + "grad_norm": 1.5764846584358823, + "learning_rate": 2.104453006322268e-06, + "loss": 0.6019319295883179, + "step": 4543 + }, + { + "epoch": 1.6012334801762114, + "grad_norm": 1.8499785252270624, + "learning_rate": 2.1008788029404794e-06, + "loss": 0.6109766364097595, + "step": 4544 + }, + { + "epoch": 1.6015859030837003, + "grad_norm": 1.8285934792669327, + "learning_rate": 2.0973072810645078e-06, + "loss": 0.5309078693389893, + "step": 4545 + }, + { + "epoch": 1.6019383259911895, + "grad_norm": 1.7116030885611606, + "learning_rate": 2.093738441906774e-06, + "loss": 0.5440298318862915, + "step": 4546 + }, + { + "epoch": 1.6022907488986784, + "grad_norm": 1.6012955775631803, + "learning_rate": 2.0901722866787842e-06, + "loss": 0.46502384543418884, + "step": 4547 + }, + { + "epoch": 1.6026431718061676, + "grad_norm": 1.7999501734847188, + "learning_rate": 2.086608816591146e-06, + "loss": 0.4822906255722046, + "step": 4548 + }, + { + "epoch": 1.6029955947136565, + "grad_norm": 1.8169323717501906, + "learning_rate": 2.083048032853534e-06, + "loss": 0.6382625699043274, + "step": 4549 + }, + { + "epoch": 1.6033480176211454, + "grad_norm": 1.7542851479568786, + "learning_rate": 2.0794899366747334e-06, + "loss": 0.6070914268493652, + "step": 4550 + }, + { + "epoch": 1.6037004405286344, + "grad_norm": 1.8496689505105712, + "learning_rate": 2.0759345292626045e-06, + "loss": 0.5953283309936523, + "step": 4551 + }, + { + "epoch": 1.6040528634361233, + "grad_norm": 1.6448363622587787, + "learning_rate": 2.0723818118240958e-06, + "loss": 0.47553640604019165, + "step": 4552 + }, + { + "epoch": 1.6044052863436122, + "grad_norm": 2.007835441279153, + "learning_rate": 2.0688317855652463e-06, + "loss": 0.7020712494850159, + "step": 4553 + }, + { + "epoch": 1.6047577092511012, + "grad_norm": 1.739770344308816, + "learning_rate": 
2.0652844516911776e-06, + "loss": 0.5998836159706116, + "step": 4554 + }, + { + "epoch": 1.6051101321585903, + "grad_norm": 1.7690620328907303, + "learning_rate": 2.0617398114060983e-06, + "loss": 0.6501786708831787, + "step": 4555 + }, + { + "epoch": 1.6054625550660793, + "grad_norm": 1.7628232586759778, + "learning_rate": 2.0581978659133097e-06, + "loss": 0.6444278955459595, + "step": 4556 + }, + { + "epoch": 1.6058149779735684, + "grad_norm": 1.8812364367093761, + "learning_rate": 2.0546586164151827e-06, + "loss": 0.6756579875946045, + "step": 4557 + }, + { + "epoch": 1.6061674008810574, + "grad_norm": 1.9541887465796286, + "learning_rate": 2.051122064113189e-06, + "loss": 0.6043737530708313, + "step": 4558 + }, + { + "epoch": 1.6065198237885463, + "grad_norm": 1.7992795463772795, + "learning_rate": 2.047588210207877e-06, + "loss": 0.6504104137420654, + "step": 4559 + }, + { + "epoch": 1.6068722466960352, + "grad_norm": 1.8447157864854533, + "learning_rate": 2.044057055898879e-06, + "loss": 0.6586685180664062, + "step": 4560 + }, + { + "epoch": 1.6072246696035242, + "grad_norm": 1.6895598009184531, + "learning_rate": 2.0405286023849125e-06, + "loss": 0.4463368058204651, + "step": 4561 + }, + { + "epoch": 1.607577092511013, + "grad_norm": 1.626067629091748, + "learning_rate": 2.037002850863777e-06, + "loss": 0.5208157896995544, + "step": 4562 + }, + { + "epoch": 1.607929515418502, + "grad_norm": 2.325947552099387, + "learning_rate": 2.033479802532354e-06, + "loss": 0.612602174282074, + "step": 4563 + }, + { + "epoch": 1.6082819383259912, + "grad_norm": 1.8677335810734068, + "learning_rate": 2.0299594585866166e-06, + "loss": 0.6871482133865356, + "step": 4564 + }, + { + "epoch": 1.6086343612334801, + "grad_norm": 2.1450630320575863, + "learning_rate": 2.0264418202215998e-06, + "loss": 0.5770177245140076, + "step": 4565 + }, + { + "epoch": 1.6089867841409693, + "grad_norm": 2.0018570918486263, + "learning_rate": 2.0229268886314413e-06, + "loss": 0.600841224193573, 
+ "step": 4566 + }, + { + "epoch": 1.6093392070484582, + "grad_norm": 1.4951834973656204, + "learning_rate": 2.0194146650093494e-06, + "loss": 0.47742071747779846, + "step": 4567 + }, + { + "epoch": 1.6096916299559472, + "grad_norm": 1.932667797658379, + "learning_rate": 2.015905150547612e-06, + "loss": 0.5528711080551147, + "step": 4568 + }, + { + "epoch": 1.610044052863436, + "grad_norm": 1.7893968437532208, + "learning_rate": 2.0123983464376028e-06, + "loss": 0.6892603635787964, + "step": 4569 + }, + { + "epoch": 1.610396475770925, + "grad_norm": 2.0432539431091405, + "learning_rate": 2.0088942538697687e-06, + "loss": 0.593653678894043, + "step": 4570 + }, + { + "epoch": 1.610748898678414, + "grad_norm": 1.913622035178548, + "learning_rate": 2.005392874033646e-06, + "loss": 0.5570813417434692, + "step": 4571 + }, + { + "epoch": 1.6111013215859031, + "grad_norm": 1.7912413841249368, + "learning_rate": 2.0018942081178426e-06, + "loss": 0.6723357439041138, + "step": 4572 + }, + { + "epoch": 1.611453744493392, + "grad_norm": 1.8833118579628767, + "learning_rate": 1.9983982573100413e-06, + "loss": 0.5333940982818604, + "step": 4573 + }, + { + "epoch": 1.611806167400881, + "grad_norm": 2.1881508790927358, + "learning_rate": 1.9949050227970148e-06, + "loss": 0.6404193043708801, + "step": 4574 + }, + { + "epoch": 1.6121585903083702, + "grad_norm": 1.9103565569987608, + "learning_rate": 1.991414505764605e-06, + "loss": 0.6831241250038147, + "step": 4575 + }, + { + "epoch": 1.612511013215859, + "grad_norm": 2.3229832844307063, + "learning_rate": 1.9879267073977337e-06, + "loss": 0.6741847991943359, + "step": 4576 + }, + { + "epoch": 1.612863436123348, + "grad_norm": 1.843434925588856, + "learning_rate": 1.9844416288804004e-06, + "loss": 0.5234787464141846, + "step": 4577 + }, + { + "epoch": 1.613215859030837, + "grad_norm": 1.931234115746558, + "learning_rate": 1.9809592713956782e-06, + "loss": 0.6462803483009338, + "step": 4578 + }, + { + "epoch": 1.613568281938326, + 
"grad_norm": 2.495392945939654, + "learning_rate": 1.977479636125724e-06, + "loss": 0.612025797367096, + "step": 4579 + }, + { + "epoch": 1.6139207048458148, + "grad_norm": 1.6414504893846202, + "learning_rate": 1.9740027242517668e-06, + "loss": 0.5065322518348694, + "step": 4580 + }, + { + "epoch": 1.614273127753304, + "grad_norm": 1.9613495904560583, + "learning_rate": 1.9705285369540994e-06, + "loss": 0.4986911714076996, + "step": 4581 + }, + { + "epoch": 1.614625550660793, + "grad_norm": 2.1185650604413926, + "learning_rate": 1.967057075412111e-06, + "loss": 0.6030969619750977, + "step": 4582 + }, + { + "epoch": 1.614977973568282, + "grad_norm": 1.8032946015429019, + "learning_rate": 1.963588340804251e-06, + "loss": 0.6116718649864197, + "step": 4583 + }, + { + "epoch": 1.615330396475771, + "grad_norm": 1.9008591407855147, + "learning_rate": 1.960122334308047e-06, + "loss": 0.8064850568771362, + "step": 4584 + }, + { + "epoch": 1.61568281938326, + "grad_norm": 2.130250646945173, + "learning_rate": 1.9566590571000997e-06, + "loss": 0.7416974306106567, + "step": 4585 + }, + { + "epoch": 1.616035242290749, + "grad_norm": 2.0285944926888604, + "learning_rate": 1.9531985103560813e-06, + "loss": 0.48169833421707153, + "step": 4586 + }, + { + "epoch": 1.6163876651982378, + "grad_norm": 8.08226040018375, + "learning_rate": 1.949740695250746e-06, + "loss": 0.7766422629356384, + "step": 4587 + }, + { + "epoch": 1.6167400881057268, + "grad_norm": 1.6227557131714891, + "learning_rate": 1.9462856129579144e-06, + "loss": 0.3793888986110687, + "step": 4588 + }, + { + "epoch": 1.6170925110132157, + "grad_norm": 1.6662726387585254, + "learning_rate": 1.94283326465047e-06, + "loss": 0.6129955053329468, + "step": 4589 + }, + { + "epoch": 1.6174449339207049, + "grad_norm": 1.927411767174183, + "learning_rate": 1.9393836515003874e-06, + "loss": 0.7420347929000854, + "step": 4590 + }, + { + "epoch": 1.6177973568281938, + "grad_norm": 1.810002162071199, + "learning_rate": 
1.9359367746786993e-06, + "loss": 0.49013108015060425, + "step": 4591 + }, + { + "epoch": 1.618149779735683, + "grad_norm": 1.8150752517575908, + "learning_rate": 1.932492635355513e-06, + "loss": 0.5198413133621216, + "step": 4592 + }, + { + "epoch": 1.618502202643172, + "grad_norm": 1.9402976415289777, + "learning_rate": 1.929051234700007e-06, + "loss": 0.6031092405319214, + "step": 4593 + }, + { + "epoch": 1.6188546255506608, + "grad_norm": 2.041490312444486, + "learning_rate": 1.9256125738804264e-06, + "loss": 0.6269406080245972, + "step": 4594 + }, + { + "epoch": 1.6192070484581498, + "grad_norm": 1.801972947869227, + "learning_rate": 1.922176654064096e-06, + "loss": 0.4518774747848511, + "step": 4595 + }, + { + "epoch": 1.6195594713656387, + "grad_norm": 1.8680481961289441, + "learning_rate": 1.9187434764174027e-06, + "loss": 0.6199424862861633, + "step": 4596 + }, + { + "epoch": 1.6199118942731277, + "grad_norm": 2.634014207343412, + "learning_rate": 1.9153130421057955e-06, + "loss": 0.5155355930328369, + "step": 4597 + }, + { + "epoch": 1.6202643171806166, + "grad_norm": 1.8081505074484028, + "learning_rate": 1.9118853522938087e-06, + "loss": 0.6188424229621887, + "step": 4598 + }, + { + "epoch": 1.6206167400881057, + "grad_norm": 1.8999856535081827, + "learning_rate": 1.908460408145033e-06, + "loss": 0.5807337164878845, + "step": 4599 + }, + { + "epoch": 1.6209691629955947, + "grad_norm": 1.6142171687185456, + "learning_rate": 1.9050382108221311e-06, + "loss": 0.5258378982543945, + "step": 4600 + }, + { + "epoch": 1.6213215859030838, + "grad_norm": 1.9194714558474444, + "learning_rate": 1.9016187614868308e-06, + "loss": 0.6612311601638794, + "step": 4601 + }, + { + "epoch": 1.6216740088105728, + "grad_norm": 1.7849999472385678, + "learning_rate": 1.8982020612999285e-06, + "loss": 0.611383855342865, + "step": 4602 + }, + { + "epoch": 1.6220264317180617, + "grad_norm": 1.7599275323638883, + "learning_rate": 1.894788111421294e-06, + "loss": 0.6111105680465698, 
+ "step": 4603 + }, + { + "epoch": 1.6223788546255506, + "grad_norm": 2.061255928544227, + "learning_rate": 1.8913769130098504e-06, + "loss": 0.7554557919502258, + "step": 4604 + }, + { + "epoch": 1.6227312775330396, + "grad_norm": 1.7818402726516558, + "learning_rate": 1.887968467223591e-06, + "loss": 0.597324013710022, + "step": 4605 + }, + { + "epoch": 1.6230837004405285, + "grad_norm": 2.3192399293978014, + "learning_rate": 1.8845627752195839e-06, + "loss": 0.6232750415802002, + "step": 4606 + }, + { + "epoch": 1.6234361233480175, + "grad_norm": 1.7697166073683794, + "learning_rate": 1.8811598381539543e-06, + "loss": 0.45699936151504517, + "step": 4607 + }, + { + "epoch": 1.6237885462555066, + "grad_norm": 1.9980768091261172, + "learning_rate": 1.87775965718189e-06, + "loss": 0.5307953953742981, + "step": 4608 + }, + { + "epoch": 1.6241409691629956, + "grad_norm": 1.8817640717556428, + "learning_rate": 1.8743622334576495e-06, + "loss": 0.6013764142990112, + "step": 4609 + }, + { + "epoch": 1.6244933920704847, + "grad_norm": 2.0614740198183066, + "learning_rate": 1.8709675681345485e-06, + "loss": 0.5143340826034546, + "step": 4610 + }, + { + "epoch": 1.6248458149779736, + "grad_norm": 1.6895900050976231, + "learning_rate": 1.8675756623649788e-06, + "loss": 0.506861686706543, + "step": 4611 + }, + { + "epoch": 1.6251982378854626, + "grad_norm": 2.223885866703504, + "learning_rate": 1.8641865173003793e-06, + "loss": 0.6807849407196045, + "step": 4612 + }, + { + "epoch": 1.6255506607929515, + "grad_norm": 1.8930990565263293, + "learning_rate": 1.8608001340912573e-06, + "loss": 0.592629075050354, + "step": 4613 + }, + { + "epoch": 1.6259030837004405, + "grad_norm": 2.032831166123834, + "learning_rate": 1.8574165138871925e-06, + "loss": 0.5669249296188354, + "step": 4614 + }, + { + "epoch": 1.6262555066079294, + "grad_norm": 1.9071887451281335, + "learning_rate": 1.8540356578368135e-06, + "loss": 0.7123057246208191, + "step": 4615 + }, + { + "epoch": 
1.6266079295154185, + "grad_norm": 1.7499585996323015, + "learning_rate": 1.8506575670878168e-06, + "loss": 0.5844429731369019, + "step": 4616 + }, + { + "epoch": 1.6269603524229075, + "grad_norm": 1.8176797951508414, + "learning_rate": 1.8472822427869597e-06, + "loss": 0.661457359790802, + "step": 4617 + }, + { + "epoch": 1.6273127753303964, + "grad_norm": 1.9714232511915755, + "learning_rate": 1.8439096860800565e-06, + "loss": 0.6944575905799866, + "step": 4618 + }, + { + "epoch": 1.6276651982378856, + "grad_norm": 1.9471855664955058, + "learning_rate": 1.8405398981119927e-06, + "loss": 0.5818712115287781, + "step": 4619 + }, + { + "epoch": 1.6280176211453745, + "grad_norm": 1.8573981084806426, + "learning_rate": 1.8371728800266964e-06, + "loss": 0.6373921632766724, + "step": 4620 + }, + { + "epoch": 1.6283700440528635, + "grad_norm": 1.8455409169726698, + "learning_rate": 1.8338086329671734e-06, + "loss": 0.4629862904548645, + "step": 4621 + }, + { + "epoch": 1.6287224669603524, + "grad_norm": 2.1547215929268306, + "learning_rate": 1.8304471580754779e-06, + "loss": 0.6537790894508362, + "step": 4622 + }, + { + "epoch": 1.6290748898678413, + "grad_norm": 1.9071168587624383, + "learning_rate": 1.8270884564927272e-06, + "loss": 0.527474582195282, + "step": 4623 + }, + { + "epoch": 1.6294273127753303, + "grad_norm": 1.9134019886674338, + "learning_rate": 1.8237325293590934e-06, + "loss": 0.48941463232040405, + "step": 4624 + }, + { + "epoch": 1.6297797356828194, + "grad_norm": 1.7797372995747724, + "learning_rate": 1.8203793778138123e-06, + "loss": 0.6276243925094604, + "step": 4625 + }, + { + "epoch": 1.6301321585903084, + "grad_norm": 2.175835170708709, + "learning_rate": 1.8170290029951708e-06, + "loss": 0.6339844465255737, + "step": 4626 + }, + { + "epoch": 1.6304845814977975, + "grad_norm": 1.8667689453086813, + "learning_rate": 1.813681406040524e-06, + "loss": 0.517188549041748, + "step": 4627 + }, + { + "epoch": 1.6308370044052865, + "grad_norm": 
1.8956914399941025, + "learning_rate": 1.8103365880862667e-06, + "loss": 0.576552152633667, + "step": 4628 + }, + { + "epoch": 1.6311894273127754, + "grad_norm": 1.7936413452903872, + "learning_rate": 1.8069945502678688e-06, + "loss": 0.5703557729721069, + "step": 4629 + }, + { + "epoch": 1.6315418502202643, + "grad_norm": 1.9048409586347532, + "learning_rate": 1.8036552937198447e-06, + "loss": 0.538072943687439, + "step": 4630 + }, + { + "epoch": 1.6318942731277533, + "grad_norm": 1.6721149802212347, + "learning_rate": 1.8003188195757693e-06, + "loss": 0.4144761562347412, + "step": 4631 + }, + { + "epoch": 1.6322466960352422, + "grad_norm": 2.056410628146389, + "learning_rate": 1.7969851289682704e-06, + "loss": 0.5357951521873474, + "step": 4632 + }, + { + "epoch": 1.6325991189427311, + "grad_norm": 1.9601913826257962, + "learning_rate": 1.7936542230290333e-06, + "loss": 0.6158766746520996, + "step": 4633 + }, + { + "epoch": 1.6329515418502203, + "grad_norm": 2.018782202231636, + "learning_rate": 1.790326102888794e-06, + "loss": 0.7278525233268738, + "step": 4634 + }, + { + "epoch": 1.6333039647577092, + "grad_norm": 1.8937378067838377, + "learning_rate": 1.787000769677354e-06, + "loss": 0.5113881230354309, + "step": 4635 + }, + { + "epoch": 1.6336563876651984, + "grad_norm": 2.2218997592930987, + "learning_rate": 1.7836782245235485e-06, + "loss": 0.6247432827949524, + "step": 4636 + }, + { + "epoch": 1.6340088105726873, + "grad_norm": 1.9409043558834718, + "learning_rate": 1.7803584685552877e-06, + "loss": 0.513325572013855, + "step": 4637 + }, + { + "epoch": 1.6343612334801763, + "grad_norm": 2.023194297584799, + "learning_rate": 1.7770415028995213e-06, + "loss": 0.4980276823043823, + "step": 4638 + }, + { + "epoch": 1.6347136563876652, + "grad_norm": 1.8669544509684106, + "learning_rate": 1.7737273286822565e-06, + "loss": 0.5832515954971313, + "step": 4639 + }, + { + "epoch": 1.6350660792951541, + "grad_norm": 1.7519671458346908, + "learning_rate": 
1.7704159470285532e-06, + "loss": 0.6030116081237793, + "step": 4640 + }, + { + "epoch": 1.635418502202643, + "grad_norm": 2.26980120712081, + "learning_rate": 1.7671073590625188e-06, + "loss": 0.5494866371154785, + "step": 4641 + }, + { + "epoch": 1.635770925110132, + "grad_norm": 1.8803060042220399, + "learning_rate": 1.7638015659073216e-06, + "loss": 0.617791473865509, + "step": 4642 + }, + { + "epoch": 1.6361233480176212, + "grad_norm": 1.8809591920257003, + "learning_rate": 1.760498568685175e-06, + "loss": 0.5213589668273926, + "step": 4643 + }, + { + "epoch": 1.63647577092511, + "grad_norm": 1.7835752431606857, + "learning_rate": 1.7571983685173367e-06, + "loss": 0.5114192962646484, + "step": 4644 + }, + { + "epoch": 1.6368281938325993, + "grad_norm": 1.8264916856765907, + "learning_rate": 1.7539009665241291e-06, + "loss": 0.6207156181335449, + "step": 4645 + }, + { + "epoch": 1.6371806167400882, + "grad_norm": 1.7037955383522276, + "learning_rate": 1.750606363824915e-06, + "loss": 0.5893350839614868, + "step": 4646 + }, + { + "epoch": 1.6375330396475771, + "grad_norm": 2.0239756750398077, + "learning_rate": 1.7473145615381092e-06, + "loss": 0.6453898549079895, + "step": 4647 + }, + { + "epoch": 1.637885462555066, + "grad_norm": 1.623565893456343, + "learning_rate": 1.7440255607811773e-06, + "loss": 0.5098680853843689, + "step": 4648 + }, + { + "epoch": 1.638237885462555, + "grad_norm": 1.9009179186379688, + "learning_rate": 1.7407393626706305e-06, + "loss": 0.5841408967971802, + "step": 4649 + }, + { + "epoch": 1.638590308370044, + "grad_norm": 1.8903189372223002, + "learning_rate": 1.7374559683220337e-06, + "loss": 0.5593127012252808, + "step": 4650 + }, + { + "epoch": 1.638942731277533, + "grad_norm": 1.9192509501465884, + "learning_rate": 1.7341753788499983e-06, + "loss": 0.6885190606117249, + "step": 4651 + }, + { + "epoch": 1.639295154185022, + "grad_norm": 2.019948918382337, + "learning_rate": 1.730897595368175e-06, + "loss": 0.6271092891693115, + 
"step": 4652 + }, + { + "epoch": 1.639647577092511, + "grad_norm": 1.8193728432309102, + "learning_rate": 1.7276226189892763e-06, + "loss": 0.6035536527633667, + "step": 4653 + }, + { + "epoch": 1.6400000000000001, + "grad_norm": 1.876741558260643, + "learning_rate": 1.724350450825052e-06, + "loss": 0.49980080127716064, + "step": 4654 + }, + { + "epoch": 1.640352422907489, + "grad_norm": 1.945483701689467, + "learning_rate": 1.721081091986303e-06, + "loss": 0.6056489944458008, + "step": 4655 + }, + { + "epoch": 1.640704845814978, + "grad_norm": 1.998934183218588, + "learning_rate": 1.717814543582873e-06, + "loss": 0.5611459016799927, + "step": 4656 + }, + { + "epoch": 1.641057268722467, + "grad_norm": 1.8501618159787931, + "learning_rate": 1.7145508067236515e-06, + "loss": 0.5655262470245361, + "step": 4657 + }, + { + "epoch": 1.6414096916299559, + "grad_norm": 2.17470073262635, + "learning_rate": 1.7112898825165814e-06, + "loss": 0.7793601751327515, + "step": 4658 + }, + { + "epoch": 1.6417621145374448, + "grad_norm": 1.8078904709838137, + "learning_rate": 1.7080317720686434e-06, + "loss": 0.6587018370628357, + "step": 4659 + }, + { + "epoch": 1.642114537444934, + "grad_norm": 2.0052578395520313, + "learning_rate": 1.7047764764858598e-06, + "loss": 0.5546305775642395, + "step": 4660 + }, + { + "epoch": 1.642466960352423, + "grad_norm": 2.2168924782846844, + "learning_rate": 1.7015239968733066e-06, + "loss": 0.6215736865997314, + "step": 4661 + }, + { + "epoch": 1.642819383259912, + "grad_norm": 1.95167913439103, + "learning_rate": 1.6982743343350983e-06, + "loss": 0.5772532224655151, + "step": 4662 + }, + { + "epoch": 1.643171806167401, + "grad_norm": 1.9049742666250684, + "learning_rate": 1.6950274899743947e-06, + "loss": 0.567034900188446, + "step": 4663 + }, + { + "epoch": 1.64352422907489, + "grad_norm": 1.6486603082479945, + "learning_rate": 1.6917834648933985e-06, + "loss": 0.5306716561317444, + "step": 4664 + }, + { + "epoch": 1.6438766519823789, + 
"grad_norm": 1.923372734442966, + "learning_rate": 1.688542260193351e-06, + "loss": 0.6691634654998779, + "step": 4665 + }, + { + "epoch": 1.6442290748898678, + "grad_norm": 1.9073972200097022, + "learning_rate": 1.6853038769745466e-06, + "loss": 0.6071977615356445, + "step": 4666 + }, + { + "epoch": 1.6445814977973567, + "grad_norm": 3.0113580201176355, + "learning_rate": 1.6820683163363161e-06, + "loss": 0.743544340133667, + "step": 4667 + }, + { + "epoch": 1.6449339207048457, + "grad_norm": 2.2198521832647864, + "learning_rate": 1.6788355793770238e-06, + "loss": 0.5745127201080322, + "step": 4668 + }, + { + "epoch": 1.6452863436123348, + "grad_norm": 2.083730313741091, + "learning_rate": 1.6756056671940902e-06, + "loss": 0.5153336524963379, + "step": 4669 + }, + { + "epoch": 1.6456387665198238, + "grad_norm": 1.888215895134721, + "learning_rate": 1.6723785808839666e-06, + "loss": 0.5780388116836548, + "step": 4670 + }, + { + "epoch": 1.645991189427313, + "grad_norm": 1.975333041709577, + "learning_rate": 1.6691543215421513e-06, + "loss": 0.601921796798706, + "step": 4671 + }, + { + "epoch": 1.6463436123348019, + "grad_norm": 1.8402715148458082, + "learning_rate": 1.6659328902631766e-06, + "loss": 0.6636123657226562, + "step": 4672 + }, + { + "epoch": 1.6466960352422908, + "grad_norm": 1.804292320266694, + "learning_rate": 1.6627142881406188e-06, + "loss": 0.45225393772125244, + "step": 4673 + }, + { + "epoch": 1.6470484581497797, + "grad_norm": 1.862693343451114, + "learning_rate": 1.6594985162670984e-06, + "loss": 0.6406756043434143, + "step": 4674 + }, + { + "epoch": 1.6474008810572687, + "grad_norm": 2.11645792406816, + "learning_rate": 1.6562855757342632e-06, + "loss": 0.6735906600952148, + "step": 4675 + }, + { + "epoch": 1.6477533039647576, + "grad_norm": 1.9503356292211693, + "learning_rate": 1.6530754676328064e-06, + "loss": 0.515188992023468, + "step": 4676 + }, + { + "epoch": 1.6481057268722465, + "grad_norm": 1.721977079638204, + "learning_rate": 
1.6498681930524652e-06, + "loss": 0.5976129174232483, + "step": 4677 + }, + { + "epoch": 1.6484581497797357, + "grad_norm": 1.9285425022468947, + "learning_rate": 1.6466637530820074e-06, + "loss": 0.7367427945137024, + "step": 4678 + }, + { + "epoch": 1.6488105726872246, + "grad_norm": 2.073959448612198, + "learning_rate": 1.6434621488092385e-06, + "loss": 0.5173717737197876, + "step": 4679 + }, + { + "epoch": 1.6491629955947138, + "grad_norm": 2.1289983497571745, + "learning_rate": 1.6402633813210056e-06, + "loss": 0.7961066961288452, + "step": 4680 + }, + { + "epoch": 1.6495154185022027, + "grad_norm": 2.1150632325299488, + "learning_rate": 1.637067451703187e-06, + "loss": 0.8271595239639282, + "step": 4681 + }, + { + "epoch": 1.6498678414096917, + "grad_norm": 1.9513356704584446, + "learning_rate": 1.6338743610407103e-06, + "loss": 0.6818888783454895, + "step": 4682 + }, + { + "epoch": 1.6502202643171806, + "grad_norm": 2.182931567425792, + "learning_rate": 1.6306841104175219e-06, + "loss": 0.5168178677558899, + "step": 4683 + }, + { + "epoch": 1.6505726872246695, + "grad_norm": 1.8122401400933128, + "learning_rate": 1.627496700916612e-06, + "loss": 0.5792043209075928, + "step": 4684 + }, + { + "epoch": 1.6509251101321585, + "grad_norm": 1.8346977982265331, + "learning_rate": 1.624312133620013e-06, + "loss": 0.6099069118499756, + "step": 4685 + }, + { + "epoch": 1.6512775330396474, + "grad_norm": 1.7489569966562013, + "learning_rate": 1.6211304096087832e-06, + "loss": 0.4562867283821106, + "step": 4686 + }, + { + "epoch": 1.6516299559471366, + "grad_norm": 1.767208393167573, + "learning_rate": 1.61795152996302e-06, + "loss": 0.48648780584335327, + "step": 4687 + }, + { + "epoch": 1.6519823788546255, + "grad_norm": 1.619888597224146, + "learning_rate": 1.6147754957618533e-06, + "loss": 0.5351820588111877, + "step": 4688 + }, + { + "epoch": 1.6523348017621147, + "grad_norm": 1.5845894367063569, + "learning_rate": 1.6116023080834442e-06, + "loss": 
0.4646923542022705, + "step": 4689 + }, + { + "epoch": 1.6526872246696036, + "grad_norm": 2.0533783205545304, + "learning_rate": 1.608431968005001e-06, + "loss": 0.6257984638214111, + "step": 4690 + }, + { + "epoch": 1.6530396475770925, + "grad_norm": 1.6714052981831828, + "learning_rate": 1.605264476602747e-06, + "loss": 0.5646224021911621, + "step": 4691 + }, + { + "epoch": 1.6533920704845815, + "grad_norm": 1.9704920715227376, + "learning_rate": 1.6020998349519457e-06, + "loss": 0.6074661612510681, + "step": 4692 + }, + { + "epoch": 1.6537444933920704, + "grad_norm": 2.119532209280586, + "learning_rate": 1.598938044126901e-06, + "loss": 0.703096866607666, + "step": 4693 + }, + { + "epoch": 1.6540969162995593, + "grad_norm": 2.0281924961410436, + "learning_rate": 1.5957791052009397e-06, + "loss": 0.6677542924880981, + "step": 4694 + }, + { + "epoch": 1.6544493392070485, + "grad_norm": 2.2554606939897313, + "learning_rate": 1.5926230192464232e-06, + "loss": 0.755639910697937, + "step": 4695 + }, + { + "epoch": 1.6548017621145374, + "grad_norm": 2.0543326225263705, + "learning_rate": 1.5894697873347442e-06, + "loss": 0.7008202075958252, + "step": 4696 + }, + { + "epoch": 1.6551541850220264, + "grad_norm": 1.7892378339322623, + "learning_rate": 1.5863194105363244e-06, + "loss": 0.5049681067466736, + "step": 4697 + }, + { + "epoch": 1.6555066079295155, + "grad_norm": 1.8394208195845667, + "learning_rate": 1.583171889920626e-06, + "loss": 0.7415407299995422, + "step": 4698 + }, + { + "epoch": 1.6558590308370045, + "grad_norm": 2.0103355889821404, + "learning_rate": 1.5800272265561256e-06, + "loss": 0.7949470281600952, + "step": 4699 + }, + { + "epoch": 1.6562114537444934, + "grad_norm": 2.2401604191268456, + "learning_rate": 1.5768854215103435e-06, + "loss": 0.5892510414123535, + "step": 4700 + }, + { + "epoch": 1.6565638766519823, + "grad_norm": 2.1732638193025076, + "learning_rate": 1.5737464758498243e-06, + "loss": 0.5357394814491272, + "step": 4701 + }, + { + 
"epoch": 1.6569162995594713, + "grad_norm": 1.92797804038562, + "learning_rate": 1.5706103906401416e-06, + "loss": 0.6078016757965088, + "step": 4702 + }, + { + "epoch": 1.6572687224669602, + "grad_norm": 2.019695184899454, + "learning_rate": 1.5674771669459e-06, + "loss": 0.5858899354934692, + "step": 4703 + }, + { + "epoch": 1.6576211453744494, + "grad_norm": 1.819706102358174, + "learning_rate": 1.5643468058307287e-06, + "loss": 0.5783329010009766, + "step": 4704 + }, + { + "epoch": 1.6579735682819383, + "grad_norm": 1.8104985438999985, + "learning_rate": 1.561219308357288e-06, + "loss": 0.5412800312042236, + "step": 4705 + }, + { + "epoch": 1.6583259911894275, + "grad_norm": 1.5193820753894371, + "learning_rate": 1.5580946755872727e-06, + "loss": 0.5609365701675415, + "step": 4706 + }, + { + "epoch": 1.6586784140969164, + "grad_norm": 2.2157168701611427, + "learning_rate": 1.554972908581388e-06, + "loss": 0.45193177461624146, + "step": 4707 + }, + { + "epoch": 1.6590308370044053, + "grad_norm": 1.885008861796424, + "learning_rate": 1.5518540083993838e-06, + "loss": 0.6402257680892944, + "step": 4708 + }, + { + "epoch": 1.6593832599118943, + "grad_norm": 1.906792902482494, + "learning_rate": 1.5487379761000276e-06, + "loss": 0.5956071615219116, + "step": 4709 + }, + { + "epoch": 1.6597356828193832, + "grad_norm": 1.5229764109639101, + "learning_rate": 1.5456248127411156e-06, + "loss": 0.5975273847579956, + "step": 4710 + }, + { + "epoch": 1.6600881057268722, + "grad_norm": 2.2860844716103514, + "learning_rate": 1.54251451937947e-06, + "loss": 0.6914929151535034, + "step": 4711 + }, + { + "epoch": 1.660440528634361, + "grad_norm": 2.141875122923791, + "learning_rate": 1.5394070970709384e-06, + "loss": 0.5867592096328735, + "step": 4712 + }, + { + "epoch": 1.6607929515418502, + "grad_norm": 1.96612759503979, + "learning_rate": 1.5363025468703917e-06, + "loss": 0.6448687314987183, + "step": 4713 + }, + { + "epoch": 1.6611453744493392, + "grad_norm": 
1.8452223088884994, + "learning_rate": 1.5332008698317348e-06, + "loss": 0.5870485305786133, + "step": 4714 + }, + { + "epoch": 1.6614977973568283, + "grad_norm": 1.9043935409080608, + "learning_rate": 1.5301020670078803e-06, + "loss": 0.6336855888366699, + "step": 4715 + }, + { + "epoch": 1.6618502202643173, + "grad_norm": 1.9247468731228787, + "learning_rate": 1.527006139450784e-06, + "loss": 0.5924787521362305, + "step": 4716 + }, + { + "epoch": 1.6622026431718062, + "grad_norm": 1.5860620334804822, + "learning_rate": 1.523913088211415e-06, + "loss": 0.5817830562591553, + "step": 4717 + }, + { + "epoch": 1.6625550660792952, + "grad_norm": 1.8285246452015176, + "learning_rate": 1.5208229143397657e-06, + "loss": 0.5836390852928162, + "step": 4718 + }, + { + "epoch": 1.662907488986784, + "grad_norm": 1.5094327417455158, + "learning_rate": 1.5177356188848558e-06, + "loss": 0.47110515832901, + "step": 4719 + }, + { + "epoch": 1.663259911894273, + "grad_norm": 2.018838906344594, + "learning_rate": 1.5146512028947225e-06, + "loss": 0.6376635432243347, + "step": 4720 + }, + { + "epoch": 1.663612334801762, + "grad_norm": 1.7847798861513196, + "learning_rate": 1.5115696674164349e-06, + "loss": 0.6399784088134766, + "step": 4721 + }, + { + "epoch": 1.6639647577092511, + "grad_norm": 2.2125247577405127, + "learning_rate": 1.5084910134960773e-06, + "loss": 0.5891954898834229, + "step": 4722 + }, + { + "epoch": 1.66431718061674, + "grad_norm": 1.5827717360956535, + "learning_rate": 1.5054152421787505e-06, + "loss": 0.6358054876327515, + "step": 4723 + }, + { + "epoch": 1.6646696035242292, + "grad_norm": 1.855029533228232, + "learning_rate": 1.5023423545085892e-06, + "loss": 0.5072367191314697, + "step": 4724 + }, + { + "epoch": 1.6650220264317181, + "grad_norm": 1.8866512172900913, + "learning_rate": 1.4992723515287423e-06, + "loss": 0.5549881458282471, + "step": 4725 + }, + { + "epoch": 1.665374449339207, + "grad_norm": 1.5386308243299962, + "learning_rate": 
1.4962052342813804e-06, + "loss": 0.4833364188671112, + "step": 4726 + }, + { + "epoch": 1.665726872246696, + "grad_norm": 1.837283227568624, + "learning_rate": 1.4931410038076922e-06, + "loss": 0.6183017492294312, + "step": 4727 + }, + { + "epoch": 1.666079295154185, + "grad_norm": 1.8220970545699078, + "learning_rate": 1.4900796611478885e-06, + "loss": 0.4956335127353668, + "step": 4728 + }, + { + "epoch": 1.666431718061674, + "grad_norm": 2.0221134241832552, + "learning_rate": 1.4870212073412027e-06, + "loss": 0.7345337271690369, + "step": 4729 + }, + { + "epoch": 1.6667841409691628, + "grad_norm": 1.5143426871240313, + "learning_rate": 1.4839656434258864e-06, + "loss": 0.5324833393096924, + "step": 4730 + }, + { + "epoch": 1.667136563876652, + "grad_norm": 1.7373474898452002, + "learning_rate": 1.4809129704392e-06, + "loss": 0.5702322125434875, + "step": 4731 + }, + { + "epoch": 1.667488986784141, + "grad_norm": 1.7374551868496027, + "learning_rate": 1.4778631894174389e-06, + "loss": 0.46188884973526, + "step": 4732 + }, + { + "epoch": 1.66784140969163, + "grad_norm": 1.76911142349316, + "learning_rate": 1.474816301395906e-06, + "loss": 0.5505924224853516, + "step": 4733 + }, + { + "epoch": 1.668193832599119, + "grad_norm": 1.9422422566247162, + "learning_rate": 1.4717723074089251e-06, + "loss": 0.5889710187911987, + "step": 4734 + }, + { + "epoch": 1.668546255506608, + "grad_norm": 2.1059796200005825, + "learning_rate": 1.4687312084898387e-06, + "loss": 0.5794551372528076, + "step": 4735 + }, + { + "epoch": 1.668898678414097, + "grad_norm": 1.8650983467603144, + "learning_rate": 1.4656930056710006e-06, + "loss": 0.567146897315979, + "step": 4736 + }, + { + "epoch": 1.6692511013215858, + "grad_norm": 2.0850787713289067, + "learning_rate": 1.4626576999837938e-06, + "loss": 0.5330451130867004, + "step": 4737 + }, + { + "epoch": 1.6696035242290748, + "grad_norm": 1.868870689701364, + "learning_rate": 1.459625292458604e-06, + "loss": 0.5631227493286133, + "step": 
4738 + }, + { + "epoch": 1.669955947136564, + "grad_norm": 1.8773546830623118, + "learning_rate": 1.456595784124839e-06, + "loss": 0.5145374536514282, + "step": 4739 + }, + { + "epoch": 1.6703083700440529, + "grad_norm": 1.925388120075487, + "learning_rate": 1.453569176010927e-06, + "loss": 0.59378582239151, + "step": 4740 + }, + { + "epoch": 1.6706607929515418, + "grad_norm": 2.053494266916917, + "learning_rate": 1.4505454691443043e-06, + "loss": 0.5827980041503906, + "step": 4741 + }, + { + "epoch": 1.671013215859031, + "grad_norm": 1.6369511357690396, + "learning_rate": 1.4475246645514274e-06, + "loss": 0.5270858407020569, + "step": 4742 + }, + { + "epoch": 1.67136563876652, + "grad_norm": 2.2160806515473186, + "learning_rate": 1.4445067632577625e-06, + "loss": 0.4708535373210907, + "step": 4743 + }, + { + "epoch": 1.6717180616740088, + "grad_norm": 1.6498079624073576, + "learning_rate": 1.4414917662877924e-06, + "loss": 0.5505239963531494, + "step": 4744 + }, + { + "epoch": 1.6720704845814978, + "grad_norm": 1.8451834665357323, + "learning_rate": 1.4384796746650221e-06, + "loss": 0.5841302871704102, + "step": 4745 + }, + { + "epoch": 1.6724229074889867, + "grad_norm": 1.8665624096794386, + "learning_rate": 1.4354704894119554e-06, + "loss": 0.627534031867981, + "step": 4746 + }, + { + "epoch": 1.6727753303964756, + "grad_norm": 1.9526983627618448, + "learning_rate": 1.4324642115501165e-06, + "loss": 0.6160094738006592, + "step": 4747 + }, + { + "epoch": 1.6731277533039648, + "grad_norm": 1.6909104461316946, + "learning_rate": 1.4294608421000489e-06, + "loss": 0.5420609712600708, + "step": 4748 + }, + { + "epoch": 1.6734801762114537, + "grad_norm": 1.9597720364889828, + "learning_rate": 1.4264603820813006e-06, + "loss": 0.7729714512825012, + "step": 4749 + }, + { + "epoch": 1.6738325991189429, + "grad_norm": 1.8780592513411432, + "learning_rate": 1.4234628325124345e-06, + "loss": 0.6458747386932373, + "step": 4750 + }, + { + "epoch": 1.6741850220264318, + 
"grad_norm": 1.717642350217617, + "learning_rate": 1.4204681944110242e-06, + "loss": 0.5250670313835144, + "step": 4751 + }, + { + "epoch": 1.6745374449339208, + "grad_norm": 1.9945004637909651, + "learning_rate": 1.4174764687936548e-06, + "loss": 0.4985695779323578, + "step": 4752 + }, + { + "epoch": 1.6748898678414097, + "grad_norm": 2.1058731387570253, + "learning_rate": 1.4144876566759303e-06, + "loss": 0.6401104927062988, + "step": 4753 + }, + { + "epoch": 1.6752422907488986, + "grad_norm": 1.8242149794974472, + "learning_rate": 1.4115017590724534e-06, + "loss": 0.5991432666778564, + "step": 4754 + }, + { + "epoch": 1.6755947136563876, + "grad_norm": 1.8977892116048576, + "learning_rate": 1.4085187769968433e-06, + "loss": 0.7787071466445923, + "step": 4755 + }, + { + "epoch": 1.6759471365638765, + "grad_norm": 1.8915051082154768, + "learning_rate": 1.4055387114617336e-06, + "loss": 0.6977101564407349, + "step": 4756 + }, + { + "epoch": 1.6762995594713657, + "grad_norm": 1.7182871374874729, + "learning_rate": 1.4025615634787616e-06, + "loss": 0.541371762752533, + "step": 4757 + }, + { + "epoch": 1.6766519823788546, + "grad_norm": 1.8269477268502086, + "learning_rate": 1.3995873340585765e-06, + "loss": 0.5548759698867798, + "step": 4758 + }, + { + "epoch": 1.6770044052863438, + "grad_norm": 1.947083457475871, + "learning_rate": 1.3966160242108373e-06, + "loss": 0.6022241115570068, + "step": 4759 + }, + { + "epoch": 1.6773568281938327, + "grad_norm": 2.1542376603491946, + "learning_rate": 1.3936476349442074e-06, + "loss": 0.4965083599090576, + "step": 4760 + }, + { + "epoch": 1.6777092511013216, + "grad_norm": 2.4193138120349227, + "learning_rate": 1.3906821672663706e-06, + "loss": 0.6712369918823242, + "step": 4761 + }, + { + "epoch": 1.6780616740088106, + "grad_norm": 1.8527401573304754, + "learning_rate": 1.3877196221840038e-06, + "loss": 0.6236127614974976, + "step": 4762 + }, + { + "epoch": 1.6784140969162995, + "grad_norm": 1.9836915293869917, + 
"learning_rate": 1.3847600007027996e-06, + "loss": 0.7144246101379395, + "step": 4763 + }, + { + "epoch": 1.6787665198237884, + "grad_norm": 1.926900514093349, + "learning_rate": 1.3818033038274602e-06, + "loss": 0.650489091873169, + "step": 4764 + }, + { + "epoch": 1.6791189427312774, + "grad_norm": 1.896615210676468, + "learning_rate": 1.3788495325616912e-06, + "loss": 0.6711791157722473, + "step": 4765 + }, + { + "epoch": 1.6794713656387665, + "grad_norm": 1.6640253715487854, + "learning_rate": 1.375898687908206e-06, + "loss": 0.49629515409469604, + "step": 4766 + }, + { + "epoch": 1.6798237885462555, + "grad_norm": 2.3032521123469727, + "learning_rate": 1.372950770868724e-06, + "loss": 0.5843443870544434, + "step": 4767 + }, + { + "epoch": 1.6801762114537446, + "grad_norm": 1.7269921421841483, + "learning_rate": 1.3700057824439694e-06, + "loss": 0.6201150417327881, + "step": 4768 + }, + { + "epoch": 1.6805286343612336, + "grad_norm": 2.2518096795033746, + "learning_rate": 1.3670637236336815e-06, + "loss": 0.6149473190307617, + "step": 4769 + }, + { + "epoch": 1.6808810572687225, + "grad_norm": 1.8297389667337718, + "learning_rate": 1.3641245954365868e-06, + "loss": 0.476188987493515, + "step": 4770 + }, + { + "epoch": 1.6812334801762114, + "grad_norm": 1.6086104459565809, + "learning_rate": 1.361188398850436e-06, + "loss": 0.4850924015045166, + "step": 4771 + }, + { + "epoch": 1.6815859030837004, + "grad_norm": 2.8146145731538676, + "learning_rate": 1.3582551348719741e-06, + "loss": 0.6008634567260742, + "step": 4772 + }, + { + "epoch": 1.6819383259911893, + "grad_norm": 1.6382847925926618, + "learning_rate": 1.3553248044969525e-06, + "loss": 0.5383377075195312, + "step": 4773 + }, + { + "epoch": 1.6822907488986785, + "grad_norm": 1.966985389102481, + "learning_rate": 1.3523974087201274e-06, + "loss": 0.5711555480957031, + "step": 4774 + }, + { + "epoch": 1.6826431718061674, + "grad_norm": 1.915810750390724, + "learning_rate": 1.3494729485352586e-06, + "loss": 
0.5267810821533203, + "step": 4775 + }, + { + "epoch": 1.6829955947136563, + "grad_norm": 1.968063769811982, + "learning_rate": 1.3465514249351076e-06, + "loss": 0.6203084588050842, + "step": 4776 + }, + { + "epoch": 1.6833480176211455, + "grad_norm": 1.604432029465195, + "learning_rate": 1.3436328389114473e-06, + "loss": 0.46672314405441284, + "step": 4777 + }, + { + "epoch": 1.6837004405286344, + "grad_norm": 2.175917964334397, + "learning_rate": 1.3407171914550366e-06, + "loss": 0.6375850439071655, + "step": 4778 + }, + { + "epoch": 1.6840528634361234, + "grad_norm": 1.7467776544405884, + "learning_rate": 1.337804483555656e-06, + "loss": 0.6162304282188416, + "step": 4779 + }, + { + "epoch": 1.6844052863436123, + "grad_norm": 2.0769560048267817, + "learning_rate": 1.3348947162020741e-06, + "loss": 0.7814363241195679, + "step": 4780 + }, + { + "epoch": 1.6847577092511012, + "grad_norm": 1.4969648698838118, + "learning_rate": 1.3319878903820682e-06, + "loss": 0.47330981492996216, + "step": 4781 + }, + { + "epoch": 1.6851101321585902, + "grad_norm": 2.184365435582337, + "learning_rate": 1.3290840070824163e-06, + "loss": 0.759529173374176, + "step": 4782 + }, + { + "epoch": 1.6854625550660793, + "grad_norm": 2.039688209679945, + "learning_rate": 1.326183067288893e-06, + "loss": 0.7727639675140381, + "step": 4783 + }, + { + "epoch": 1.6858149779735683, + "grad_norm": 1.9567066145193837, + "learning_rate": 1.3232850719862789e-06, + "loss": 0.6429058313369751, + "step": 4784 + }, + { + "epoch": 1.6861674008810574, + "grad_norm": 2.394172291442893, + "learning_rate": 1.3203900221583565e-06, + "loss": 0.5895540714263916, + "step": 4785 + }, + { + "epoch": 1.6865198237885464, + "grad_norm": 1.835232130498821, + "learning_rate": 1.317497918787899e-06, + "loss": 0.5410366058349609, + "step": 4786 + }, + { + "epoch": 1.6868722466960353, + "grad_norm": 2.025742052316709, + "learning_rate": 1.3146087628566894e-06, + "loss": 0.5144281387329102, + "step": 4787 + }, + { + 
"epoch": 1.6872246696035242, + "grad_norm": 2.3397936718705967, + "learning_rate": 1.311722555345506e-06, + "loss": 0.7025437355041504, + "step": 4788 + }, + { + "epoch": 1.6875770925110132, + "grad_norm": 1.7878267984100036, + "learning_rate": 1.3088392972341256e-06, + "loss": 0.6000303030014038, + "step": 4789 + }, + { + "epoch": 1.6879295154185021, + "grad_norm": 2.057293191164908, + "learning_rate": 1.3059589895013248e-06, + "loss": 0.5732883214950562, + "step": 4790 + }, + { + "epoch": 1.688281938325991, + "grad_norm": 1.83266529432573, + "learning_rate": 1.3030816331248785e-06, + "loss": 0.5908600091934204, + "step": 4791 + }, + { + "epoch": 1.6886343612334802, + "grad_norm": 1.7317933799112057, + "learning_rate": 1.3002072290815571e-06, + "loss": 0.5579310655593872, + "step": 4792 + }, + { + "epoch": 1.6889867841409691, + "grad_norm": 1.9713002881427846, + "learning_rate": 1.2973357783471385e-06, + "loss": 0.6439195871353149, + "step": 4793 + }, + { + "epoch": 1.6893392070484583, + "grad_norm": 2.1068548457431437, + "learning_rate": 1.2944672818963822e-06, + "loss": 0.6213329434394836, + "step": 4794 + }, + { + "epoch": 1.6896916299559472, + "grad_norm": 2.271454462540911, + "learning_rate": 1.291601740703059e-06, + "loss": 0.5875385999679565, + "step": 4795 + }, + { + "epoch": 1.6900440528634362, + "grad_norm": 1.8170542219049632, + "learning_rate": 1.2887391557399287e-06, + "loss": 0.7071372270584106, + "step": 4796 + }, + { + "epoch": 1.690396475770925, + "grad_norm": 1.9467140791895143, + "learning_rate": 1.2858795279787517e-06, + "loss": 0.504144549369812, + "step": 4797 + }, + { + "epoch": 1.690748898678414, + "grad_norm": 1.9410537548952953, + "learning_rate": 1.2830228583902816e-06, + "loss": 0.7081021070480347, + "step": 4798 + }, + { + "epoch": 1.691101321585903, + "grad_norm": 1.679906270518348, + "learning_rate": 1.2801691479442658e-06, + "loss": 0.5843057632446289, + "step": 4799 + }, + { + "epoch": 1.691453744493392, + "grad_norm": 
2.366764710202919, + "learning_rate": 1.2773183976094571e-06, + "loss": 0.6437872052192688, + "step": 4800 + }, + { + "epoch": 1.691806167400881, + "grad_norm": 2.0784896951955125, + "learning_rate": 1.2744706083535907e-06, + "loss": 0.6945379972457886, + "step": 4801 + }, + { + "epoch": 1.69215859030837, + "grad_norm": 1.5919193491775196, + "learning_rate": 1.2716257811434019e-06, + "loss": 0.5827867984771729, + "step": 4802 + }, + { + "epoch": 1.6925110132158592, + "grad_norm": 2.6733992431993228, + "learning_rate": 1.2687839169446259e-06, + "loss": 0.5298784971237183, + "step": 4803 + }, + { + "epoch": 1.692863436123348, + "grad_norm": 2.069188268688187, + "learning_rate": 1.2659450167219834e-06, + "loss": 0.6885675191879272, + "step": 4804 + }, + { + "epoch": 1.693215859030837, + "grad_norm": 1.8639055588125417, + "learning_rate": 1.2631090814391945e-06, + "loss": 0.6902546882629395, + "step": 4805 + }, + { + "epoch": 1.693568281938326, + "grad_norm": 1.7529564321244235, + "learning_rate": 1.2602761120589713e-06, + "loss": 0.5972022414207458, + "step": 4806 + }, + { + "epoch": 1.693920704845815, + "grad_norm": 1.676709469876984, + "learning_rate": 1.2574461095430145e-06, + "loss": 0.5527150630950928, + "step": 4807 + }, + { + "epoch": 1.6942731277533039, + "grad_norm": 1.6073136228470446, + "learning_rate": 1.2546190748520294e-06, + "loss": 0.5898724794387817, + "step": 4808 + }, + { + "epoch": 1.6946255506607928, + "grad_norm": 1.7947012597219074, + "learning_rate": 1.2517950089457e-06, + "loss": 0.7023364901542664, + "step": 4809 + }, + { + "epoch": 1.694977973568282, + "grad_norm": 1.6620582383673839, + "learning_rate": 1.2489739127827083e-06, + "loss": 0.6016935706138611, + "step": 4810 + }, + { + "epoch": 1.6953303964757709, + "grad_norm": 1.8540813823422424, + "learning_rate": 1.246155787320733e-06, + "loss": 0.5724658966064453, + "step": 4811 + }, + { + "epoch": 1.69568281938326, + "grad_norm": 1.799661687922518, + "learning_rate": 
1.2433406335164389e-06, + "loss": 0.5886126160621643, + "step": 4812 + }, + { + "epoch": 1.696035242290749, + "grad_norm": 1.7467523658249466, + "learning_rate": 1.2405284523254823e-06, + "loss": 0.6656844615936279, + "step": 4813 + }, + { + "epoch": 1.696387665198238, + "grad_norm": 1.9261091270854245, + "learning_rate": 1.237719244702511e-06, + "loss": 0.6208533644676208, + "step": 4814 + }, + { + "epoch": 1.6967400881057269, + "grad_norm": 2.00988865324314, + "learning_rate": 1.234913011601162e-06, + "loss": 0.6895248889923096, + "step": 4815 + }, + { + "epoch": 1.6970925110132158, + "grad_norm": 1.755568469718746, + "learning_rate": 1.23210975397407e-06, + "loss": 0.5525833368301392, + "step": 4816 + }, + { + "epoch": 1.6974449339207047, + "grad_norm": 1.9922303788563613, + "learning_rate": 1.2293094727728471e-06, + "loss": 0.5958225727081299, + "step": 4817 + }, + { + "epoch": 1.6977973568281939, + "grad_norm": 1.8538893137799548, + "learning_rate": 1.226512168948103e-06, + "loss": 0.7570905089378357, + "step": 4818 + }, + { + "epoch": 1.6981497797356828, + "grad_norm": 1.9122257264522353, + "learning_rate": 1.2237178434494379e-06, + "loss": 0.586568295955658, + "step": 4819 + }, + { + "epoch": 1.6985022026431718, + "grad_norm": 2.0787640518450257, + "learning_rate": 1.2209264972254365e-06, + "loss": 0.574797511100769, + "step": 4820 + }, + { + "epoch": 1.698854625550661, + "grad_norm": 1.8487761386635184, + "learning_rate": 1.2181381312236751e-06, + "loss": 0.6277909278869629, + "step": 4821 + }, + { + "epoch": 1.6992070484581498, + "grad_norm": 1.6733284685909569, + "learning_rate": 1.2153527463907155e-06, + "loss": 0.6308181285858154, + "step": 4822 + }, + { + "epoch": 1.6995594713656388, + "grad_norm": 1.493809970809451, + "learning_rate": 1.2125703436721091e-06, + "loss": 0.5076426267623901, + "step": 4823 + }, + { + "epoch": 1.6999118942731277, + "grad_norm": 1.9773484387601714, + "learning_rate": 1.2097909240123994e-06, + "loss": 0.7712167501449585, + 
"step": 4824 + }, + { + "epoch": 1.7002643171806167, + "grad_norm": 2.0486043522302517, + "learning_rate": 1.2070144883551072e-06, + "loss": 0.5964892506599426, + "step": 4825 + }, + { + "epoch": 1.7006167400881056, + "grad_norm": 2.112366587786807, + "learning_rate": 1.2042410376427472e-06, + "loss": 0.7473628520965576, + "step": 4826 + }, + { + "epoch": 1.7009691629955948, + "grad_norm": 2.17212484756118, + "learning_rate": 1.2014705728168219e-06, + "loss": 0.6897937059402466, + "step": 4827 + }, + { + "epoch": 1.7013215859030837, + "grad_norm": 1.734585395626036, + "learning_rate": 1.198703094817817e-06, + "loss": 0.5955557823181152, + "step": 4828 + }, + { + "epoch": 1.7016740088105728, + "grad_norm": 1.8689895875293312, + "learning_rate": 1.195938604585205e-06, + "loss": 0.7051092386245728, + "step": 4829 + }, + { + "epoch": 1.7020264317180618, + "grad_norm": 3.0972606123503836, + "learning_rate": 1.1931771030574446e-06, + "loss": 0.584572434425354, + "step": 4830 + }, + { + "epoch": 1.7023788546255507, + "grad_norm": 1.9375690474214398, + "learning_rate": 1.1904185911719768e-06, + "loss": 0.5691804885864258, + "step": 4831 + }, + { + "epoch": 1.7027312775330397, + "grad_norm": 2.088088383810667, + "learning_rate": 1.187663069865237e-06, + "loss": 0.5539572834968567, + "step": 4832 + }, + { + "epoch": 1.7030837004405286, + "grad_norm": 2.0666580745541956, + "learning_rate": 1.1849105400726324e-06, + "loss": 0.6229352951049805, + "step": 4833 + }, + { + "epoch": 1.7034361233480175, + "grad_norm": 1.9668453557048677, + "learning_rate": 1.1821610027285613e-06, + "loss": 0.7302919626235962, + "step": 4834 + }, + { + "epoch": 1.7037885462555065, + "grad_norm": 1.6339139133298357, + "learning_rate": 1.1794144587664113e-06, + "loss": 0.6802065372467041, + "step": 4835 + }, + { + "epoch": 1.7041409691629956, + "grad_norm": 1.8504754714684983, + "learning_rate": 1.1766709091185447e-06, + "loss": 0.6448635458946228, + "step": 4836 + }, + { + "epoch": 1.7044933920704846, 
+ "grad_norm": 1.9824663488252348, + "learning_rate": 1.1739303547163138e-06, + "loss": 0.6141834259033203, + "step": 4837 + }, + { + "epoch": 1.7048458149779737, + "grad_norm": 1.8134017158002862, + "learning_rate": 1.1711927964900482e-06, + "loss": 0.5634737014770508, + "step": 4838 + }, + { + "epoch": 1.7051982378854627, + "grad_norm": 1.7474752293775022, + "learning_rate": 1.1684582353690642e-06, + "loss": 0.6786668300628662, + "step": 4839 + }, + { + "epoch": 1.7055506607929516, + "grad_norm": 1.8867560516103576, + "learning_rate": 1.1657266722816663e-06, + "loss": 0.6117962002754211, + "step": 4840 + }, + { + "epoch": 1.7059030837004405, + "grad_norm": 1.8617179395667027, + "learning_rate": 1.1629981081551278e-06, + "loss": 0.6115782260894775, + "step": 4841 + }, + { + "epoch": 1.7062555066079295, + "grad_norm": 1.676805324865966, + "learning_rate": 1.1602725439157114e-06, + "loss": 0.6526266932487488, + "step": 4842 + }, + { + "epoch": 1.7066079295154184, + "grad_norm": 1.8958276581556894, + "learning_rate": 1.1575499804886658e-06, + "loss": 0.5449249148368835, + "step": 4843 + }, + { + "epoch": 1.7069603524229073, + "grad_norm": 1.7291534323462554, + "learning_rate": 1.1548304187982152e-06, + "loss": 0.5793930292129517, + "step": 4844 + }, + { + "epoch": 1.7073127753303965, + "grad_norm": 1.9655642851245159, + "learning_rate": 1.152113859767565e-06, + "loss": 0.5133150815963745, + "step": 4845 + }, + { + "epoch": 1.7076651982378854, + "grad_norm": 2.3523116804029973, + "learning_rate": 1.1494003043189028e-06, + "loss": 0.6771460771560669, + "step": 4846 + }, + { + "epoch": 1.7080176211453746, + "grad_norm": 1.651478684492262, + "learning_rate": 1.1466897533733945e-06, + "loss": 0.5680071115493774, + "step": 4847 + }, + { + "epoch": 1.7083700440528635, + "grad_norm": 1.5749772965105935, + "learning_rate": 1.1439822078511941e-06, + "loss": 0.58878493309021, + "step": 4848 + }, + { + "epoch": 1.7087224669603525, + "grad_norm": 2.108145787297885, + 
"learning_rate": 1.14127766867142e-06, + "loss": 0.6441792249679565, + "step": 4849 + }, + { + "epoch": 1.7090748898678414, + "grad_norm": 1.98601005576605, + "learning_rate": 1.1385761367521865e-06, + "loss": 0.4910963773727417, + "step": 4850 + }, + { + "epoch": 1.7094273127753303, + "grad_norm": 1.860202988226145, + "learning_rate": 1.1358776130105765e-06, + "loss": 0.5878154635429382, + "step": 4851 + }, + { + "epoch": 1.7097797356828193, + "grad_norm": 1.8203129566458394, + "learning_rate": 1.133182098362654e-06, + "loss": 0.5835394859313965, + "step": 4852 + }, + { + "epoch": 1.7101321585903082, + "grad_norm": 2.552503327854629, + "learning_rate": 1.130489593723465e-06, + "loss": 0.6612577438354492, + "step": 4853 + }, + { + "epoch": 1.7104845814977974, + "grad_norm": 1.6914248252090902, + "learning_rate": 1.1278001000070282e-06, + "loss": 0.5892096757888794, + "step": 4854 + }, + { + "epoch": 1.7108370044052863, + "grad_norm": 1.597330243290686, + "learning_rate": 1.1251136181263433e-06, + "loss": 0.6196271181106567, + "step": 4855 + }, + { + "epoch": 1.7111894273127755, + "grad_norm": 1.7525774926334832, + "learning_rate": 1.122430148993392e-06, + "loss": 0.5365586280822754, + "step": 4856 + }, + { + "epoch": 1.7115418502202644, + "grad_norm": 1.8547440453110882, + "learning_rate": 1.119749693519121e-06, + "loss": 0.6006353497505188, + "step": 4857 + }, + { + "epoch": 1.7118942731277533, + "grad_norm": 1.7445383281871432, + "learning_rate": 1.117072252613467e-06, + "loss": 0.5645362138748169, + "step": 4858 + }, + { + "epoch": 1.7122466960352423, + "grad_norm": 1.928827114023792, + "learning_rate": 1.1143978271853362e-06, + "loss": 0.46408799290657043, + "step": 4859 + }, + { + "epoch": 1.7125991189427312, + "grad_norm": 1.9357772553695842, + "learning_rate": 1.1117264181426134e-06, + "loss": 0.7798513770103455, + "step": 4860 + }, + { + "epoch": 1.7129515418502201, + "grad_norm": 2.234058868113385, + "learning_rate": 1.109058026392158e-06, + "loss": 
0.739770770072937, + "step": 4861 + }, + { + "epoch": 1.7133039647577093, + "grad_norm": 1.8311645792398603, + "learning_rate": 1.1063926528398062e-06, + "loss": 0.567306637763977, + "step": 4862 + }, + { + "epoch": 1.7136563876651982, + "grad_norm": 1.8983872649225184, + "learning_rate": 1.1037302983903686e-06, + "loss": 0.5730962753295898, + "step": 4863 + }, + { + "epoch": 1.7140088105726872, + "grad_norm": 2.0428299761075186, + "learning_rate": 1.1010709639476335e-06, + "loss": 0.6311475038528442, + "step": 4864 + }, + { + "epoch": 1.7143612334801763, + "grad_norm": 2.074080543967214, + "learning_rate": 1.098414650414359e-06, + "loss": 0.5867577791213989, + "step": 4865 + }, + { + "epoch": 1.7147136563876653, + "grad_norm": 1.7945849101921227, + "learning_rate": 1.0957613586922844e-06, + "loss": 0.6291393637657166, + "step": 4866 + }, + { + "epoch": 1.7150660792951542, + "grad_norm": 2.221825931925125, + "learning_rate": 1.0931110896821184e-06, + "loss": 0.5811575651168823, + "step": 4867 + }, + { + "epoch": 1.7154185022026431, + "grad_norm": 1.8041589779612486, + "learning_rate": 1.0904638442835459e-06, + "loss": 0.6340835690498352, + "step": 4868 + }, + { + "epoch": 1.715770925110132, + "grad_norm": 2.1324283591729696, + "learning_rate": 1.087819623395222e-06, + "loss": 0.6543419361114502, + "step": 4869 + }, + { + "epoch": 1.716123348017621, + "grad_norm": 1.7815282855404584, + "learning_rate": 1.0851784279147793e-06, + "loss": 0.5669729709625244, + "step": 4870 + }, + { + "epoch": 1.7164757709251102, + "grad_norm": 1.7880383242870224, + "learning_rate": 1.08254025873882e-06, + "loss": 0.5422554612159729, + "step": 4871 + }, + { + "epoch": 1.716828193832599, + "grad_norm": 2.1378220532284646, + "learning_rate": 1.0799051167629215e-06, + "loss": 0.6154215335845947, + "step": 4872 + }, + { + "epoch": 1.7171806167400883, + "grad_norm": 1.6926103915620132, + "learning_rate": 1.0772730028816304e-06, + "loss": 0.6306319236755371, + "step": 4873 + }, + { + "epoch": 
1.7175330396475772, + "grad_norm": 1.8857595594505687, + "learning_rate": 1.0746439179884716e-06, + "loss": 0.6301003694534302, + "step": 4874 + }, + { + "epoch": 1.7178854625550661, + "grad_norm": 2.02854056964172, + "learning_rate": 1.0720178629759347e-06, + "loss": 0.5730071067810059, + "step": 4875 + }, + { + "epoch": 1.718237885462555, + "grad_norm": 1.911878436689674, + "learning_rate": 1.0693948387354836e-06, + "loss": 0.5330506563186646, + "step": 4876 + }, + { + "epoch": 1.718590308370044, + "grad_norm": 2.2472032788534033, + "learning_rate": 1.0667748461575544e-06, + "loss": 0.7724611759185791, + "step": 4877 + }, + { + "epoch": 1.718942731277533, + "grad_norm": 1.8072854503281317, + "learning_rate": 1.0641578861315517e-06, + "loss": 0.5415126085281372, + "step": 4878 + }, + { + "epoch": 1.7192951541850219, + "grad_norm": 1.7491717586336433, + "learning_rate": 1.0615439595458554e-06, + "loss": 0.4895828664302826, + "step": 4879 + }, + { + "epoch": 1.719647577092511, + "grad_norm": 2.1761761181791757, + "learning_rate": 1.0589330672878084e-06, + "loss": 0.6049074530601501, + "step": 4880 + }, + { + "epoch": 1.72, + "grad_norm": 1.8120989551683504, + "learning_rate": 1.056325210243726e-06, + "loss": 0.5733378529548645, + "step": 4881 + }, + { + "epoch": 1.7203524229074891, + "grad_norm": 1.842989042937231, + "learning_rate": 1.0537203892989e-06, + "loss": 0.6034674644470215, + "step": 4882 + }, + { + "epoch": 1.720704845814978, + "grad_norm": 1.873470428701205, + "learning_rate": 1.0511186053375833e-06, + "loss": 0.5282200574874878, + "step": 4883 + }, + { + "epoch": 1.721057268722467, + "grad_norm": 1.7879116674889537, + "learning_rate": 1.0485198592430001e-06, + "loss": 0.6331876516342163, + "step": 4884 + }, + { + "epoch": 1.721409691629956, + "grad_norm": 1.9365084560803385, + "learning_rate": 1.045924151897344e-06, + "loss": 0.5194844007492065, + "step": 4885 + }, + { + "epoch": 1.7217621145374449, + "grad_norm": 1.9087945355709668, + "learning_rate": 
1.0433314841817755e-06, + "loss": 0.5496135354042053, + "step": 4886 + }, + { + "epoch": 1.7221145374449338, + "grad_norm": 2.009884434911672, + "learning_rate": 1.0407418569764304e-06, + "loss": 0.7871953248977661, + "step": 4887 + }, + { + "epoch": 1.7224669603524227, + "grad_norm": 2.040889138785673, + "learning_rate": 1.0381552711604004e-06, + "loss": 0.7199628353118896, + "step": 4888 + }, + { + "epoch": 1.722819383259912, + "grad_norm": 2.248999014584043, + "learning_rate": 1.0355717276117506e-06, + "loss": 0.5876469612121582, + "step": 4889 + }, + { + "epoch": 1.7231718061674008, + "grad_norm": 2.0349261607011, + "learning_rate": 1.0329912272075181e-06, + "loss": 0.5543426275253296, + "step": 4890 + }, + { + "epoch": 1.72352422907489, + "grad_norm": 1.583533546685778, + "learning_rate": 1.0304137708236994e-06, + "loss": 0.5118540525436401, + "step": 4891 + }, + { + "epoch": 1.723876651982379, + "grad_norm": 1.82424017683773, + "learning_rate": 1.0278393593352631e-06, + "loss": 0.628477931022644, + "step": 4892 + }, + { + "epoch": 1.7242290748898679, + "grad_norm": 1.9109773060364437, + "learning_rate": 1.0252679936161392e-06, + "loss": 0.6119322776794434, + "step": 4893 + }, + { + "epoch": 1.7245814977973568, + "grad_norm": 1.8450217827392812, + "learning_rate": 1.0226996745392259e-06, + "loss": 0.7661763429641724, + "step": 4894 + }, + { + "epoch": 1.7249339207048457, + "grad_norm": 2.1201139928861394, + "learning_rate": 1.0201344029763927e-06, + "loss": 0.6431440114974976, + "step": 4895 + }, + { + "epoch": 1.7252863436123347, + "grad_norm": 2.0387248477928503, + "learning_rate": 1.0175721797984639e-06, + "loss": 0.7295387983322144, + "step": 4896 + }, + { + "epoch": 1.7256387665198238, + "grad_norm": 1.9561833203401287, + "learning_rate": 1.015013005875235e-06, + "loss": 0.58225017786026, + "step": 4897 + }, + { + "epoch": 1.7259911894273128, + "grad_norm": 1.9211243008184207, + "learning_rate": 1.0124568820754689e-06, + "loss": 0.5467473864555359, + 
"step": 4898 + }, + { + "epoch": 1.7263436123348017, + "grad_norm": 2.2453442964094967, + "learning_rate": 1.00990380926689e-06, + "loss": 0.7637814283370972, + "step": 4899 + }, + { + "epoch": 1.7266960352422909, + "grad_norm": 2.13267606796778, + "learning_rate": 1.0073537883161821e-06, + "loss": 0.5354464650154114, + "step": 4900 + }, + { + "epoch": 1.7270484581497798, + "grad_norm": 1.91187833906973, + "learning_rate": 1.0048068200890037e-06, + "loss": 0.5213606357574463, + "step": 4901 + }, + { + "epoch": 1.7274008810572687, + "grad_norm": 1.8770841550484265, + "learning_rate": 1.0022629054499678e-06, + "loss": 0.6073330640792847, + "step": 4902 + }, + { + "epoch": 1.7277533039647577, + "grad_norm": 2.1663053459498283, + "learning_rate": 9.997220452626587e-07, + "loss": 0.5711998343467712, + "step": 4903 + }, + { + "epoch": 1.7281057268722466, + "grad_norm": 1.8823259072141711, + "learning_rate": 9.971842403896137e-07, + "loss": 0.6824701428413391, + "step": 4904 + }, + { + "epoch": 1.7284581497797356, + "grad_norm": 1.844862593672041, + "learning_rate": 9.9464949169234e-07, + "loss": 0.528059184551239, + "step": 4905 + }, + { + "epoch": 1.7288105726872247, + "grad_norm": 1.7519423160504919, + "learning_rate": 9.92117800031308e-07, + "loss": 0.45617133378982544, + "step": 4906 + }, + { + "epoch": 1.7291629955947136, + "grad_norm": 2.095891000231315, + "learning_rate": 9.895891662659485e-07, + "loss": 0.6186379194259644, + "step": 4907 + }, + { + "epoch": 1.7295154185022028, + "grad_norm": 1.8933361504308706, + "learning_rate": 9.870635912546511e-07, + "loss": 0.622776985168457, + "step": 4908 + }, + { + "epoch": 1.7298678414096917, + "grad_norm": 2.1556634846751073, + "learning_rate": 9.845410758547724e-07, + "loss": 0.6322426199913025, + "step": 4909 + }, + { + "epoch": 1.7302202643171807, + "grad_norm": 1.8637079254212523, + "learning_rate": 9.82021620922624e-07, + "loss": 0.565685510635376, + "step": 4910 + }, + { + "epoch": 1.7305726872246696, + 
"grad_norm": 1.9032887733300228, + "learning_rate": 9.795052273134908e-07, + "loss": 0.670723557472229, + "step": 4911 + }, + { + "epoch": 1.7309251101321586, + "grad_norm": 1.818317953069921, + "learning_rate": 9.769918958816017e-07, + "loss": 0.627914309501648, + "step": 4912 + }, + { + "epoch": 1.7312775330396475, + "grad_norm": 1.8142433277320784, + "learning_rate": 9.74481627480156e-07, + "loss": 0.613754391670227, + "step": 4913 + }, + { + "epoch": 1.7316299559471364, + "grad_norm": 1.6146673255290158, + "learning_rate": 9.719744229613148e-07, + "loss": 0.7128336429595947, + "step": 4914 + }, + { + "epoch": 1.7319823788546256, + "grad_norm": 2.07516307915708, + "learning_rate": 9.694702831761937e-07, + "loss": 0.692448079586029, + "step": 4915 + }, + { + "epoch": 1.7323348017621145, + "grad_norm": 1.8379288210737326, + "learning_rate": 9.669692089748717e-07, + "loss": 0.5722585916519165, + "step": 4916 + }, + { + "epoch": 1.7326872246696037, + "grad_norm": 2.880722779651987, + "learning_rate": 9.64471201206385e-07, + "loss": 0.5267904996871948, + "step": 4917 + }, + { + "epoch": 1.7330396475770926, + "grad_norm": 1.8098448963152955, + "learning_rate": 9.619762607187277e-07, + "loss": 0.6290950179100037, + "step": 4918 + }, + { + "epoch": 1.7333920704845815, + "grad_norm": 1.6991585212089806, + "learning_rate": 9.594843883588588e-07, + "loss": 0.5137144327163696, + "step": 4919 + }, + { + "epoch": 1.7337444933920705, + "grad_norm": 2.0101083451482067, + "learning_rate": 9.569955849726875e-07, + "loss": 0.6110765337944031, + "step": 4920 + }, + { + "epoch": 1.7340969162995594, + "grad_norm": 1.805820390142787, + "learning_rate": 9.545098514050844e-07, + "loss": 0.5097514390945435, + "step": 4921 + }, + { + "epoch": 1.7344493392070484, + "grad_norm": 1.9204009410934093, + "learning_rate": 9.520271884998822e-07, + "loss": 0.7220968008041382, + "step": 4922 + }, + { + "epoch": 1.7348017621145373, + "grad_norm": 1.816061125504689, + "learning_rate": 
9.495475970998669e-07, + "loss": 0.4790550470352173, + "step": 4923 + }, + { + "epoch": 1.7351541850220265, + "grad_norm": 1.8878679441443287, + "learning_rate": 9.470710780467818e-07, + "loss": 0.5440540909767151, + "step": 4924 + }, + { + "epoch": 1.7355066079295154, + "grad_norm": 1.8420075371513611, + "learning_rate": 9.445976321813277e-07, + "loss": 0.6351054310798645, + "step": 4925 + }, + { + "epoch": 1.7358590308370045, + "grad_norm": 1.8685391189030902, + "learning_rate": 9.421272603431619e-07, + "loss": 0.597430944442749, + "step": 4926 + }, + { + "epoch": 1.7362114537444935, + "grad_norm": 1.8993591697635552, + "learning_rate": 9.396599633709013e-07, + "loss": 0.5826110243797302, + "step": 4927 + }, + { + "epoch": 1.7365638766519824, + "grad_norm": 1.9528322527669026, + "learning_rate": 9.371957421021116e-07, + "loss": 0.61531662940979, + "step": 4928 + }, + { + "epoch": 1.7369162995594714, + "grad_norm": 1.7976479809998938, + "learning_rate": 9.347345973733257e-07, + "loss": 0.5286549925804138, + "step": 4929 + }, + { + "epoch": 1.7372687224669603, + "grad_norm": 2.051327926584316, + "learning_rate": 9.322765300200209e-07, + "loss": 0.6923980712890625, + "step": 4930 + }, + { + "epoch": 1.7376211453744492, + "grad_norm": 1.8765754964403032, + "learning_rate": 9.298215408766376e-07, + "loss": 0.5408697128295898, + "step": 4931 + }, + { + "epoch": 1.7379735682819382, + "grad_norm": 1.9428832757254997, + "learning_rate": 9.273696307765656e-07, + "loss": 0.6360228061676025, + "step": 4932 + }, + { + "epoch": 1.7383259911894273, + "grad_norm": 1.5478222777536266, + "learning_rate": 9.249208005521538e-07, + "loss": 0.46559634804725647, + "step": 4933 + }, + { + "epoch": 1.7386784140969163, + "grad_norm": 2.0814940983294465, + "learning_rate": 9.224750510347036e-07, + "loss": 0.6065478324890137, + "step": 4934 + }, + { + "epoch": 1.7390308370044054, + "grad_norm": 2.197942688439507, + "learning_rate": 9.2003238305447e-07, + "loss": 0.6777745485305786, + 
"step": 4935 + }, + { + "epoch": 1.7393832599118944, + "grad_norm": 1.764242470379209, + "learning_rate": 9.175927974406607e-07, + "loss": 0.568982720375061, + "step": 4936 + }, + { + "epoch": 1.7397356828193833, + "grad_norm": 1.9082270198240563, + "learning_rate": 9.151562950214443e-07, + "loss": 0.6014461517333984, + "step": 4937 + }, + { + "epoch": 1.7400881057268722, + "grad_norm": 1.9463215063568118, + "learning_rate": 9.127228766239349e-07, + "loss": 0.6312133073806763, + "step": 4938 + }, + { + "epoch": 1.7404405286343612, + "grad_norm": 1.9066118382891128, + "learning_rate": 9.102925430742015e-07, + "loss": 0.5440298318862915, + "step": 4939 + }, + { + "epoch": 1.74079295154185, + "grad_norm": 1.9115402376997355, + "learning_rate": 9.078652951972688e-07, + "loss": 0.6599005460739136, + "step": 4940 + }, + { + "epoch": 1.7411453744493393, + "grad_norm": 1.8987879122247575, + "learning_rate": 9.054411338171099e-07, + "loss": 0.6719228625297546, + "step": 4941 + }, + { + "epoch": 1.7414977973568282, + "grad_norm": 1.7692389966879711, + "learning_rate": 9.030200597566529e-07, + "loss": 0.5771356821060181, + "step": 4942 + }, + { + "epoch": 1.7418502202643171, + "grad_norm": 2.0029197465912936, + "learning_rate": 9.006020738377764e-07, + "loss": 0.5066591501235962, + "step": 4943 + }, + { + "epoch": 1.7422026431718063, + "grad_norm": 1.754361693598564, + "learning_rate": 8.981871768813111e-07, + "loss": 0.5091663002967834, + "step": 4944 + }, + { + "epoch": 1.7425550660792952, + "grad_norm": 1.9092674317256029, + "learning_rate": 8.957753697070415e-07, + "loss": 0.6594514846801758, + "step": 4945 + }, + { + "epoch": 1.7429074889867842, + "grad_norm": 1.8033652679865708, + "learning_rate": 8.933666531337004e-07, + "loss": 0.5485379695892334, + "step": 4946 + }, + { + "epoch": 1.743259911894273, + "grad_norm": 2.2602019905537913, + "learning_rate": 8.909610279789716e-07, + "loss": 0.6079416871070862, + "step": 4947 + }, + { + "epoch": 1.743612334801762, + 
"grad_norm": 1.8415960205262154, + "learning_rate": 8.885584950594894e-07, + "loss": 0.4980606436729431, + "step": 4948 + }, + { + "epoch": 1.743964757709251, + "grad_norm": 1.5880176897451332, + "learning_rate": 8.861590551908405e-07, + "loss": 0.47701022028923035, + "step": 4949 + }, + { + "epoch": 1.7443171806167401, + "grad_norm": 1.7223149872435417, + "learning_rate": 8.837627091875578e-07, + "loss": 0.5041281580924988, + "step": 4950 + }, + { + "epoch": 1.744669603524229, + "grad_norm": 1.9666236461253934, + "learning_rate": 8.813694578631283e-07, + "loss": 0.5477255582809448, + "step": 4951 + }, + { + "epoch": 1.7450220264317182, + "grad_norm": 1.883766477051188, + "learning_rate": 8.78979302029983e-07, + "loss": 0.6377973556518555, + "step": 4952 + }, + { + "epoch": 1.7453744493392072, + "grad_norm": 1.940207867324299, + "learning_rate": 8.76592242499511e-07, + "loss": 0.6688166856765747, + "step": 4953 + }, + { + "epoch": 1.745726872246696, + "grad_norm": 2.0031898505950907, + "learning_rate": 8.742082800820406e-07, + "loss": 0.6236848831176758, + "step": 4954 + }, + { + "epoch": 1.746079295154185, + "grad_norm": 1.7582600318717108, + "learning_rate": 8.718274155868545e-07, + "loss": 0.653768002986908, + "step": 4955 + }, + { + "epoch": 1.746431718061674, + "grad_norm": 1.844534933556578, + "learning_rate": 8.694496498221805e-07, + "loss": 0.5647604465484619, + "step": 4956 + }, + { + "epoch": 1.746784140969163, + "grad_norm": 1.781932697931349, + "learning_rate": 8.670749835951964e-07, + "loss": 0.4960663914680481, + "step": 4957 + }, + { + "epoch": 1.7471365638766518, + "grad_norm": 1.6873484879529697, + "learning_rate": 8.647034177120317e-07, + "loss": 0.6271536350250244, + "step": 4958 + }, + { + "epoch": 1.747488986784141, + "grad_norm": 2.0059254125224757, + "learning_rate": 8.623349529777525e-07, + "loss": 0.6323459148406982, + "step": 4959 + }, + { + "epoch": 1.74784140969163, + "grad_norm": 1.9564636362517054, + "learning_rate": 
8.599695901963811e-07, + "loss": 0.6084197163581848, + "step": 4960 + }, + { + "epoch": 1.748193832599119, + "grad_norm": 1.8913653459936526, + "learning_rate": 8.576073301708876e-07, + "loss": 0.48974379897117615, + "step": 4961 + }, + { + "epoch": 1.748546255506608, + "grad_norm": 1.8735173678444992, + "learning_rate": 8.552481737031859e-07, + "loss": 0.5985081195831299, + "step": 4962 + }, + { + "epoch": 1.748898678414097, + "grad_norm": 1.6360789306706147, + "learning_rate": 8.528921215941299e-07, + "loss": 0.507872998714447, + "step": 4963 + }, + { + "epoch": 1.749251101321586, + "grad_norm": 1.5251403239052872, + "learning_rate": 8.50539174643531e-07, + "loss": 0.5772356986999512, + "step": 4964 + }, + { + "epoch": 1.7496035242290748, + "grad_norm": 2.222117569410965, + "learning_rate": 8.48189333650139e-07, + "loss": 0.675100564956665, + "step": 4965 + }, + { + "epoch": 1.7499559471365638, + "grad_norm": 1.9356078104678653, + "learning_rate": 8.458425994116582e-07, + "loss": 0.5571645498275757, + "step": 4966 + }, + { + "epoch": 1.7503083700440527, + "grad_norm": 1.807660183683072, + "learning_rate": 8.434989727247233e-07, + "loss": 0.5842185020446777, + "step": 4967 + }, + { + "epoch": 1.7506607929515419, + "grad_norm": 1.7960899956397995, + "learning_rate": 8.41158454384925e-07, + "loss": 0.5693016648292542, + "step": 4968 + }, + { + "epoch": 1.7510132158590308, + "grad_norm": 1.808037504366546, + "learning_rate": 8.388210451868006e-07, + "loss": 0.5791449546813965, + "step": 4969 + }, + { + "epoch": 1.75136563876652, + "grad_norm": 2.1439820497437516, + "learning_rate": 8.364867459238257e-07, + "loss": 0.4873960018157959, + "step": 4970 + }, + { + "epoch": 1.751718061674009, + "grad_norm": 1.6712365329059415, + "learning_rate": 8.341555573884175e-07, + "loss": 0.609403669834137, + "step": 4971 + }, + { + "epoch": 1.7520704845814978, + "grad_norm": 2.0664225342752327, + "learning_rate": 8.318274803719483e-07, + "loss": 0.5676242113113403, + "step": 4972 + 
}, + { + "epoch": 1.7524229074889868, + "grad_norm": 2.2550971825464026, + "learning_rate": 8.29502515664723e-07, + "loss": 0.7692728638648987, + "step": 4973 + }, + { + "epoch": 1.7527753303964757, + "grad_norm": 2.318073308236361, + "learning_rate": 8.27180664056001e-07, + "loss": 0.7940253019332886, + "step": 4974 + }, + { + "epoch": 1.7531277533039646, + "grad_norm": 2.021077548315, + "learning_rate": 8.24861926333973e-07, + "loss": 0.5784735083580017, + "step": 4975 + }, + { + "epoch": 1.7534801762114536, + "grad_norm": 2.106016882372918, + "learning_rate": 8.225463032857783e-07, + "loss": 0.6493539810180664, + "step": 4976 + }, + { + "epoch": 1.7538325991189427, + "grad_norm": 1.6893816606485224, + "learning_rate": 8.202337956975026e-07, + "loss": 0.615519106388092, + "step": 4977 + }, + { + "epoch": 1.7541850220264317, + "grad_norm": 2.4337358559529587, + "learning_rate": 8.179244043541678e-07, + "loss": 0.5369104146957397, + "step": 4978 + }, + { + "epoch": 1.7545374449339208, + "grad_norm": 1.8845170170566812, + "learning_rate": 8.156181300397414e-07, + "loss": 0.5527158975601196, + "step": 4979 + }, + { + "epoch": 1.7548898678414098, + "grad_norm": 2.1597753145956786, + "learning_rate": 8.133149735371316e-07, + "loss": 0.5870147943496704, + "step": 4980 + }, + { + "epoch": 1.7552422907488987, + "grad_norm": 2.0333589118991497, + "learning_rate": 8.110149356281848e-07, + "loss": 0.7235025763511658, + "step": 4981 + }, + { + "epoch": 1.7555947136563876, + "grad_norm": 1.9283097758260628, + "learning_rate": 8.087180170937004e-07, + "loss": 0.5630521774291992, + "step": 4982 + }, + { + "epoch": 1.7559471365638766, + "grad_norm": 2.015740627515862, + "learning_rate": 8.06424218713403e-07, + "loss": 0.5005021691322327, + "step": 4983 + }, + { + "epoch": 1.7562995594713655, + "grad_norm": 2.0683486617790066, + "learning_rate": 8.041335412659679e-07, + "loss": 0.7267229557037354, + "step": 4984 + }, + { + "epoch": 1.7566519823788547, + "grad_norm": 
2.2397406108409834, + "learning_rate": 8.018459855290107e-07, + "loss": 0.6494802236557007, + "step": 4985 + }, + { + "epoch": 1.7570044052863436, + "grad_norm": 1.8012009390187627, + "learning_rate": 7.995615522790845e-07, + "loss": 0.5637267827987671, + "step": 4986 + }, + { + "epoch": 1.7573568281938328, + "grad_norm": 1.807872858711751, + "learning_rate": 7.972802422916826e-07, + "loss": 0.5143958330154419, + "step": 4987 + }, + { + "epoch": 1.7577092511013217, + "grad_norm": 1.7925007157989583, + "learning_rate": 7.950020563412398e-07, + "loss": 0.607841968536377, + "step": 4988 + }, + { + "epoch": 1.7580616740088106, + "grad_norm": 1.9011698158798267, + "learning_rate": 7.927269952011285e-07, + "loss": 0.6066895723342896, + "step": 4989 + }, + { + "epoch": 1.7584140969162996, + "grad_norm": 2.293924542695718, + "learning_rate": 7.904550596436611e-07, + "loss": 0.6686232686042786, + "step": 4990 + }, + { + "epoch": 1.7587665198237885, + "grad_norm": 1.7540251789370713, + "learning_rate": 7.881862504400884e-07, + "loss": 0.589708685874939, + "step": 4991 + }, + { + "epoch": 1.7591189427312774, + "grad_norm": 1.9346002211307631, + "learning_rate": 7.859205683606008e-07, + "loss": 0.7008450031280518, + "step": 4992 + }, + { + "epoch": 1.7594713656387664, + "grad_norm": 1.5488386957340947, + "learning_rate": 7.836580141743289e-07, + "loss": 0.5754648447036743, + "step": 4993 + }, + { + "epoch": 1.7598237885462555, + "grad_norm": 1.8204543329281522, + "learning_rate": 7.81398588649338e-07, + "loss": 0.5756049156188965, + "step": 4994 + }, + { + "epoch": 1.7601762114537445, + "grad_norm": 1.8754803653843481, + "learning_rate": 7.791422925526326e-07, + "loss": 0.6143715381622314, + "step": 4995 + }, + { + "epoch": 1.7605286343612336, + "grad_norm": 1.9795958910244131, + "learning_rate": 7.768891266501544e-07, + "loss": 0.700069010257721, + "step": 4996 + }, + { + "epoch": 1.7608810572687226, + "grad_norm": 1.8030282940418303, + "learning_rate": 7.746390917067847e-07, 
+ "loss": 0.5200002193450928, + "step": 4997 + }, + { + "epoch": 1.7612334801762115, + "grad_norm": 2.0811179040330483, + "learning_rate": 7.723921884863395e-07, + "loss": 0.6963525414466858, + "step": 4998 + }, + { + "epoch": 1.7615859030837004, + "grad_norm": 1.9255908471526815, + "learning_rate": 7.701484177515717e-07, + "loss": 0.6329556703567505, + "step": 4999 + }, + { + "epoch": 1.7619383259911894, + "grad_norm": 2.0796773022688213, + "learning_rate": 7.67907780264171e-07, + "loss": 0.6980677247047424, + "step": 5000 + }, + { + "epoch": 1.7622907488986783, + "grad_norm": 1.95091452058077, + "learning_rate": 7.656702767847679e-07, + "loss": 0.5244314670562744, + "step": 5001 + }, + { + "epoch": 1.7626431718061673, + "grad_norm": 1.937585844549177, + "learning_rate": 7.634359080729215e-07, + "loss": 0.6679523587226868, + "step": 5002 + }, + { + "epoch": 1.7629955947136564, + "grad_norm": 1.7698344536731299, + "learning_rate": 7.612046748871327e-07, + "loss": 0.6168316602706909, + "step": 5003 + }, + { + "epoch": 1.7633480176211453, + "grad_norm": 1.8295319189191592, + "learning_rate": 7.589765779848346e-07, + "loss": 0.5892738699913025, + "step": 5004 + }, + { + "epoch": 1.7637004405286345, + "grad_norm": 1.8270406797726577, + "learning_rate": 7.567516181223966e-07, + "loss": 0.6714082956314087, + "step": 5005 + }, + { + "epoch": 1.7640528634361234, + "grad_norm": 1.7798086214061835, + "learning_rate": 7.545297960551245e-07, + "loss": 0.6327016353607178, + "step": 5006 + }, + { + "epoch": 1.7644052863436124, + "grad_norm": 1.8272907155681217, + "learning_rate": 7.52311112537254e-07, + "loss": 0.5114126205444336, + "step": 5007 + }, + { + "epoch": 1.7647577092511013, + "grad_norm": 1.9198067827489789, + "learning_rate": 7.500955683219646e-07, + "loss": 0.5701695084571838, + "step": 5008 + }, + { + "epoch": 1.7651101321585903, + "grad_norm": 1.7304483866926885, + "learning_rate": 7.478831641613616e-07, + "loss": 0.5966283082962036, + "step": 5009 + }, + { + 
"epoch": 1.7654625550660792, + "grad_norm": 1.7690414353003558, + "learning_rate": 7.456739008064883e-07, + "loss": 0.6219101548194885, + "step": 5010 + }, + { + "epoch": 1.7658149779735681, + "grad_norm": 2.1971226449232804, + "learning_rate": 7.434677790073197e-07, + "loss": 0.6516324877738953, + "step": 5011 + }, + { + "epoch": 1.7661674008810573, + "grad_norm": 2.0945250680543395, + "learning_rate": 7.412647995127664e-07, + "loss": 0.4623621106147766, + "step": 5012 + }, + { + "epoch": 1.7665198237885462, + "grad_norm": 1.7568345992089816, + "learning_rate": 7.390649630706703e-07, + "loss": 0.5661109685897827, + "step": 5013 + }, + { + "epoch": 1.7668722466960354, + "grad_norm": 2.0070117088967154, + "learning_rate": 7.368682704278096e-07, + "loss": 0.47063148021698, + "step": 5014 + }, + { + "epoch": 1.7672246696035243, + "grad_norm": 1.636187219475051, + "learning_rate": 7.346747223298889e-07, + "loss": 0.5684597492218018, + "step": 5015 + }, + { + "epoch": 1.7675770925110132, + "grad_norm": 1.872749765270047, + "learning_rate": 7.324843195215548e-07, + "loss": 0.5614477396011353, + "step": 5016 + }, + { + "epoch": 1.7679295154185022, + "grad_norm": 1.9944667195924293, + "learning_rate": 7.302970627463779e-07, + "loss": 0.508664608001709, + "step": 5017 + }, + { + "epoch": 1.7682819383259911, + "grad_norm": 1.9918093815103546, + "learning_rate": 7.281129527468645e-07, + "loss": 0.5348209142684937, + "step": 5018 + }, + { + "epoch": 1.76863436123348, + "grad_norm": 2.2774118234615695, + "learning_rate": 7.259319902644513e-07, + "loss": 0.6441121101379395, + "step": 5019 + }, + { + "epoch": 1.7689867841409692, + "grad_norm": 1.7776640162425583, + "learning_rate": 7.237541760395083e-07, + "loss": 0.6454842686653137, + "step": 5020 + }, + { + "epoch": 1.7693392070484582, + "grad_norm": 1.818033997112941, + "learning_rate": 7.215795108113343e-07, + "loss": 0.4822286367416382, + "step": 5021 + }, + { + "epoch": 1.769691629955947, + "grad_norm": 2.2519074742911775, 
+ "learning_rate": 7.19407995318162e-07, + "loss": 0.6078327894210815, + "step": 5022 + }, + { + "epoch": 1.7700440528634362, + "grad_norm": 1.9964867958416748, + "learning_rate": 7.172396302971507e-07, + "loss": 0.6394459009170532, + "step": 5023 + }, + { + "epoch": 1.7703964757709252, + "grad_norm": 1.919321953608054, + "learning_rate": 7.150744164843959e-07, + "loss": 0.646416425704956, + "step": 5024 + }, + { + "epoch": 1.7707488986784141, + "grad_norm": 1.743918601710363, + "learning_rate": 7.129123546149208e-07, + "loss": 0.6265356540679932, + "step": 5025 + }, + { + "epoch": 1.771101321585903, + "grad_norm": 1.717725969603381, + "learning_rate": 7.107534454226728e-07, + "loss": 0.5074717998504639, + "step": 5026 + }, + { + "epoch": 1.771453744493392, + "grad_norm": 1.9181838757933405, + "learning_rate": 7.0859768964054e-07, + "loss": 0.7036402821540833, + "step": 5027 + }, + { + "epoch": 1.771806167400881, + "grad_norm": 1.7638856276686163, + "learning_rate": 7.064450880003327e-07, + "loss": 0.6098893880844116, + "step": 5028 + }, + { + "epoch": 1.77215859030837, + "grad_norm": 2.005026773406909, + "learning_rate": 7.042956412327917e-07, + "loss": 0.582880973815918, + "step": 5029 + }, + { + "epoch": 1.772511013215859, + "grad_norm": 2.013313109536588, + "learning_rate": 7.021493500675869e-07, + "loss": 0.6003242135047913, + "step": 5030 + }, + { + "epoch": 1.7728634361233482, + "grad_norm": 1.9319887994625418, + "learning_rate": 7.000062152333165e-07, + "loss": 0.4999944865703583, + "step": 5031 + }, + { + "epoch": 1.7732158590308371, + "grad_norm": 1.8450299102376384, + "learning_rate": 6.978662374575107e-07, + "loss": 0.5569149255752563, + "step": 5032 + }, + { + "epoch": 1.773568281938326, + "grad_norm": 1.9277460192299252, + "learning_rate": 6.957294174666263e-07, + "loss": 0.5600287914276123, + "step": 5033 + }, + { + "epoch": 1.773920704845815, + "grad_norm": 1.8890013971887576, + "learning_rate": 6.935957559860418e-07, + "loss": 0.5412951707839966, + 
"step": 5034 + }, + { + "epoch": 1.774273127753304, + "grad_norm": 1.7378105888388657, + "learning_rate": 6.914652537400735e-07, + "loss": 0.5881151556968689, + "step": 5035 + }, + { + "epoch": 1.7746255506607929, + "grad_norm": 1.8829243382985155, + "learning_rate": 6.893379114519572e-07, + "loss": 0.5975406169891357, + "step": 5036 + }, + { + "epoch": 1.7749779735682818, + "grad_norm": 1.7883517993987919, + "learning_rate": 6.872137298438653e-07, + "loss": 0.6266802549362183, + "step": 5037 + }, + { + "epoch": 1.775330396475771, + "grad_norm": 2.279148556628154, + "learning_rate": 6.850927096368854e-07, + "loss": 0.6825709939002991, + "step": 5038 + }, + { + "epoch": 1.77568281938326, + "grad_norm": 1.6068572613194736, + "learning_rate": 6.829748515510381e-07, + "loss": 0.6035742163658142, + "step": 5039 + }, + { + "epoch": 1.776035242290749, + "grad_norm": 1.901514453732062, + "learning_rate": 6.808601563052742e-07, + "loss": 0.6665611267089844, + "step": 5040 + }, + { + "epoch": 1.776387665198238, + "grad_norm": 2.334324554300087, + "learning_rate": 6.787486246174657e-07, + "loss": 0.8202367424964905, + "step": 5041 + }, + { + "epoch": 1.776740088105727, + "grad_norm": 1.8080635950130315, + "learning_rate": 6.766402572044084e-07, + "loss": 0.6516656875610352, + "step": 5042 + }, + { + "epoch": 1.7770925110132159, + "grad_norm": 1.6361942373114873, + "learning_rate": 6.745350547818307e-07, + "loss": 0.663591742515564, + "step": 5043 + }, + { + "epoch": 1.7774449339207048, + "grad_norm": 2.0460511379273716, + "learning_rate": 6.724330180643824e-07, + "loss": 0.6025142669677734, + "step": 5044 + }, + { + "epoch": 1.7777973568281937, + "grad_norm": 1.6332878492082579, + "learning_rate": 6.703341477656422e-07, + "loss": 0.5704027414321899, + "step": 5045 + }, + { + "epoch": 1.7781497797356827, + "grad_norm": 2.0053343984683534, + "learning_rate": 6.682384445981071e-07, + "loss": 0.6518473625183105, + "step": 5046 + }, + { + "epoch": 1.7785022026431718, + 
"grad_norm": 1.6878153153712165, + "learning_rate": 6.661459092732037e-07, + "loss": 0.5547574758529663, + "step": 5047 + }, + { + "epoch": 1.7788546255506608, + "grad_norm": 1.8096814000573205, + "learning_rate": 6.640565425012846e-07, + "loss": 0.6248831748962402, + "step": 5048 + }, + { + "epoch": 1.77920704845815, + "grad_norm": 1.8747085080187502, + "learning_rate": 6.619703449916259e-07, + "loss": 0.5899701118469238, + "step": 5049 + }, + { + "epoch": 1.7795594713656389, + "grad_norm": 1.9253293216058311, + "learning_rate": 6.598873174524223e-07, + "loss": 0.41864174604415894, + "step": 5050 + }, + { + "epoch": 1.7799118942731278, + "grad_norm": 2.2457701854009025, + "learning_rate": 6.578074605908002e-07, + "loss": 0.7473436594009399, + "step": 5051 + }, + { + "epoch": 1.7802643171806167, + "grad_norm": 1.6599111795216646, + "learning_rate": 6.557307751128051e-07, + "loss": 0.49480879306793213, + "step": 5052 + }, + { + "epoch": 1.7806167400881057, + "grad_norm": 1.8257078701065834, + "learning_rate": 6.536572617234082e-07, + "loss": 0.5619323253631592, + "step": 5053 + }, + { + "epoch": 1.7809691629955946, + "grad_norm": 1.8566139978409217, + "learning_rate": 6.515869211265013e-07, + "loss": 0.5271984338760376, + "step": 5054 + }, + { + "epoch": 1.7813215859030835, + "grad_norm": 1.967436768949709, + "learning_rate": 6.495197540248999e-07, + "loss": 0.6544383764266968, + "step": 5055 + }, + { + "epoch": 1.7816740088105727, + "grad_norm": 2.157946298106486, + "learning_rate": 6.474557611203458e-07, + "loss": 0.6525388956069946, + "step": 5056 + }, + { + "epoch": 1.7820264317180616, + "grad_norm": 2.0314482863762735, + "learning_rate": 6.453949431134987e-07, + "loss": 0.5509910583496094, + "step": 5057 + }, + { + "epoch": 1.7823788546255508, + "grad_norm": 1.6067790596532618, + "learning_rate": 6.433373007039412e-07, + "loss": 0.5030776262283325, + "step": 5058 + }, + { + "epoch": 1.7827312775330397, + "grad_norm": 1.875686429811456, + "learning_rate": 
6.412828345901811e-07, + "loss": 0.6743696331977844, + "step": 5059 + }, + { + "epoch": 1.7830837004405287, + "grad_norm": 1.9399780429001139, + "learning_rate": 6.392315454696452e-07, + "loss": 0.5395437479019165, + "step": 5060 + }, + { + "epoch": 1.7834361233480176, + "grad_norm": 1.7657846282567238, + "learning_rate": 6.371834340386807e-07, + "loss": 0.5773402452468872, + "step": 5061 + }, + { + "epoch": 1.7837885462555065, + "grad_norm": 1.920136830142019, + "learning_rate": 6.351385009925582e-07, + "loss": 0.6014268398284912, + "step": 5062 + }, + { + "epoch": 1.7841409691629955, + "grad_norm": 1.9465884411051106, + "learning_rate": 6.33096747025469e-07, + "loss": 0.5519139170646667, + "step": 5063 + }, + { + "epoch": 1.7844933920704846, + "grad_norm": 3.0085962631929752, + "learning_rate": 6.310581728305254e-07, + "loss": 0.5407502055168152, + "step": 5064 + }, + { + "epoch": 1.7848458149779736, + "grad_norm": 1.5371833099084395, + "learning_rate": 6.290227790997605e-07, + "loss": 0.61688232421875, + "step": 5065 + }, + { + "epoch": 1.7851982378854625, + "grad_norm": 2.002396471657761, + "learning_rate": 6.269905665241271e-07, + "loss": 0.5212849974632263, + "step": 5066 + }, + { + "epoch": 1.7855506607929517, + "grad_norm": 1.7684490871986807, + "learning_rate": 6.249615357934968e-07, + "loss": 0.6827710866928101, + "step": 5067 + }, + { + "epoch": 1.7859030837004406, + "grad_norm": 2.016669351586175, + "learning_rate": 6.22935687596663e-07, + "loss": 0.6907633543014526, + "step": 5068 + }, + { + "epoch": 1.7862555066079295, + "grad_norm": 2.045834595721204, + "learning_rate": 6.209130226213378e-07, + "loss": 0.5707769989967346, + "step": 5069 + }, + { + "epoch": 1.7866079295154185, + "grad_norm": 1.9432188628486171, + "learning_rate": 6.188935415541541e-07, + "loss": 0.6062690019607544, + "step": 5070 + }, + { + "epoch": 1.7869603524229074, + "grad_norm": 1.8744219034756735, + "learning_rate": 6.168772450806604e-07, + "loss": 0.5291163921356201, + "step": 
5071 + }, + { + "epoch": 1.7873127753303963, + "grad_norm": 1.8892054954511246, + "learning_rate": 6.148641338853301e-07, + "loss": 0.6324198246002197, + "step": 5072 + }, + { + "epoch": 1.7876651982378855, + "grad_norm": 1.7030219876612867, + "learning_rate": 6.128542086515499e-07, + "loss": 0.5516111850738525, + "step": 5073 + }, + { + "epoch": 1.7880176211453744, + "grad_norm": 2.1800478368143232, + "learning_rate": 6.108474700616263e-07, + "loss": 0.6384079456329346, + "step": 5074 + }, + { + "epoch": 1.7883700440528636, + "grad_norm": 1.777234944410244, + "learning_rate": 6.088439187967865e-07, + "loss": 0.5699876546859741, + "step": 5075 + }, + { + "epoch": 1.7887224669603525, + "grad_norm": 2.081274535023766, + "learning_rate": 6.06843555537171e-07, + "loss": 0.6068697571754456, + "step": 5076 + }, + { + "epoch": 1.7890748898678415, + "grad_norm": 2.1233392160842066, + "learning_rate": 6.048463809618444e-07, + "loss": 0.6254304647445679, + "step": 5077 + }, + { + "epoch": 1.7894273127753304, + "grad_norm": 2.0059926594667914, + "learning_rate": 6.02852395748782e-07, + "loss": 0.6779477596282959, + "step": 5078 + }, + { + "epoch": 1.7897797356828193, + "grad_norm": 1.8024145072939486, + "learning_rate": 6.008616005748802e-07, + "loss": 0.6139817833900452, + "step": 5079 + }, + { + "epoch": 1.7901321585903083, + "grad_norm": 2.042935872875493, + "learning_rate": 5.988739961159539e-07, + "loss": 0.553310215473175, + "step": 5080 + }, + { + "epoch": 1.7904845814977972, + "grad_norm": 1.9543566497010472, + "learning_rate": 5.968895830467325e-07, + "loss": 0.6093542575836182, + "step": 5081 + }, + { + "epoch": 1.7908370044052864, + "grad_norm": 1.8231021161772492, + "learning_rate": 5.949083620408614e-07, + "loss": 0.6224432587623596, + "step": 5082 + }, + { + "epoch": 1.7911894273127753, + "grad_norm": 1.881995664144807, + "learning_rate": 5.929303337709047e-07, + "loss": 0.6155597567558289, + "step": 5083 + }, + { + "epoch": 1.7915418502202645, + "grad_norm": 
1.7127795559170356, + "learning_rate": 5.909554989083411e-07, + "loss": 0.5742098093032837, + "step": 5084 + }, + { + "epoch": 1.7918942731277534, + "grad_norm": 2.1579790645115886, + "learning_rate": 5.889838581235641e-07, + "loss": 0.7427949905395508, + "step": 5085 + }, + { + "epoch": 1.7922466960352423, + "grad_norm": 1.8686834683482023, + "learning_rate": 5.870154120858851e-07, + "loss": 0.48208528757095337, + "step": 5086 + }, + { + "epoch": 1.7925991189427313, + "grad_norm": 2.103622298674757, + "learning_rate": 5.850501614635318e-07, + "loss": 0.48402148485183716, + "step": 5087 + }, + { + "epoch": 1.7929515418502202, + "grad_norm": 1.9085757415865392, + "learning_rate": 5.83088106923646e-07, + "loss": 0.6808921694755554, + "step": 5088 + }, + { + "epoch": 1.7933039647577091, + "grad_norm": 1.4851842618773352, + "learning_rate": 5.811292491322795e-07, + "loss": 0.48358428478240967, + "step": 5089 + }, + { + "epoch": 1.793656387665198, + "grad_norm": 1.801328000774117, + "learning_rate": 5.791735887544081e-07, + "loss": 0.6492827534675598, + "step": 5090 + }, + { + "epoch": 1.7940088105726872, + "grad_norm": 1.542873674028149, + "learning_rate": 5.772211264539162e-07, + "loss": 0.5453791618347168, + "step": 5091 + }, + { + "epoch": 1.7943612334801762, + "grad_norm": 1.780642500081645, + "learning_rate": 5.75271862893605e-07, + "loss": 0.5901151895523071, + "step": 5092 + }, + { + "epoch": 1.7947136563876653, + "grad_norm": 2.0888993209852664, + "learning_rate": 5.73325798735187e-07, + "loss": 0.616302490234375, + "step": 5093 + }, + { + "epoch": 1.7950660792951543, + "grad_norm": 1.7666548150635142, + "learning_rate": 5.713829346392907e-07, + "loss": 0.616886556148529, + "step": 5094 + }, + { + "epoch": 1.7954185022026432, + "grad_norm": 2.1253066780397725, + "learning_rate": 5.694432712654597e-07, + "loss": 0.5552375316619873, + "step": 5095 + }, + { + "epoch": 1.7957709251101321, + "grad_norm": 1.9305053090727797, + "learning_rate": 5.675068092721491e-07, 
+ "loss": 0.5956143736839294, + "step": 5096 + }, + { + "epoch": 1.796123348017621, + "grad_norm": 2.0198097994194675, + "learning_rate": 5.655735493167247e-07, + "loss": 0.5870288610458374, + "step": 5097 + }, + { + "epoch": 1.79647577092511, + "grad_norm": 1.737470684820577, + "learning_rate": 5.636434920554701e-07, + "loss": 0.5325669646263123, + "step": 5098 + }, + { + "epoch": 1.7968281938325992, + "grad_norm": 1.9881595702868853, + "learning_rate": 5.617166381435813e-07, + "loss": 0.5931425094604492, + "step": 5099 + }, + { + "epoch": 1.797180616740088, + "grad_norm": 1.9607916445612916, + "learning_rate": 5.597929882351627e-07, + "loss": 0.5755603313446045, + "step": 5100 + }, + { + "epoch": 1.797533039647577, + "grad_norm": 2.000480246693455, + "learning_rate": 5.578725429832344e-07, + "loss": 0.5780980587005615, + "step": 5101 + }, + { + "epoch": 1.7978854625550662, + "grad_norm": 1.9982279321373282, + "learning_rate": 5.559553030397258e-07, + "loss": 0.5863890647888184, + "step": 5102 + }, + { + "epoch": 1.7982378854625551, + "grad_norm": 1.8196971349794717, + "learning_rate": 5.540412690554842e-07, + "loss": 0.5577390789985657, + "step": 5103 + }, + { + "epoch": 1.798590308370044, + "grad_norm": 1.773628551628446, + "learning_rate": 5.521304416802642e-07, + "loss": 0.5994857549667358, + "step": 5104 + }, + { + "epoch": 1.798942731277533, + "grad_norm": 1.8364843823531443, + "learning_rate": 5.502228215627281e-07, + "loss": 0.6065348982810974, + "step": 5105 + }, + { + "epoch": 1.799295154185022, + "grad_norm": 1.9447341697044171, + "learning_rate": 5.483184093504568e-07, + "loss": 0.5390498638153076, + "step": 5106 + }, + { + "epoch": 1.7996475770925109, + "grad_norm": 1.9731136151561257, + "learning_rate": 5.464172056899364e-07, + "loss": 0.5826783180236816, + "step": 5107 + }, + { + "epoch": 1.8, + "grad_norm": 1.7733740837200977, + "learning_rate": 5.445192112265718e-07, + "loss": 0.5429874658584595, + "step": 5108 + }, + { + "epoch": 
1.800352422907489, + "grad_norm": 1.8521585290179927, + "learning_rate": 5.426244266046676e-07, + "loss": 0.5591466426849365, + "step": 5109 + }, + { + "epoch": 1.8007048458149781, + "grad_norm": 1.6996794293630604, + "learning_rate": 5.407328524674449e-07, + "loss": 0.5351911187171936, + "step": 5110 + }, + { + "epoch": 1.801057268722467, + "grad_norm": 1.9525068150093072, + "learning_rate": 5.388444894570378e-07, + "loss": 0.6095720529556274, + "step": 5111 + }, + { + "epoch": 1.801409691629956, + "grad_norm": 1.9048124225268466, + "learning_rate": 5.369593382144844e-07, + "loss": 0.6278849840164185, + "step": 5112 + }, + { + "epoch": 1.801762114537445, + "grad_norm": 1.932605893192458, + "learning_rate": 5.350773993797332e-07, + "loss": 0.6787056922912598, + "step": 5113 + }, + { + "epoch": 1.8021145374449339, + "grad_norm": 1.7901749162387552, + "learning_rate": 5.331986735916461e-07, + "loss": 0.6054684519767761, + "step": 5114 + }, + { + "epoch": 1.8024669603524228, + "grad_norm": 1.9918768270140568, + "learning_rate": 5.31323161487991e-07, + "loss": 0.5039973855018616, + "step": 5115 + }, + { + "epoch": 1.8028193832599118, + "grad_norm": 2.1203502988203207, + "learning_rate": 5.294508637054474e-07, + "loss": 0.6306504011154175, + "step": 5116 + }, + { + "epoch": 1.803171806167401, + "grad_norm": 1.5433818431075417, + "learning_rate": 5.275817808796013e-07, + "loss": 0.5654761791229248, + "step": 5117 + }, + { + "epoch": 1.8035242290748899, + "grad_norm": 1.84553610812893, + "learning_rate": 5.257159136449452e-07, + "loss": 0.5801905989646912, + "step": 5118 + }, + { + "epoch": 1.803876651982379, + "grad_norm": 1.9190330109285871, + "learning_rate": 5.238532626348891e-07, + "loss": 0.6565619707107544, + "step": 5119 + }, + { + "epoch": 1.804229074889868, + "grad_norm": 2.043183915925982, + "learning_rate": 5.219938284817416e-07, + "loss": 0.5923253297805786, + "step": 5120 + }, + { + "epoch": 1.8045814977973569, + "grad_norm": 2.0522176560055647, + 
"learning_rate": 5.2013761181672e-07, + "loss": 0.6697949171066284, + "step": 5121 + }, + { + "epoch": 1.8049339207048458, + "grad_norm": 1.5694231089682613, + "learning_rate": 5.182846132699571e-07, + "loss": 0.5146230459213257, + "step": 5122 + }, + { + "epoch": 1.8052863436123348, + "grad_norm": 1.8882278421308176, + "learning_rate": 5.16434833470485e-07, + "loss": 0.5928882360458374, + "step": 5123 + }, + { + "epoch": 1.8056387665198237, + "grad_norm": 1.8209325836560148, + "learning_rate": 5.145882730462481e-07, + "loss": 0.6114771366119385, + "step": 5124 + }, + { + "epoch": 1.8059911894273126, + "grad_norm": 2.0596769025893122, + "learning_rate": 5.127449326240952e-07, + "loss": 0.6624642014503479, + "step": 5125 + }, + { + "epoch": 1.8063436123348018, + "grad_norm": 1.6177669824438379, + "learning_rate": 5.109048128297822e-07, + "loss": 0.6277980208396912, + "step": 5126 + }, + { + "epoch": 1.8066960352422907, + "grad_norm": 1.8432956331440709, + "learning_rate": 5.090679142879751e-07, + "loss": 0.6470246911048889, + "step": 5127 + }, + { + "epoch": 1.8070484581497799, + "grad_norm": 1.9361376318593135, + "learning_rate": 5.072342376222438e-07, + "loss": 0.6418337821960449, + "step": 5128 + }, + { + "epoch": 1.8074008810572688, + "grad_norm": 1.7303831881097942, + "learning_rate": 5.054037834550596e-07, + "loss": 0.6013847589492798, + "step": 5129 + }, + { + "epoch": 1.8077533039647578, + "grad_norm": 2.0870369514809086, + "learning_rate": 5.035765524078095e-07, + "loss": 0.5354605913162231, + "step": 5130 + }, + { + "epoch": 1.8081057268722467, + "grad_norm": 1.7245482885328716, + "learning_rate": 5.01752545100781e-07, + "loss": 0.6017459034919739, + "step": 5131 + }, + { + "epoch": 1.8084581497797356, + "grad_norm": 2.1853671040659335, + "learning_rate": 4.999317621531663e-07, + "loss": 0.5929696559906006, + "step": 5132 + }, + { + "epoch": 1.8088105726872246, + "grad_norm": 2.1106102623060723, + "learning_rate": 4.981142041830645e-07, + "loss": 
0.6444251537322998, + "step": 5133 + }, + { + "epoch": 1.8091629955947135, + "grad_norm": 1.9231094224982612, + "learning_rate": 4.962998718074807e-07, + "loss": 0.5854116678237915, + "step": 5134 + }, + { + "epoch": 1.8095154185022027, + "grad_norm": 1.674252446757184, + "learning_rate": 4.944887656423248e-07, + "loss": 0.5145394206047058, + "step": 5135 + }, + { + "epoch": 1.8098678414096916, + "grad_norm": 1.9221197947181823, + "learning_rate": 4.926808863024102e-07, + "loss": 0.5733104348182678, + "step": 5136 + }, + { + "epoch": 1.8102202643171807, + "grad_norm": 1.955048282910108, + "learning_rate": 4.908762344014573e-07, + "loss": 0.5925072431564331, + "step": 5137 + }, + { + "epoch": 1.8105726872246697, + "grad_norm": 1.8754640994406597, + "learning_rate": 4.890748105520859e-07, + "loss": 0.5346912145614624, + "step": 5138 + }, + { + "epoch": 1.8109251101321586, + "grad_norm": 1.636475505756285, + "learning_rate": 4.87276615365827e-07, + "loss": 0.6206755638122559, + "step": 5139 + }, + { + "epoch": 1.8112775330396476, + "grad_norm": 2.0734228349073076, + "learning_rate": 4.854816494531089e-07, + "loss": 0.5998660326004028, + "step": 5140 + }, + { + "epoch": 1.8116299559471365, + "grad_norm": 2.10222956499389, + "learning_rate": 4.836899134232687e-07, + "loss": 0.44545644521713257, + "step": 5141 + }, + { + "epoch": 1.8119823788546254, + "grad_norm": 1.904050289597462, + "learning_rate": 4.81901407884543e-07, + "loss": 0.701204776763916, + "step": 5142 + }, + { + "epoch": 1.8123348017621146, + "grad_norm": 1.8707530799436762, + "learning_rate": 4.801161334440762e-07, + "loss": 0.6103897094726562, + "step": 5143 + }, + { + "epoch": 1.8126872246696035, + "grad_norm": 1.7727850982789193, + "learning_rate": 4.783340907079126e-07, + "loss": 0.5864719152450562, + "step": 5144 + }, + { + "epoch": 1.8130396475770925, + "grad_norm": 1.436946543481978, + "learning_rate": 4.7655528028099916e-07, + "loss": 0.46949082612991333, + "step": 5145 + }, + { + "epoch": 
1.8133920704845816, + "grad_norm": 1.9729708472080463, + "learning_rate": 4.7477970276718855e-07, + "loss": 0.6371885538101196, + "step": 5146 + }, + { + "epoch": 1.8137444933920706, + "grad_norm": 2.043577546107911, + "learning_rate": 4.730073587692319e-07, + "loss": 0.6819220781326294, + "step": 5147 + }, + { + "epoch": 1.8140969162995595, + "grad_norm": 1.7501541102560871, + "learning_rate": 4.712382488887868e-07, + "loss": 0.5230735540390015, + "step": 5148 + }, + { + "epoch": 1.8144493392070484, + "grad_norm": 1.6629154647812032, + "learning_rate": 4.6947237372640954e-07, + "loss": 0.5194997787475586, + "step": 5149 + }, + { + "epoch": 1.8148017621145374, + "grad_norm": 2.6396803493511842, + "learning_rate": 4.677097338815595e-07, + "loss": 0.6025055050849915, + "step": 5150 + }, + { + "epoch": 1.8151541850220263, + "grad_norm": 1.9158428969793393, + "learning_rate": 4.6595032995260135e-07, + "loss": 0.649467945098877, + "step": 5151 + }, + { + "epoch": 1.8155066079295155, + "grad_norm": 1.8951471308172565, + "learning_rate": 4.641941625367918e-07, + "loss": 0.5216347575187683, + "step": 5152 + }, + { + "epoch": 1.8158590308370044, + "grad_norm": 2.264572307408149, + "learning_rate": 4.6244123223030177e-07, + "loss": 0.5135647058486938, + "step": 5153 + }, + { + "epoch": 1.8162114537444936, + "grad_norm": 1.8178771999892822, + "learning_rate": 4.6069153962819193e-07, + "loss": 0.5526058673858643, + "step": 5154 + }, + { + "epoch": 1.8165638766519825, + "grad_norm": 2.050533288883353, + "learning_rate": 4.589450853244315e-07, + "loss": 0.5897486209869385, + "step": 5155 + }, + { + "epoch": 1.8169162995594714, + "grad_norm": 1.8009014119109743, + "learning_rate": 4.5720186991188517e-07, + "loss": 0.5698407888412476, + "step": 5156 + }, + { + "epoch": 1.8172687224669604, + "grad_norm": 1.7954864355128493, + "learning_rate": 4.5546189398232075e-07, + "loss": 0.579573392868042, + "step": 5157 + }, + { + "epoch": 1.8176211453744493, + "grad_norm": 
1.7473651992455344, + "learning_rate": 4.5372515812640573e-07, + "loss": 0.41852182149887085, + "step": 5158 + }, + { + "epoch": 1.8179735682819382, + "grad_norm": 1.7056493552996725, + "learning_rate": 4.519916629337107e-07, + "loss": 0.6081204414367676, + "step": 5159 + }, + { + "epoch": 1.8183259911894272, + "grad_norm": 2.046109798166009, + "learning_rate": 4.502614089926982e-07, + "loss": 0.5725652575492859, + "step": 5160 + }, + { + "epoch": 1.8186784140969163, + "grad_norm": 1.7147916989755474, + "learning_rate": 4.4853439689073965e-07, + "loss": 0.5109303593635559, + "step": 5161 + }, + { + "epoch": 1.8190308370044053, + "grad_norm": 1.8721629996812361, + "learning_rate": 4.468106272141004e-07, + "loss": 0.5647833347320557, + "step": 5162 + }, + { + "epoch": 1.8193832599118944, + "grad_norm": 1.8784402680779348, + "learning_rate": 4.450901005479469e-07, + "loss": 0.6074738502502441, + "step": 5163 + }, + { + "epoch": 1.8197356828193834, + "grad_norm": 1.9135972387212516, + "learning_rate": 4.433728174763452e-07, + "loss": 0.647289514541626, + "step": 5164 + }, + { + "epoch": 1.8200881057268723, + "grad_norm": 2.08976454113542, + "learning_rate": 4.416587785822568e-07, + "loss": 0.5817590951919556, + "step": 5165 + }, + { + "epoch": 1.8204405286343612, + "grad_norm": 2.105714289057314, + "learning_rate": 4.399479844475485e-07, + "loss": 0.6483672857284546, + "step": 5166 + }, + { + "epoch": 1.8207929515418502, + "grad_norm": 1.9562649517319024, + "learning_rate": 4.382404356529801e-07, + "loss": 0.5439441204071045, + "step": 5167 + }, + { + "epoch": 1.821145374449339, + "grad_norm": 1.8467126365486348, + "learning_rate": 4.3653613277820804e-07, + "loss": 0.5835710167884827, + "step": 5168 + }, + { + "epoch": 1.821497797356828, + "grad_norm": 1.9450074521030982, + "learning_rate": 4.3483507640179503e-07, + "loss": 0.7024152874946594, + "step": 5169 + }, + { + "epoch": 1.8218502202643172, + "grad_norm": 1.880332916659811, + "learning_rate": 
4.331372671011935e-07, + "loss": 0.5223513841629028, + "step": 5170 + }, + { + "epoch": 1.8222026431718061, + "grad_norm": 2.771814545513559, + "learning_rate": 4.3144270545275814e-07, + "loss": 0.5975688099861145, + "step": 5171 + }, + { + "epoch": 1.8225550660792953, + "grad_norm": 1.5329834705964882, + "learning_rate": 4.2975139203173977e-07, + "loss": 0.5459109544754028, + "step": 5172 + }, + { + "epoch": 1.8229074889867842, + "grad_norm": 1.8202354421886453, + "learning_rate": 4.2806332741228586e-07, + "loss": 0.6155862808227539, + "step": 5173 + }, + { + "epoch": 1.8232599118942732, + "grad_norm": 2.2226946714753644, + "learning_rate": 4.263785121674435e-07, + "loss": 0.6505374908447266, + "step": 5174 + }, + { + "epoch": 1.823612334801762, + "grad_norm": 1.9153455724722082, + "learning_rate": 4.246969468691553e-07, + "loss": 0.5243734121322632, + "step": 5175 + }, + { + "epoch": 1.823964757709251, + "grad_norm": 1.8732488601912396, + "learning_rate": 4.2301863208825676e-07, + "loss": 0.6931817531585693, + "step": 5176 + }, + { + "epoch": 1.82431718061674, + "grad_norm": 1.969859922329015, + "learning_rate": 4.2134356839448665e-07, + "loss": 0.5312765836715698, + "step": 5177 + }, + { + "epoch": 1.824669603524229, + "grad_norm": 1.9404158745446412, + "learning_rate": 4.1967175635647674e-07, + "loss": 0.598992109298706, + "step": 5178 + }, + { + "epoch": 1.825022026431718, + "grad_norm": 1.7631344780586065, + "learning_rate": 4.1800319654175413e-07, + "loss": 0.5844708681106567, + "step": 5179 + }, + { + "epoch": 1.825374449339207, + "grad_norm": 1.9995354508958225, + "learning_rate": 4.1633788951674357e-07, + "loss": 0.5884612798690796, + "step": 5180 + }, + { + "epoch": 1.8257268722466962, + "grad_norm": 1.72810410086028, + "learning_rate": 4.1467583584676395e-07, + "loss": 0.6038404107093811, + "step": 5181 + }, + { + "epoch": 1.826079295154185, + "grad_norm": 2.339259211755874, + "learning_rate": 4.130170360960317e-07, + "loss": 0.6511296033859253, + 
"step": 5182 + }, + { + "epoch": 1.826431718061674, + "grad_norm": 1.925197944351106, + "learning_rate": 4.113614908276609e-07, + "loss": 0.5884404182434082, + "step": 5183 + }, + { + "epoch": 1.826784140969163, + "grad_norm": 1.731239361884253, + "learning_rate": 4.097092006036507e-07, + "loss": 0.5549901723861694, + "step": 5184 + }, + { + "epoch": 1.827136563876652, + "grad_norm": 1.994782951411243, + "learning_rate": 4.0806016598490707e-07, + "loss": 0.561951756477356, + "step": 5185 + }, + { + "epoch": 1.8274889867841408, + "grad_norm": 1.869408348764558, + "learning_rate": 4.064143875312254e-07, + "loss": 0.6412413120269775, + "step": 5186 + }, + { + "epoch": 1.82784140969163, + "grad_norm": 1.6798143654231001, + "learning_rate": 4.0477186580129447e-07, + "loss": 0.6295674443244934, + "step": 5187 + }, + { + "epoch": 1.828193832599119, + "grad_norm": 1.6293958799120483, + "learning_rate": 4.031326013527015e-07, + "loss": 0.6700723767280579, + "step": 5188 + }, + { + "epoch": 1.8285462555066079, + "grad_norm": 1.8215522719850648, + "learning_rate": 4.014965947419236e-07, + "loss": 0.5758254528045654, + "step": 5189 + }, + { + "epoch": 1.828898678414097, + "grad_norm": 1.9932829475641192, + "learning_rate": 3.9986384652433654e-07, + "loss": 0.6663509607315063, + "step": 5190 + }, + { + "epoch": 1.829251101321586, + "grad_norm": 1.9935453293677252, + "learning_rate": 3.982343572542069e-07, + "loss": 0.6459337472915649, + "step": 5191 + }, + { + "epoch": 1.829603524229075, + "grad_norm": 1.854876606446137, + "learning_rate": 3.9660812748469336e-07, + "loss": 0.6411766409873962, + "step": 5192 + }, + { + "epoch": 1.8299559471365638, + "grad_norm": 2.1651745240120976, + "learning_rate": 3.9498515776785207e-07, + "loss": 0.711888313293457, + "step": 5193 + }, + { + "epoch": 1.8303083700440528, + "grad_norm": 2.2389356684810284, + "learning_rate": 3.933654486546312e-07, + "loss": 0.63288813829422, + "step": 5194 + }, + { + "epoch": 1.8306607929515417, + "grad_norm": 
1.9048245223498055, + "learning_rate": 3.9174900069486985e-07, + "loss": 0.6330822706222534, + "step": 5195 + }, + { + "epoch": 1.8310132158590309, + "grad_norm": 2.0831179708663154, + "learning_rate": 3.901358144373035e-07, + "loss": 0.7242149114608765, + "step": 5196 + }, + { + "epoch": 1.8313656387665198, + "grad_norm": 1.8790323108631095, + "learning_rate": 3.885258904295575e-07, + "loss": 0.6741703748703003, + "step": 5197 + }, + { + "epoch": 1.831718061674009, + "grad_norm": 1.9200909143991698, + "learning_rate": 3.8691922921815226e-07, + "loss": 0.625057578086853, + "step": 5198 + }, + { + "epoch": 1.832070484581498, + "grad_norm": 2.457846968244059, + "learning_rate": 3.853158313484995e-07, + "loss": 0.673669159412384, + "step": 5199 + }, + { + "epoch": 1.8324229074889868, + "grad_norm": 1.7310768756301407, + "learning_rate": 3.837156973648992e-07, + "loss": 0.5981203317642212, + "step": 5200 + }, + { + "epoch": 1.8327753303964758, + "grad_norm": 2.2560941225086992, + "learning_rate": 3.821188278105514e-07, + "loss": 0.6577199697494507, + "step": 5201 + }, + { + "epoch": 1.8331277533039647, + "grad_norm": 1.8570769012933126, + "learning_rate": 3.805252232275414e-07, + "loss": 0.6951043605804443, + "step": 5202 + }, + { + "epoch": 1.8334801762114536, + "grad_norm": 1.874325920944958, + "learning_rate": 3.7893488415684964e-07, + "loss": 0.572435200214386, + "step": 5203 + }, + { + "epoch": 1.8338325991189426, + "grad_norm": 1.7906206085216059, + "learning_rate": 3.773478111383455e-07, + "loss": 0.5849496126174927, + "step": 5204 + }, + { + "epoch": 1.8341850220264317, + "grad_norm": 1.9908368337543014, + "learning_rate": 3.7576400471079023e-07, + "loss": 0.5380967855453491, + "step": 5205 + }, + { + "epoch": 1.8345374449339207, + "grad_norm": 1.7322293442190257, + "learning_rate": 3.7418346541183923e-07, + "loss": 0.5681222677230835, + "step": 5206 + }, + { + "epoch": 1.8348898678414098, + "grad_norm": 1.7551676131968534, + "learning_rate": 
3.7260619377803677e-07, + "loss": 0.5012099146842957, + "step": 5207 + }, + { + "epoch": 1.8352422907488988, + "grad_norm": 1.9889231090545432, + "learning_rate": 3.710321903448133e-07, + "loss": 0.6175205707550049, + "step": 5208 + }, + { + "epoch": 1.8355947136563877, + "grad_norm": 2.0658320822662137, + "learning_rate": 3.6946145564649817e-07, + "loss": 0.6190954446792603, + "step": 5209 + }, + { + "epoch": 1.8359471365638766, + "grad_norm": 2.067936609981899, + "learning_rate": 3.678939902163048e-07, + "loss": 0.6820691823959351, + "step": 5210 + }, + { + "epoch": 1.8362995594713656, + "grad_norm": 1.6116358163190896, + "learning_rate": 3.6632979458633867e-07, + "loss": 0.5309683084487915, + "step": 5211 + }, + { + "epoch": 1.8366519823788545, + "grad_norm": 1.7416007879814253, + "learning_rate": 3.6476886928759726e-07, + "loss": 0.5110820531845093, + "step": 5212 + }, + { + "epoch": 1.8370044052863435, + "grad_norm": 1.723221372899004, + "learning_rate": 3.6321121484996447e-07, + "loss": 0.6226333975791931, + "step": 5213 + }, + { + "epoch": 1.8373568281938326, + "grad_norm": 2.234178040191492, + "learning_rate": 3.6165683180221735e-07, + "loss": 0.6287777423858643, + "step": 5214 + }, + { + "epoch": 1.8377092511013216, + "grad_norm": 1.9295755553308827, + "learning_rate": 3.601057206720182e-07, + "loss": 0.7033661603927612, + "step": 5215 + }, + { + "epoch": 1.8380616740088107, + "grad_norm": 2.3805238150126473, + "learning_rate": 3.5855788198592257e-07, + "loss": 0.5841168165206909, + "step": 5216 + }, + { + "epoch": 1.8384140969162996, + "grad_norm": 1.9475866760038651, + "learning_rate": 3.570133162693734e-07, + "loss": 0.6797176599502563, + "step": 5217 + }, + { + "epoch": 1.8387665198237886, + "grad_norm": 1.8282916435885754, + "learning_rate": 3.5547202404670246e-07, + "loss": 0.4317880868911743, + "step": 5218 + }, + { + "epoch": 1.8391189427312775, + "grad_norm": 1.8334146730463823, + "learning_rate": 3.5393400584113004e-07, + "loss": 
0.4757443368434906, + "step": 5219 + }, + { + "epoch": 1.8394713656387665, + "grad_norm": 1.907804753373484, + "learning_rate": 3.5239926217476627e-07, + "loss": 0.6341856718063354, + "step": 5220 + }, + { + "epoch": 1.8398237885462554, + "grad_norm": 1.8320811149781473, + "learning_rate": 3.5086779356860777e-07, + "loss": 0.5401504039764404, + "step": 5221 + }, + { + "epoch": 1.8401762114537445, + "grad_norm": 1.9485378653698677, + "learning_rate": 3.4933960054254314e-07, + "loss": 0.507185697555542, + "step": 5222 + }, + { + "epoch": 1.8405286343612335, + "grad_norm": 1.8475072625751607, + "learning_rate": 3.478146836153418e-07, + "loss": 0.544599175453186, + "step": 5223 + }, + { + "epoch": 1.8408810572687224, + "grad_norm": 1.7516560167770228, + "learning_rate": 3.4629304330466964e-07, + "loss": 0.5231183767318726, + "step": 5224 + }, + { + "epoch": 1.8412334801762116, + "grad_norm": 1.9594972590005177, + "learning_rate": 3.447746801270746e-07, + "loss": 0.5505118370056152, + "step": 5225 + }, + { + "epoch": 1.8415859030837005, + "grad_norm": 1.8779318369867126, + "learning_rate": 3.432595945979944e-07, + "loss": 0.6056097149848938, + "step": 5226 + }, + { + "epoch": 1.8419383259911895, + "grad_norm": 2.1828814894071806, + "learning_rate": 3.4174778723175204e-07, + "loss": 0.6292518377304077, + "step": 5227 + }, + { + "epoch": 1.8422907488986784, + "grad_norm": 2.121254282924953, + "learning_rate": 3.4023925854156035e-07, + "loss": 0.6821235418319702, + "step": 5228 + }, + { + "epoch": 1.8426431718061673, + "grad_norm": 1.8646887822875091, + "learning_rate": 3.3873400903951636e-07, + "loss": 0.6663388013839722, + "step": 5229 + }, + { + "epoch": 1.8429955947136563, + "grad_norm": 1.7699721471254064, + "learning_rate": 3.3723203923660795e-07, + "loss": 0.5283368825912476, + "step": 5230 + }, + { + "epoch": 1.8433480176211454, + "grad_norm": 1.8757843861417383, + "learning_rate": 3.35733349642704e-07, + "loss": 0.6193508505821228, + "step": 5231 + }, + { + 
"epoch": 1.8437004405286344, + "grad_norm": 1.8277200643148488, + "learning_rate": 3.3423794076656635e-07, + "loss": 0.5790667533874512, + "step": 5232 + }, + { + "epoch": 1.8440528634361235, + "grad_norm": 1.8773326611638317, + "learning_rate": 3.3274581311583786e-07, + "loss": 0.5774649381637573, + "step": 5233 + }, + { + "epoch": 1.8444052863436124, + "grad_norm": 1.8907427086265292, + "learning_rate": 3.312569671970489e-07, + "loss": 0.7818938493728638, + "step": 5234 + }, + { + "epoch": 1.8447577092511014, + "grad_norm": 1.9327729742836703, + "learning_rate": 3.297714035156174e-07, + "loss": 0.7140024900436401, + "step": 5235 + }, + { + "epoch": 1.8451101321585903, + "grad_norm": 1.8813227413168874, + "learning_rate": 3.2828912257584664e-07, + "loss": 0.526549220085144, + "step": 5236 + }, + { + "epoch": 1.8454625550660793, + "grad_norm": 1.7801884231788352, + "learning_rate": 3.268101248809219e-07, + "loss": 0.5497986078262329, + "step": 5237 + }, + { + "epoch": 1.8458149779735682, + "grad_norm": 1.8669723447216968, + "learning_rate": 3.2533441093292153e-07, + "loss": 0.587260901927948, + "step": 5238 + }, + { + "epoch": 1.8461674008810571, + "grad_norm": 1.7543011465942289, + "learning_rate": 3.238619812327992e-07, + "loss": 0.6064329147338867, + "step": 5239 + }, + { + "epoch": 1.8465198237885463, + "grad_norm": 1.6866654405083865, + "learning_rate": 3.22392836280403e-07, + "loss": 0.5427783727645874, + "step": 5240 + }, + { + "epoch": 1.8468722466960352, + "grad_norm": 2.007154381007414, + "learning_rate": 3.209269765744605e-07, + "loss": 0.6315155029296875, + "step": 5241 + }, + { + "epoch": 1.8472246696035244, + "grad_norm": 1.8683798567232428, + "learning_rate": 3.194644026125848e-07, + "loss": 0.47614991664886475, + "step": 5242 + }, + { + "epoch": 1.8475770925110133, + "grad_norm": 1.7870378472192856, + "learning_rate": 3.1800511489127553e-07, + "loss": 0.4671345353126526, + "step": 5243 + }, + { + "epoch": 1.8479295154185023, + "grad_norm": 
2.1401583736619774, + "learning_rate": 3.1654911390591404e-07, + "loss": 0.5751510262489319, + "step": 5244 + }, + { + "epoch": 1.8482819383259912, + "grad_norm": 1.8052174793154305, + "learning_rate": 3.1509640015076946e-07, + "loss": 0.41024816036224365, + "step": 5245 + }, + { + "epoch": 1.8486343612334801, + "grad_norm": 1.731551636677765, + "learning_rate": 3.136469741189918e-07, + "loss": 0.5401195287704468, + "step": 5246 + }, + { + "epoch": 1.848986784140969, + "grad_norm": 1.653370854405324, + "learning_rate": 3.1220083630261413e-07, + "loss": 0.526515007019043, + "step": 5247 + }, + { + "epoch": 1.849339207048458, + "grad_norm": 1.8913718815401968, + "learning_rate": 3.1075798719255813e-07, + "loss": 0.5476140975952148, + "step": 5248 + }, + { + "epoch": 1.8496916299559472, + "grad_norm": 1.8985078398075201, + "learning_rate": 3.093184272786254e-07, + "loss": 0.5542911291122437, + "step": 5249 + }, + { + "epoch": 1.850044052863436, + "grad_norm": 1.880723497688654, + "learning_rate": 3.078821570495005e-07, + "loss": 0.5147569179534912, + "step": 5250 + }, + { + "epoch": 1.8503964757709253, + "grad_norm": 1.982026450369604, + "learning_rate": 3.0644917699275355e-07, + "loss": 0.5774611830711365, + "step": 5251 + }, + { + "epoch": 1.8507488986784142, + "grad_norm": 1.7200421440570042, + "learning_rate": 3.0501948759483646e-07, + "loss": 0.6516300439834595, + "step": 5252 + }, + { + "epoch": 1.8511013215859031, + "grad_norm": 2.0195950340864495, + "learning_rate": 3.0359308934108435e-07, + "loss": 0.7598013877868652, + "step": 5253 + }, + { + "epoch": 1.851453744493392, + "grad_norm": 2.0638022912417506, + "learning_rate": 3.0216998271571653e-07, + "loss": 0.5605336427688599, + "step": 5254 + }, + { + "epoch": 1.851806167400881, + "grad_norm": 2.028778763216705, + "learning_rate": 3.007501682018288e-07, + "loss": 0.6549514532089233, + "step": 5255 + }, + { + "epoch": 1.85215859030837, + "grad_norm": 2.059939172990393, + "learning_rate": 
2.993336462814089e-07, + "loss": 0.5390901565551758, + "step": 5256 + }, + { + "epoch": 1.8525110132158589, + "grad_norm": 1.812559235788011, + "learning_rate": 2.979204174353201e-07, + "loss": 0.5039275884628296, + "step": 5257 + }, + { + "epoch": 1.852863436123348, + "grad_norm": 1.6793203683546194, + "learning_rate": 2.9651048214330956e-07, + "loss": 0.4715292453765869, + "step": 5258 + }, + { + "epoch": 1.853215859030837, + "grad_norm": 1.5445048853459802, + "learning_rate": 2.951038408840068e-07, + "loss": 0.4593687653541565, + "step": 5259 + }, + { + "epoch": 1.8535682819383261, + "grad_norm": 2.427211613937901, + "learning_rate": 2.9370049413492084e-07, + "loss": 0.8451346158981323, + "step": 5260 + }, + { + "epoch": 1.853920704845815, + "grad_norm": 1.796887553027914, + "learning_rate": 2.923004423724474e-07, + "loss": 0.5567130446434021, + "step": 5261 + }, + { + "epoch": 1.854273127753304, + "grad_norm": 1.6019285108338794, + "learning_rate": 2.909036860718595e-07, + "loss": 0.4740293622016907, + "step": 5262 + }, + { + "epoch": 1.854625550660793, + "grad_norm": 1.566732286884799, + "learning_rate": 2.895102257073101e-07, + "loss": 0.5279378294944763, + "step": 5263 + }, + { + "epoch": 1.8549779735682819, + "grad_norm": 2.0699049521167923, + "learning_rate": 2.881200617518387e-07, + "loss": 0.5977471470832825, + "step": 5264 + }, + { + "epoch": 1.8553303964757708, + "grad_norm": 2.147594228172352, + "learning_rate": 2.8673319467736104e-07, + "loss": 0.5385996699333191, + "step": 5265 + }, + { + "epoch": 1.85568281938326, + "grad_norm": 2.011382389323699, + "learning_rate": 2.85349624954675e-07, + "loss": 0.5702279806137085, + "step": 5266 + }, + { + "epoch": 1.856035242290749, + "grad_norm": 1.875774247263156, + "learning_rate": 2.839693530534604e-07, + "loss": 0.584097146987915, + "step": 5267 + }, + { + "epoch": 1.8563876651982378, + "grad_norm": 1.9561416110933127, + "learning_rate": 2.825923794422758e-07, + "loss": 0.6205782890319824, + "step": 5268 + 
}, + { + "epoch": 1.856740088105727, + "grad_norm": 1.8766933117628495, + "learning_rate": 2.8121870458856284e-07, + "loss": 0.5626852512359619, + "step": 5269 + }, + { + "epoch": 1.857092511013216, + "grad_norm": 1.826792073608219, + "learning_rate": 2.798483289586396e-07, + "loss": 0.6052513122558594, + "step": 5270 + }, + { + "epoch": 1.8574449339207049, + "grad_norm": 2.051566447554152, + "learning_rate": 2.7848125301770504e-07, + "loss": 0.5074095726013184, + "step": 5271 + }, + { + "epoch": 1.8577973568281938, + "grad_norm": 2.3608926664844705, + "learning_rate": 2.7711747722984127e-07, + "loss": 0.8006119728088379, + "step": 5272 + }, + { + "epoch": 1.8581497797356827, + "grad_norm": 1.939365874771501, + "learning_rate": 2.7575700205800694e-07, + "loss": 0.6437188982963562, + "step": 5273 + }, + { + "epoch": 1.8585022026431717, + "grad_norm": 2.070323156152843, + "learning_rate": 2.743998279640403e-07, + "loss": 0.6610177755355835, + "step": 5274 + }, + { + "epoch": 1.8588546255506608, + "grad_norm": 2.242727394045801, + "learning_rate": 2.7304595540865953e-07, + "loss": 0.6041977405548096, + "step": 5275 + }, + { + "epoch": 1.8592070484581498, + "grad_norm": 2.296252009493085, + "learning_rate": 2.716953848514625e-07, + "loss": 0.5684002041816711, + "step": 5276 + }, + { + "epoch": 1.859559471365639, + "grad_norm": 2.108426771462305, + "learning_rate": 2.703481167509281e-07, + "loss": 0.7256498336791992, + "step": 5277 + }, + { + "epoch": 1.8599118942731279, + "grad_norm": 1.959590007863519, + "learning_rate": 2.690041515644093e-07, + "loss": 0.7264266014099121, + "step": 5278 + }, + { + "epoch": 1.8602643171806168, + "grad_norm": 2.0027244373685047, + "learning_rate": 2.6766348974813895e-07, + "loss": 0.5427879095077515, + "step": 5279 + }, + { + "epoch": 1.8606167400881057, + "grad_norm": 1.679848534564951, + "learning_rate": 2.663261317572341e-07, + "loss": 0.5970745086669922, + "step": 5280 + }, + { + "epoch": 1.8609691629955947, + "grad_norm": 
1.9989999209106484, + "learning_rate": 2.6499207804568495e-07, + "loss": 0.5796299576759338, + "step": 5281 + }, + { + "epoch": 1.8613215859030836, + "grad_norm": 1.6433355014728201, + "learning_rate": 2.6366132906635923e-07, + "loss": 0.4900246262550354, + "step": 5282 + }, + { + "epoch": 1.8616740088105725, + "grad_norm": 1.8937189873731617, + "learning_rate": 2.6233388527100777e-07, + "loss": 0.6052582263946533, + "step": 5283 + }, + { + "epoch": 1.8620264317180617, + "grad_norm": 2.1632344831004127, + "learning_rate": 2.610097471102524e-07, + "loss": 0.6908484697341919, + "step": 5284 + }, + { + "epoch": 1.8623788546255506, + "grad_norm": 1.9493448159947622, + "learning_rate": 2.596889150336024e-07, + "loss": 0.6353795528411865, + "step": 5285 + }, + { + "epoch": 1.8627312775330398, + "grad_norm": 2.019445353702499, + "learning_rate": 2.5837138948943354e-07, + "loss": 0.803575873374939, + "step": 5286 + }, + { + "epoch": 1.8630837004405287, + "grad_norm": 1.9882041113358364, + "learning_rate": 2.5705717092500694e-07, + "loss": 0.5551957488059998, + "step": 5287 + }, + { + "epoch": 1.8634361233480177, + "grad_norm": 1.9987103830633048, + "learning_rate": 2.5574625978646017e-07, + "loss": 0.6247879266738892, + "step": 5288 + }, + { + "epoch": 1.8637885462555066, + "grad_norm": 2.072117287811421, + "learning_rate": 2.544386565188062e-07, + "loss": 0.6029977798461914, + "step": 5289 + }, + { + "epoch": 1.8641409691629955, + "grad_norm": 2.101747258049668, + "learning_rate": 2.531343615659343e-07, + "loss": 0.611297070980072, + "step": 5290 + }, + { + "epoch": 1.8644933920704845, + "grad_norm": 2.1168170865355616, + "learning_rate": 2.518333753706137e-07, + "loss": 0.5290260314941406, + "step": 5291 + }, + { + "epoch": 1.8648458149779734, + "grad_norm": 1.88270236786552, + "learning_rate": 2.5053569837448664e-07, + "loss": 0.5988795757293701, + "step": 5292 + }, + { + "epoch": 1.8651982378854626, + "grad_norm": 2.1933893236783613, + "learning_rate": 
2.4924133101807636e-07, + "loss": 0.671028733253479, + "step": 5293 + }, + { + "epoch": 1.8655506607929515, + "grad_norm": 2.195163128107634, + "learning_rate": 2.4795027374077905e-07, + "loss": 0.5741167664527893, + "step": 5294 + }, + { + "epoch": 1.8659030837004407, + "grad_norm": 1.8793688638635475, + "learning_rate": 2.4666252698086867e-07, + "loss": 0.47447216510772705, + "step": 5295 + }, + { + "epoch": 1.8662555066079296, + "grad_norm": 1.813537542020307, + "learning_rate": 2.453780911754955e-07, + "loss": 0.6535651087760925, + "step": 5296 + }, + { + "epoch": 1.8666079295154185, + "grad_norm": 1.830958965071389, + "learning_rate": 2.4409696676068517e-07, + "loss": 0.5928847193717957, + "step": 5297 + }, + { + "epoch": 1.8669603524229075, + "grad_norm": 2.1016696944101363, + "learning_rate": 2.428191541713387e-07, + "loss": 0.5928774476051331, + "step": 5298 + }, + { + "epoch": 1.8673127753303964, + "grad_norm": 1.8181831294339377, + "learning_rate": 2.415446538412358e-07, + "loss": 0.5798670053482056, + "step": 5299 + }, + { + "epoch": 1.8676651982378853, + "grad_norm": 1.8162014512536164, + "learning_rate": 2.4027346620302707e-07, + "loss": 0.6222843527793884, + "step": 5300 + }, + { + "epoch": 1.8680176211453743, + "grad_norm": 1.9183032685045331, + "learning_rate": 2.39005591688245e-07, + "loss": 0.5501612424850464, + "step": 5301 + }, + { + "epoch": 1.8683700440528634, + "grad_norm": 1.7621857286720093, + "learning_rate": 2.377410307272887e-07, + "loss": 0.5266422033309937, + "step": 5302 + }, + { + "epoch": 1.8687224669603524, + "grad_norm": 1.9926692528436012, + "learning_rate": 2.3647978374944037e-07, + "loss": 0.7145729064941406, + "step": 5303 + }, + { + "epoch": 1.8690748898678415, + "grad_norm": 1.8939089473542137, + "learning_rate": 2.3522185118285411e-07, + "loss": 0.6505781412124634, + "step": 5304 + }, + { + "epoch": 1.8694273127753305, + "grad_norm": 2.0817226286854607, + "learning_rate": 2.3396723345455728e-07, + "loss": 
0.6278528571128845, + "step": 5305 + }, + { + "epoch": 1.8697797356828194, + "grad_norm": 1.790557343760165, + "learning_rate": 2.3271593099045475e-07, + "loss": 0.5650503039360046, + "step": 5306 + }, + { + "epoch": 1.8701321585903083, + "grad_norm": 1.6157546701422072, + "learning_rate": 2.314679442153256e-07, + "loss": 0.6267939209938049, + "step": 5307 + }, + { + "epoch": 1.8704845814977973, + "grad_norm": 1.874302486649101, + "learning_rate": 2.302232735528187e-07, + "loss": 0.45913875102996826, + "step": 5308 + }, + { + "epoch": 1.8708370044052862, + "grad_norm": 1.7607480001908633, + "learning_rate": 2.289819194254661e-07, + "loss": 0.6122059226036072, + "step": 5309 + }, + { + "epoch": 1.8711894273127754, + "grad_norm": 1.803806841150382, + "learning_rate": 2.2774388225466514e-07, + "loss": 0.6479405164718628, + "step": 5310 + }, + { + "epoch": 1.8715418502202643, + "grad_norm": 1.8546829656575279, + "learning_rate": 2.26509162460693e-07, + "loss": 0.5013849139213562, + "step": 5311 + }, + { + "epoch": 1.8718942731277532, + "grad_norm": 1.749663744266161, + "learning_rate": 2.2527776046269767e-07, + "loss": 0.6431373357772827, + "step": 5312 + }, + { + "epoch": 1.8722466960352424, + "grad_norm": 1.669095711801791, + "learning_rate": 2.2404967667870147e-07, + "loss": 0.6447317004203796, + "step": 5313 + }, + { + "epoch": 1.8725991189427313, + "grad_norm": 2.405218866271529, + "learning_rate": 2.2282491152560203e-07, + "loss": 0.5784682631492615, + "step": 5314 + }, + { + "epoch": 1.8729515418502203, + "grad_norm": 1.7544004376252713, + "learning_rate": 2.2160346541916677e-07, + "loss": 0.560835599899292, + "step": 5315 + }, + { + "epoch": 1.8733039647577092, + "grad_norm": 1.7162975954294335, + "learning_rate": 2.2038533877404066e-07, + "loss": 0.5930913686752319, + "step": 5316 + }, + { + "epoch": 1.8736563876651982, + "grad_norm": 1.9892540663354406, + "learning_rate": 2.1917053200374073e-07, + "loss": 0.7221095561981201, + "step": 5317 + }, + { + "epoch": 
1.874008810572687, + "grad_norm": 1.9380281400359725, + "learning_rate": 2.179590455206515e-07, + "loss": 0.6307567358016968, + "step": 5318 + }, + { + "epoch": 1.8743612334801762, + "grad_norm": 2.0190052317760814, + "learning_rate": 2.167508797360396e-07, + "loss": 0.6158597469329834, + "step": 5319 + }, + { + "epoch": 1.8747136563876652, + "grad_norm": 1.7468326387459954, + "learning_rate": 2.1554603506003802e-07, + "loss": 0.5778557062149048, + "step": 5320 + }, + { + "epoch": 1.8750660792951543, + "grad_norm": 1.497372593580549, + "learning_rate": 2.1434451190165294e-07, + "loss": 0.5213632583618164, + "step": 5321 + }, + { + "epoch": 1.8754185022026433, + "grad_norm": 1.8555907678767487, + "learning_rate": 2.131463106687659e-07, + "loss": 0.6633203029632568, + "step": 5322 + }, + { + "epoch": 1.8757709251101322, + "grad_norm": 1.9991798348617227, + "learning_rate": 2.1195143176812817e-07, + "loss": 0.6586780548095703, + "step": 5323 + }, + { + "epoch": 1.8761233480176212, + "grad_norm": 1.991978810673319, + "learning_rate": 2.1075987560536305e-07, + "loss": 0.4946047067642212, + "step": 5324 + }, + { + "epoch": 1.87647577092511, + "grad_norm": 1.6744690075916624, + "learning_rate": 2.0957164258497031e-07, + "loss": 0.5689302682876587, + "step": 5325 + }, + { + "epoch": 1.876828193832599, + "grad_norm": 1.9550201402383367, + "learning_rate": 2.0838673311031287e-07, + "loss": 0.5761843323707581, + "step": 5326 + }, + { + "epoch": 1.877180616740088, + "grad_norm": 1.6070623974889393, + "learning_rate": 2.0720514758363343e-07, + "loss": 0.5714447498321533, + "step": 5327 + }, + { + "epoch": 1.8775330396475771, + "grad_norm": 1.7537019465709125, + "learning_rate": 2.0602688640604441e-07, + "loss": 0.4566301107406616, + "step": 5328 + }, + { + "epoch": 1.877885462555066, + "grad_norm": 2.110089760102471, + "learning_rate": 2.04851949977527e-07, + "loss": 0.6326137781143188, + "step": 5329 + }, + { + "epoch": 1.8782378854625552, + "grad_norm": 1.8775980517302555, + 
"learning_rate": 2.036803386969355e-07, + "loss": 0.6342206001281738, + "step": 5330 + }, + { + "epoch": 1.8785903083700441, + "grad_norm": 1.9958405881870251, + "learning_rate": 2.0251205296199616e-07, + "loss": 0.5525872707366943, + "step": 5331 + }, + { + "epoch": 1.878942731277533, + "grad_norm": 1.6965395036886874, + "learning_rate": 2.0134709316930733e-07, + "loss": 0.4932950735092163, + "step": 5332 + }, + { + "epoch": 1.879295154185022, + "grad_norm": 1.7918605717870588, + "learning_rate": 2.001854597143349e-07, + "loss": 0.6526485681533813, + "step": 5333 + }, + { + "epoch": 1.879647577092511, + "grad_norm": 1.8862781919579625, + "learning_rate": 1.990271529914156e-07, + "loss": 0.6256940960884094, + "step": 5334 + }, + { + "epoch": 1.88, + "grad_norm": 2.361417623387243, + "learning_rate": 1.9787217339376053e-07, + "loss": 0.6406987905502319, + "step": 5335 + }, + { + "epoch": 1.8803524229074888, + "grad_norm": 1.812802653812012, + "learning_rate": 1.9672052131345043e-07, + "loss": 0.6141321659088135, + "step": 5336 + }, + { + "epoch": 1.880704845814978, + "grad_norm": 2.025004487176686, + "learning_rate": 1.955721971414326e-07, + "loss": 0.558428943157196, + "step": 5337 + }, + { + "epoch": 1.881057268722467, + "grad_norm": 1.973943138705469, + "learning_rate": 1.9442720126752968e-07, + "loss": 0.5995065569877625, + "step": 5338 + }, + { + "epoch": 1.881409691629956, + "grad_norm": 1.6822565518265986, + "learning_rate": 1.932855340804296e-07, + "loss": 0.5109822750091553, + "step": 5339 + }, + { + "epoch": 1.881762114537445, + "grad_norm": 1.941646392245956, + "learning_rate": 1.921471959676957e-07, + "loss": 0.6695220470428467, + "step": 5340 + }, + { + "epoch": 1.882114537444934, + "grad_norm": 1.8857636319654494, + "learning_rate": 1.9101218731575777e-07, + "loss": 0.6982283592224121, + "step": 5341 + }, + { + "epoch": 1.882466960352423, + "grad_norm": 1.8944501787373655, + "learning_rate": 1.8988050850991314e-07, + "loss": 0.6475410461425781, + 
"step": 5342 + }, + { + "epoch": 1.8828193832599118, + "grad_norm": 1.7449353446414906, + "learning_rate": 1.8875215993433448e-07, + "loss": 0.57706218957901, + "step": 5343 + }, + { + "epoch": 1.8831718061674008, + "grad_norm": 1.708696671712054, + "learning_rate": 1.8762714197205988e-07, + "loss": 0.5243045091629028, + "step": 5344 + }, + { + "epoch": 1.88352422907489, + "grad_norm": 1.797956034726921, + "learning_rate": 1.865054550049994e-07, + "loss": 0.6208887100219727, + "step": 5345 + }, + { + "epoch": 1.8838766519823789, + "grad_norm": 1.9048581772706628, + "learning_rate": 1.853870994139284e-07, + "loss": 0.5572443008422852, + "step": 5346 + }, + { + "epoch": 1.8842290748898678, + "grad_norm": 1.7939928987370566, + "learning_rate": 1.8427207557849436e-07, + "loss": 0.5673031806945801, + "step": 5347 + }, + { + "epoch": 1.884581497797357, + "grad_norm": 1.6894216214789064, + "learning_rate": 1.8316038387721558e-07, + "loss": 0.5085422992706299, + "step": 5348 + }, + { + "epoch": 1.8849339207048459, + "grad_norm": 1.7455381888238348, + "learning_rate": 1.8205202468747463e-07, + "loss": 0.5480824708938599, + "step": 5349 + }, + { + "epoch": 1.8852863436123348, + "grad_norm": 1.7848642016680003, + "learning_rate": 1.8094699838552387e-07, + "loss": 0.6236293911933899, + "step": 5350 + }, + { + "epoch": 1.8856387665198238, + "grad_norm": 1.7626474829765526, + "learning_rate": 1.798453053464888e-07, + "loss": 0.541741132736206, + "step": 5351 + }, + { + "epoch": 1.8859911894273127, + "grad_norm": 1.7289887528200605, + "learning_rate": 1.7874694594435692e-07, + "loss": 0.5309538245201111, + "step": 5352 + }, + { + "epoch": 1.8863436123348016, + "grad_norm": 1.944311199542912, + "learning_rate": 1.7765192055198888e-07, + "loss": 0.5886228084564209, + "step": 5353 + }, + { + "epoch": 1.8866960352422908, + "grad_norm": 1.6415851491633797, + "learning_rate": 1.7656022954111064e-07, + "loss": 0.6216265559196472, + "step": 5354 + }, + { + "epoch": 1.8870484581497797, + 
"grad_norm": 1.6922081510439257, + "learning_rate": 1.7547187328231575e-07, + "loss": 0.5393999814987183, + "step": 5355 + }, + { + "epoch": 1.8874008810572689, + "grad_norm": 1.7167987260272457, + "learning_rate": 1.74386852145072e-07, + "loss": 0.583373486995697, + "step": 5356 + }, + { + "epoch": 1.8877533039647578, + "grad_norm": 2.361225928566298, + "learning_rate": 1.73305166497707e-07, + "loss": 0.6403313875198364, + "step": 5357 + }, + { + "epoch": 1.8881057268722468, + "grad_norm": 1.771396849548527, + "learning_rate": 1.7222681670741814e-07, + "loss": 0.5780963897705078, + "step": 5358 + }, + { + "epoch": 1.8884581497797357, + "grad_norm": 1.59802053134679, + "learning_rate": 1.711518031402748e-07, + "loss": 0.6046397686004639, + "step": 5359 + }, + { + "epoch": 1.8888105726872246, + "grad_norm": 1.5504259730519754, + "learning_rate": 1.700801261612084e-07, + "loss": 0.5582219362258911, + "step": 5360 + }, + { + "epoch": 1.8891629955947136, + "grad_norm": 1.962329345083699, + "learning_rate": 1.6901178613402125e-07, + "loss": 0.4880410432815552, + "step": 5361 + }, + { + "epoch": 1.8895154185022025, + "grad_norm": 2.055990524297856, + "learning_rate": 1.6794678342138105e-07, + "loss": 0.7417550086975098, + "step": 5362 + }, + { + "epoch": 1.8898678414096917, + "grad_norm": 1.8316934396355506, + "learning_rate": 1.668851183848219e-07, + "loss": 0.4616948962211609, + "step": 5363 + }, + { + "epoch": 1.8902202643171806, + "grad_norm": 1.6177478399502592, + "learning_rate": 1.658267913847489e-07, + "loss": 0.5595716834068298, + "step": 5364 + }, + { + "epoch": 1.8905726872246698, + "grad_norm": 1.9610306002643032, + "learning_rate": 1.6477180278042793e-07, + "loss": 0.72450852394104, + "step": 5365 + }, + { + "epoch": 1.8909251101321587, + "grad_norm": 1.8036541582694667, + "learning_rate": 1.637201529299959e-07, + "loss": 0.6261592507362366, + "step": 5366 + }, + { + "epoch": 1.8912775330396476, + "grad_norm": 2.1024939179342823, + "learning_rate": 
1.6267184219045607e-07, + "loss": 0.5023064613342285, + "step": 5367 + }, + { + "epoch": 1.8916299559471366, + "grad_norm": 1.9210322300280602, + "learning_rate": 1.6162687091767714e-07, + "loss": 0.7113457918167114, + "step": 5368 + }, + { + "epoch": 1.8919823788546255, + "grad_norm": 1.9212954550271457, + "learning_rate": 1.6058523946639426e-07, + "loss": 0.5376787185668945, + "step": 5369 + }, + { + "epoch": 1.8923348017621144, + "grad_norm": 1.86817536856008, + "learning_rate": 1.5954694819020788e-07, + "loss": 0.6523979902267456, + "step": 5370 + }, + { + "epoch": 1.8926872246696034, + "grad_norm": 1.841265437549123, + "learning_rate": 1.5851199744158607e-07, + "loss": 0.6610705852508545, + "step": 5371 + }, + { + "epoch": 1.8930396475770925, + "grad_norm": 2.0967966308369053, + "learning_rate": 1.5748038757186445e-07, + "loss": 0.657126247882843, + "step": 5372 + }, + { + "epoch": 1.8933920704845815, + "grad_norm": 2.3300722251609893, + "learning_rate": 1.5645211893123846e-07, + "loss": 0.7247096300125122, + "step": 5373 + }, + { + "epoch": 1.8937444933920706, + "grad_norm": 1.5063549897958597, + "learning_rate": 1.5542719186877553e-07, + "loss": 0.5392117500305176, + "step": 5374 + }, + { + "epoch": 1.8940969162995596, + "grad_norm": 1.706529406386883, + "learning_rate": 1.5440560673240735e-07, + "loss": 0.5038361549377441, + "step": 5375 + }, + { + "epoch": 1.8944493392070485, + "grad_norm": 1.9403637299706042, + "learning_rate": 1.5338736386892982e-07, + "loss": 0.4768316447734833, + "step": 5376 + }, + { + "epoch": 1.8948017621145374, + "grad_norm": 1.7917263966392405, + "learning_rate": 1.5237246362400316e-07, + "loss": 0.5925793051719666, + "step": 5377 + }, + { + "epoch": 1.8951541850220264, + "grad_norm": 2.029166285154972, + "learning_rate": 1.5136090634215616e-07, + "loss": 0.47840988636016846, + "step": 5378 + }, + { + "epoch": 1.8955066079295153, + "grad_norm": 1.9172034216887006, + "learning_rate": 1.5035269236677974e-07, + "loss": 
0.6365169882774353, + "step": 5379 + }, + { + "epoch": 1.8958590308370042, + "grad_norm": 1.789950493711397, + "learning_rate": 1.4934782204013344e-07, + "loss": 0.6287797689437866, + "step": 5380 + }, + { + "epoch": 1.8962114537444934, + "grad_norm": 1.8420293657892082, + "learning_rate": 1.4834629570333548e-07, + "loss": 0.6859137415885925, + "step": 5381 + }, + { + "epoch": 1.8965638766519823, + "grad_norm": 1.9365437650034845, + "learning_rate": 1.4734811369637725e-07, + "loss": 0.5545040369033813, + "step": 5382 + }, + { + "epoch": 1.8969162995594715, + "grad_norm": 1.6857031681916985, + "learning_rate": 1.463532763581077e-07, + "loss": 0.6418923139572144, + "step": 5383 + }, + { + "epoch": 1.8972687224669604, + "grad_norm": 4.115242480246632, + "learning_rate": 1.4536178402624334e-07, + "loss": 0.7618488669395447, + "step": 5384 + }, + { + "epoch": 1.8976211453744494, + "grad_norm": 1.7790399709296727, + "learning_rate": 1.4437363703736718e-07, + "loss": 0.6178286671638489, + "step": 5385 + }, + { + "epoch": 1.8979735682819383, + "grad_norm": 2.33955789440919, + "learning_rate": 1.4338883572692087e-07, + "loss": 0.6800570487976074, + "step": 5386 + }, + { + "epoch": 1.8983259911894272, + "grad_norm": 1.9056441030293936, + "learning_rate": 1.4240738042921588e-07, + "loss": 0.6063584089279175, + "step": 5387 + }, + { + "epoch": 1.8986784140969162, + "grad_norm": 1.857878498727731, + "learning_rate": 1.4142927147742792e-07, + "loss": 0.5631873607635498, + "step": 5388 + }, + { + "epoch": 1.8990308370044053, + "grad_norm": 1.6999145603505723, + "learning_rate": 1.4045450920358917e-07, + "loss": 0.5346484184265137, + "step": 5389 + }, + { + "epoch": 1.8993832599118943, + "grad_norm": 1.660876208730021, + "learning_rate": 1.3948309393860605e-07, + "loss": 0.5043535232543945, + "step": 5390 + }, + { + "epoch": 1.8997356828193832, + "grad_norm": 1.9091498065078292, + "learning_rate": 1.3851502601224032e-07, + "loss": 0.6591805219650269, + "step": 5391 + }, + { + 
"epoch": 1.9000881057268724, + "grad_norm": 1.777554153966534, + "learning_rate": 1.3755030575312355e-07, + "loss": 0.6831244826316833, + "step": 5392 + }, + { + "epoch": 1.9004405286343613, + "grad_norm": 1.744983267268657, + "learning_rate": 1.3658893348874714e-07, + "loss": 0.6572617292404175, + "step": 5393 + }, + { + "epoch": 1.9007929515418502, + "grad_norm": 2.007956379457216, + "learning_rate": 1.3563090954546555e-07, + "loss": 0.5834530591964722, + "step": 5394 + }, + { + "epoch": 1.9011453744493392, + "grad_norm": 1.8405418946212868, + "learning_rate": 1.3467623424850084e-07, + "loss": 0.5810972452163696, + "step": 5395 + }, + { + "epoch": 1.9014977973568281, + "grad_norm": 1.8342670520255937, + "learning_rate": 1.3372490792193493e-07, + "loss": 0.6338596940040588, + "step": 5396 + }, + { + "epoch": 1.901850220264317, + "grad_norm": 2.4739742581402946, + "learning_rate": 1.327769308887117e-07, + "loss": 0.5274045467376709, + "step": 5397 + }, + { + "epoch": 1.9022026431718062, + "grad_norm": 2.13415646905843, + "learning_rate": 1.3183230347064147e-07, + "loss": 0.5416278839111328, + "step": 5398 + }, + { + "epoch": 1.9025550660792951, + "grad_norm": 1.8878260396672215, + "learning_rate": 1.3089102598839442e-07, + "loss": 0.4818935692310333, + "step": 5399 + }, + { + "epoch": 1.9029074889867843, + "grad_norm": 1.6383283062285148, + "learning_rate": 1.299530987615072e-07, + "loss": 0.4553770124912262, + "step": 5400 + }, + { + "epoch": 1.9032599118942732, + "grad_norm": 1.7060011862412936, + "learning_rate": 1.2901852210837507e-07, + "loss": 0.5663920640945435, + "step": 5401 + }, + { + "epoch": 1.9036123348017622, + "grad_norm": 1.975611905778012, + "learning_rate": 1.2808729634625872e-07, + "loss": 0.5654638409614563, + "step": 5402 + }, + { + "epoch": 1.903964757709251, + "grad_norm": 2.0012288604540136, + "learning_rate": 1.271594217912797e-07, + "loss": 0.8061939477920532, + "step": 5403 + }, + { + "epoch": 1.90431718061674, + "grad_norm": 
2.149695499003911, + "learning_rate": 1.2623489875842276e-07, + "loss": 0.5832188129425049, + "step": 5404 + }, + { + "epoch": 1.904669603524229, + "grad_norm": 1.8966385092802618, + "learning_rate": 1.2531372756153458e-07, + "loss": 0.6112633943557739, + "step": 5405 + }, + { + "epoch": 1.905022026431718, + "grad_norm": 2.3113031929819106, + "learning_rate": 1.2439590851332394e-07, + "loss": 0.7083494663238525, + "step": 5406 + }, + { + "epoch": 1.905374449339207, + "grad_norm": 1.9110441437452201, + "learning_rate": 1.2348144192536272e-07, + "loss": 0.5319055318832397, + "step": 5407 + }, + { + "epoch": 1.905726872246696, + "grad_norm": 1.9724655581165889, + "learning_rate": 1.2257032810808256e-07, + "loss": 0.6199945211410522, + "step": 5408 + }, + { + "epoch": 1.9060792951541852, + "grad_norm": 2.3233890606574503, + "learning_rate": 1.2166256737077942e-07, + "loss": 0.6596004962921143, + "step": 5409 + }, + { + "epoch": 1.906431718061674, + "grad_norm": 1.9040617554840082, + "learning_rate": 1.20758160021609e-07, + "loss": 0.553988516330719, + "step": 5410 + }, + { + "epoch": 1.906784140969163, + "grad_norm": 2.329855084255152, + "learning_rate": 1.1985710636759128e-07, + "loss": 0.6295895576477051, + "step": 5411 + }, + { + "epoch": 1.907136563876652, + "grad_norm": 2.035449496855298, + "learning_rate": 1.1895940671460271e-07, + "loss": 0.6555598378181458, + "step": 5412 + }, + { + "epoch": 1.907488986784141, + "grad_norm": 1.8252966820746244, + "learning_rate": 1.1806506136738616e-07, + "loss": 0.48203831911087036, + "step": 5413 + }, + { + "epoch": 1.9078414096916299, + "grad_norm": 2.0052153848511045, + "learning_rate": 1.1717407062954434e-07, + "loss": 0.6632858514785767, + "step": 5414 + }, + { + "epoch": 1.9081938325991188, + "grad_norm": 1.913108464706502, + "learning_rate": 1.1628643480354085e-07, + "loss": 0.6058870553970337, + "step": 5415 + }, + { + "epoch": 1.908546255506608, + "grad_norm": 1.6689328390033278, + "learning_rate": 
1.1540215419070022e-07, + "loss": 0.5106638073921204, + "step": 5416 + }, + { + "epoch": 1.9088986784140969, + "grad_norm": 1.965112171139023, + "learning_rate": 1.1452122909120788e-07, + "loss": 0.6641250848770142, + "step": 5417 + }, + { + "epoch": 1.909251101321586, + "grad_norm": 1.7797017689691026, + "learning_rate": 1.1364365980411019e-07, + "loss": 0.4823518395423889, + "step": 5418 + }, + { + "epoch": 1.909603524229075, + "grad_norm": 1.7374946519813605, + "learning_rate": 1.127694466273166e-07, + "loss": 0.5770869255065918, + "step": 5419 + }, + { + "epoch": 1.909955947136564, + "grad_norm": 1.8439547121423094, + "learning_rate": 1.1189858985759306e-07, + "loss": 0.5120491981506348, + "step": 5420 + }, + { + "epoch": 1.9103083700440529, + "grad_norm": 1.998054444662161, + "learning_rate": 1.1103108979056865e-07, + "loss": 0.6742277145385742, + "step": 5421 + }, + { + "epoch": 1.9106607929515418, + "grad_norm": 1.7361045655014782, + "learning_rate": 1.1016694672073336e-07, + "loss": 0.6053510904312134, + "step": 5422 + }, + { + "epoch": 1.9110132158590307, + "grad_norm": 2.276872906150792, + "learning_rate": 1.0930616094143698e-07, + "loss": 0.5598228573799133, + "step": 5423 + }, + { + "epoch": 1.9113656387665197, + "grad_norm": 1.7689371613585823, + "learning_rate": 1.0844873274488799e-07, + "loss": 0.599521279335022, + "step": 5424 + }, + { + "epoch": 1.9117180616740088, + "grad_norm": 2.270274631303626, + "learning_rate": 1.075946624221591e-07, + "loss": 0.5986596345901489, + "step": 5425 + }, + { + "epoch": 1.9120704845814978, + "grad_norm": 2.0819173495219054, + "learning_rate": 1.067439502631773e-07, + "loss": 0.5657980442047119, + "step": 5426 + }, + { + "epoch": 1.912422907488987, + "grad_norm": 2.498725021517388, + "learning_rate": 1.0589659655673712e-07, + "loss": 0.5561040639877319, + "step": 5427 + }, + { + "epoch": 1.9127753303964758, + "grad_norm": 1.6241033411576455, + "learning_rate": 1.0505260159048513e-07, + "loss": 0.5088320970535278, + 
"step": 5428 + }, + { + "epoch": 1.9131277533039648, + "grad_norm": 2.1207031706665407, + "learning_rate": 1.0421196565093217e-07, + "loss": 0.5679075717926025, + "step": 5429 + }, + { + "epoch": 1.9134801762114537, + "grad_norm": 1.8775486377310404, + "learning_rate": 1.0337468902344994e-07, + "loss": 0.6701461672782898, + "step": 5430 + }, + { + "epoch": 1.9138325991189427, + "grad_norm": 1.7839638341554918, + "learning_rate": 1.0254077199226553e-07, + "loss": 0.6172112822532654, + "step": 5431 + }, + { + "epoch": 1.9141850220264316, + "grad_norm": 1.904067212081221, + "learning_rate": 1.0171021484046806e-07, + "loss": 0.5926263332366943, + "step": 5432 + }, + { + "epoch": 1.9145374449339208, + "grad_norm": 1.7190787727179386, + "learning_rate": 1.0088301785000754e-07, + "loss": 0.6142431497573853, + "step": 5433 + }, + { + "epoch": 1.9148898678414097, + "grad_norm": 1.7095738070807496, + "learning_rate": 1.0005918130168934e-07, + "loss": 0.5367780923843384, + "step": 5434 + }, + { + "epoch": 1.9152422907488986, + "grad_norm": 1.8769142431022592, + "learning_rate": 9.923870547518311e-08, + "loss": 0.5241641998291016, + "step": 5435 + }, + { + "epoch": 1.9155947136563878, + "grad_norm": 1.7765958549274539, + "learning_rate": 9.842159064901157e-08, + "loss": 0.5906308889389038, + "step": 5436 + }, + { + "epoch": 1.9159471365638767, + "grad_norm": 2.1275572555046613, + "learning_rate": 9.760783710056176e-08, + "loss": 0.5411181449890137, + "step": 5437 + }, + { + "epoch": 1.9162995594713657, + "grad_norm": 1.9001328464490854, + "learning_rate": 9.679744510607825e-08, + "loss": 0.6313618421554565, + "step": 5438 + }, + { + "epoch": 1.9166519823788546, + "grad_norm": 2.0658646856716336, + "learning_rate": 9.599041494066208e-08, + "loss": 0.6330033540725708, + "step": 5439 + }, + { + "epoch": 1.9170044052863435, + "grad_norm": 1.9617429681187768, + "learning_rate": 9.518674687827634e-08, + "loss": 0.5859507322311401, + "step": 5440 + }, + { + "epoch": 
1.9173568281938325, + "grad_norm": 1.9233196169731877, + "learning_rate": 9.438644119174057e-08, + "loss": 0.571119487285614, + "step": 5441 + }, + { + "epoch": 1.9177092511013216, + "grad_norm": 1.683294616332208, + "learning_rate": 9.3589498152733e-08, + "loss": 0.6114518046379089, + "step": 5442 + }, + { + "epoch": 1.9180616740088106, + "grad_norm": 2.0948221060814407, + "learning_rate": 9.279591803179277e-08, + "loss": 0.5762027502059937, + "step": 5443 + }, + { + "epoch": 1.9184140969162997, + "grad_norm": 1.973540736612678, + "learning_rate": 9.200570109831441e-08, + "loss": 0.6081440448760986, + "step": 5444 + }, + { + "epoch": 1.9187665198237887, + "grad_norm": 1.9242540837021294, + "learning_rate": 9.121884762055222e-08, + "loss": 0.5682440996170044, + "step": 5445 + }, + { + "epoch": 1.9191189427312776, + "grad_norm": 1.642224199268087, + "learning_rate": 9.043535786561919e-08, + "loss": 0.5290100574493408, + "step": 5446 + }, + { + "epoch": 1.9194713656387665, + "grad_norm": 1.8013641871034827, + "learning_rate": 8.965523209948367e-08, + "loss": 0.5743255019187927, + "step": 5447 + }, + { + "epoch": 1.9198237885462555, + "grad_norm": 1.6357977481393366, + "learning_rate": 8.887847058697718e-08, + "loss": 0.5955618023872375, + "step": 5448 + }, + { + "epoch": 1.9201762114537444, + "grad_norm": 1.9706217525454803, + "learning_rate": 8.810507359178322e-08, + "loss": 0.4732915759086609, + "step": 5449 + }, + { + "epoch": 1.9205286343612333, + "grad_norm": 3.2730228664607797, + "learning_rate": 8.733504137644621e-08, + "loss": 0.6712108850479126, + "step": 5450 + }, + { + "epoch": 1.9208810572687225, + "grad_norm": 1.997966446518774, + "learning_rate": 8.656837420237152e-08, + "loss": 0.5169811248779297, + "step": 5451 + }, + { + "epoch": 1.9212334801762114, + "grad_norm": 1.9146732631772796, + "learning_rate": 8.580507232981428e-08, + "loss": 0.6117082238197327, + "step": 5452 + }, + { + "epoch": 1.9215859030837006, + "grad_norm": 1.7690878518096709, + 
"learning_rate": 8.504513601789388e-08, + "loss": 0.7020283937454224, + "step": 5453 + }, + { + "epoch": 1.9219383259911895, + "grad_norm": 1.806111695783304, + "learning_rate": 8.42885655245862e-08, + "loss": 0.5489979386329651, + "step": 5454 + }, + { + "epoch": 1.9222907488986785, + "grad_norm": 1.8218906131330599, + "learning_rate": 8.353536110672133e-08, + "loss": 0.5361644625663757, + "step": 5455 + }, + { + "epoch": 1.9226431718061674, + "grad_norm": 1.8728336665856926, + "learning_rate": 8.278552301998921e-08, + "loss": 0.6470010280609131, + "step": 5456 + }, + { + "epoch": 1.9229955947136563, + "grad_norm": 1.5338046694887773, + "learning_rate": 8.203905151893731e-08, + "loss": 0.4642202854156494, + "step": 5457 + }, + { + "epoch": 1.9233480176211453, + "grad_norm": 2.1878989180883357, + "learning_rate": 8.129594685696852e-08, + "loss": 0.6817516088485718, + "step": 5458 + }, + { + "epoch": 1.9237004405286342, + "grad_norm": 1.7544221338170298, + "learning_rate": 8.055620928634433e-08, + "loss": 0.5748617649078369, + "step": 5459 + }, + { + "epoch": 1.9240528634361234, + "grad_norm": 1.9928156109239001, + "learning_rate": 7.981983905818281e-08, + "loss": 0.6730939149856567, + "step": 5460 + }, + { + "epoch": 1.9244052863436123, + "grad_norm": 1.665760800669473, + "learning_rate": 7.90868364224584e-08, + "loss": 0.46469685435295105, + "step": 5461 + }, + { + "epoch": 1.9247577092511015, + "grad_norm": 2.0844638903136907, + "learning_rate": 7.835720162800209e-08, + "loss": 0.5633926391601562, + "step": 5462 + }, + { + "epoch": 1.9251101321585904, + "grad_norm": 2.034693536740542, + "learning_rate": 7.76309349225035e-08, + "loss": 0.5813394784927368, + "step": 5463 + }, + { + "epoch": 1.9254625550660793, + "grad_norm": 1.4118750743542163, + "learning_rate": 7.690803655250656e-08, + "loss": 0.39959418773651123, + "step": 5464 + }, + { + "epoch": 1.9258149779735683, + "grad_norm": 1.7685280750016403, + "learning_rate": 7.618850676341383e-08, + "loss": 
0.6136372089385986, + "step": 5465 + }, + { + "epoch": 1.9261674008810572, + "grad_norm": 1.7393751984149959, + "learning_rate": 7.547234579948104e-08, + "loss": 0.6664354801177979, + "step": 5466 + }, + { + "epoch": 1.9265198237885461, + "grad_norm": 1.8827898065352628, + "learning_rate": 7.475955390382483e-08, + "loss": 0.6009566783905029, + "step": 5467 + }, + { + "epoch": 1.9268722466960353, + "grad_norm": 1.7872694267120686, + "learning_rate": 7.405013131841499e-08, + "loss": 0.7307299375534058, + "step": 5468 + }, + { + "epoch": 1.9272246696035242, + "grad_norm": 1.8234703336391604, + "learning_rate": 7.334407828407885e-08, + "loss": 0.5459531545639038, + "step": 5469 + }, + { + "epoch": 1.9275770925110132, + "grad_norm": 2.1252744976115583, + "learning_rate": 7.264139504049916e-08, + "loss": 0.6230820417404175, + "step": 5470 + }, + { + "epoch": 1.9279295154185023, + "grad_norm": 1.6781926619362313, + "learning_rate": 7.194208182621509e-08, + "loss": 0.5282379984855652, + "step": 5471 + }, + { + "epoch": 1.9282819383259913, + "grad_norm": 2.1980396503246604, + "learning_rate": 7.12461388786212e-08, + "loss": 0.626023530960083, + "step": 5472 + }, + { + "epoch": 1.9286343612334802, + "grad_norm": 2.1608211937841197, + "learning_rate": 7.055356643396849e-08, + "loss": 0.6897492408752441, + "step": 5473 + }, + { + "epoch": 1.9289867841409691, + "grad_norm": 1.7214187213722456, + "learning_rate": 6.986436472736447e-08, + "loss": 0.583849310874939, + "step": 5474 + }, + { + "epoch": 1.929339207048458, + "grad_norm": 1.7492909983006562, + "learning_rate": 6.917853399277197e-08, + "loss": 0.6056735515594482, + "step": 5475 + }, + { + "epoch": 1.929691629955947, + "grad_norm": 1.8166317563571888, + "learning_rate": 6.849607446300699e-08, + "loss": 0.52838134765625, + "step": 5476 + }, + { + "epoch": 1.9300440528634362, + "grad_norm": 2.0425025849187954, + "learning_rate": 6.781698636974532e-08, + "loss": 0.6466653943061829, + "step": 5477 + }, + { + "epoch": 
1.930396475770925, + "grad_norm": 1.9593462888477349, + "learning_rate": 6.714126994351589e-08, + "loss": 0.6570286750793457, + "step": 5478 + }, + { + "epoch": 1.9307488986784143, + "grad_norm": 2.4867358577799576, + "learning_rate": 6.646892541370409e-08, + "loss": 0.7303042411804199, + "step": 5479 + }, + { + "epoch": 1.9311013215859032, + "grad_norm": 1.7938376915708092, + "learning_rate": 6.579995300854846e-08, + "loss": 0.5556488037109375, + "step": 5480 + }, + { + "epoch": 1.9314537444933921, + "grad_norm": 1.9624740523274589, + "learning_rate": 6.513435295514404e-08, + "loss": 0.6673456430435181, + "step": 5481 + }, + { + "epoch": 1.931806167400881, + "grad_norm": 1.9681067241776358, + "learning_rate": 6.447212547944448e-08, + "loss": 0.5605199337005615, + "step": 5482 + }, + { + "epoch": 1.93215859030837, + "grad_norm": 2.1935053480556785, + "learning_rate": 6.381327080625111e-08, + "loss": 0.5455278158187866, + "step": 5483 + }, + { + "epoch": 1.932511013215859, + "grad_norm": 1.8919678372461928, + "learning_rate": 6.315778915922722e-08, + "loss": 0.5371166467666626, + "step": 5484 + }, + { + "epoch": 1.9328634361233479, + "grad_norm": 1.9114985069981878, + "learning_rate": 6.250568076088814e-08, + "loss": 0.5873486399650574, + "step": 5485 + }, + { + "epoch": 1.933215859030837, + "grad_norm": 1.706006640351556, + "learning_rate": 6.18569458326046e-08, + "loss": 0.4187420606613159, + "step": 5486 + }, + { + "epoch": 1.933568281938326, + "grad_norm": 1.900919435061996, + "learning_rate": 6.121158459460042e-08, + "loss": 0.6006373167037964, + "step": 5487 + }, + { + "epoch": 1.9339207048458151, + "grad_norm": 1.819026585986156, + "learning_rate": 6.056959726595702e-08, + "loss": 0.6022043228149414, + "step": 5488 + }, + { + "epoch": 1.934273127753304, + "grad_norm": 2.037720704211898, + "learning_rate": 5.993098406460895e-08, + "loss": 0.6324778199195862, + "step": 5489 + }, + { + "epoch": 1.934625550660793, + "grad_norm": 2.0263189254585026, + 
"learning_rate": 5.929574520734505e-08, + "loss": 0.545529305934906, + "step": 5490 + }, + { + "epoch": 1.934977973568282, + "grad_norm": 1.9957592171950855, + "learning_rate": 5.8663880909809454e-08, + "loss": 0.623627781867981, + "step": 5491 + }, + { + "epoch": 1.9353303964757709, + "grad_norm": 1.9773130682504432, + "learning_rate": 5.80353913865006e-08, + "loss": 0.529983639717102, + "step": 5492 + }, + { + "epoch": 1.9356828193832598, + "grad_norm": 1.8301905692374867, + "learning_rate": 5.7410276850770055e-08, + "loss": 0.638504147529602, + "step": 5493 + }, + { + "epoch": 1.9360352422907487, + "grad_norm": 1.7706026455559263, + "learning_rate": 5.678853751482694e-08, + "loss": 0.6822696924209595, + "step": 5494 + }, + { + "epoch": 1.936387665198238, + "grad_norm": 1.6924491917110376, + "learning_rate": 5.6170173589730204e-08, + "loss": 0.5454750061035156, + "step": 5495 + }, + { + "epoch": 1.9367400881057268, + "grad_norm": 2.1428203564618915, + "learning_rate": 5.555518528539638e-08, + "loss": 0.5301260948181152, + "step": 5496 + }, + { + "epoch": 1.937092511013216, + "grad_norm": 1.965552985899495, + "learning_rate": 5.4943572810594035e-08, + "loss": 0.697251558303833, + "step": 5497 + }, + { + "epoch": 1.937444933920705, + "grad_norm": 1.8589631146352448, + "learning_rate": 5.433533637294819e-08, + "loss": 0.5171586871147156, + "step": 5498 + }, + { + "epoch": 1.9377973568281939, + "grad_norm": 1.974708525019113, + "learning_rate": 5.373047617893479e-08, + "loss": 0.6006083488464355, + "step": 5499 + }, + { + "epoch": 1.9381497797356828, + "grad_norm": 1.8914658578007237, + "learning_rate": 5.312899243388403e-08, + "loss": 0.6083849668502808, + "step": 5500 + }, + { + "epoch": 1.9385022026431717, + "grad_norm": 2.189863186886587, + "learning_rate": 5.2530885341982586e-08, + "loss": 0.6572569608688354, + "step": 5501 + }, + { + "epoch": 1.9388546255506607, + "grad_norm": 1.9316409138269541, + "learning_rate": 5.1936155106269146e-08, + "loss": 
0.497112512588501, + "step": 5502 + }, + { + "epoch": 1.9392070484581496, + "grad_norm": 1.9380736027791932, + "learning_rate": 5.1344801928636664e-08, + "loss": 0.5804885625839233, + "step": 5503 + }, + { + "epoch": 1.9395594713656388, + "grad_norm": 2.415405306864913, + "learning_rate": 5.075682600982901e-08, + "loss": 0.6225712299346924, + "step": 5504 + }, + { + "epoch": 1.9399118942731277, + "grad_norm": 1.896345547525062, + "learning_rate": 5.017222754944651e-08, + "loss": 0.6100028157234192, + "step": 5505 + }, + { + "epoch": 1.9402643171806169, + "grad_norm": 1.47523556471349, + "learning_rate": 4.959100674594486e-08, + "loss": 0.549712061882019, + "step": 5506 + }, + { + "epoch": 1.9406167400881058, + "grad_norm": 1.4736978929928604, + "learning_rate": 4.901316379662624e-08, + "loss": 0.5327162146568298, + "step": 5507 + }, + { + "epoch": 1.9409691629955947, + "grad_norm": 2.3670974688739697, + "learning_rate": 4.8438698897652626e-08, + "loss": 0.7408417463302612, + "step": 5508 + }, + { + "epoch": 1.9413215859030837, + "grad_norm": 1.8644826998816841, + "learning_rate": 4.7867612244036906e-08, + "loss": 0.6126288175582886, + "step": 5509 + }, + { + "epoch": 1.9416740088105726, + "grad_norm": 1.9600730866036664, + "learning_rate": 4.729990402964402e-08, + "loss": 0.542537271976471, + "step": 5510 + }, + { + "epoch": 1.9420264317180616, + "grad_norm": 1.9121979922913575, + "learning_rate": 4.6735574447195345e-08, + "loss": 0.5429843664169312, + "step": 5511 + }, + { + "epoch": 1.9423788546255507, + "grad_norm": 1.8002113296979507, + "learning_rate": 4.617462368826098e-08, + "loss": 0.6103960275650024, + "step": 5512 + }, + { + "epoch": 1.9427312775330396, + "grad_norm": 1.7389238607151303, + "learning_rate": 4.561705194326749e-08, + "loss": 0.43702462315559387, + "step": 5513 + }, + { + "epoch": 1.9430837004405286, + "grad_norm": 1.7641081174281446, + "learning_rate": 4.506285940149457e-08, + "loss": 0.5313314199447632, + "step": 5514 + }, + { + "epoch": 
1.9434361233480177, + "grad_norm": 1.7069377243686814, + "learning_rate": 4.451204625107064e-08, + "loss": 0.568792462348938, + "step": 5515 + }, + { + "epoch": 1.9437885462555067, + "grad_norm": 2.1007223606906185, + "learning_rate": 4.3964612678979446e-08, + "loss": 0.6055475473403931, + "step": 5516 + }, + { + "epoch": 1.9441409691629956, + "grad_norm": 1.9436769148628141, + "learning_rate": 4.3420558871060116e-08, + "loss": 0.6203786730766296, + "step": 5517 + }, + { + "epoch": 1.9444933920704845, + "grad_norm": 1.788437156743959, + "learning_rate": 4.287988501200047e-08, + "loss": 0.5914345979690552, + "step": 5518 + }, + { + "epoch": 1.9448458149779735, + "grad_norm": 1.8745063002086186, + "learning_rate": 4.2342591285343684e-08, + "loss": 0.5650739669799805, + "step": 5519 + }, + { + "epoch": 1.9451982378854624, + "grad_norm": 1.4561818985326163, + "learning_rate": 4.180867787348164e-08, + "loss": 0.5589660406112671, + "step": 5520 + }, + { + "epoch": 1.9455506607929516, + "grad_norm": 1.9465775114906616, + "learning_rate": 4.12781449576638e-08, + "loss": 0.5683336853981018, + "step": 5521 + }, + { + "epoch": 1.9459030837004405, + "grad_norm": 1.7869041316521455, + "learning_rate": 4.075099271798943e-08, + "loss": 0.5388365983963013, + "step": 5522 + }, + { + "epoch": 1.9462555066079297, + "grad_norm": 2.3465100615160757, + "learning_rate": 4.0227221333408726e-08, + "loss": 0.575006365776062, + "step": 5523 + }, + { + "epoch": 1.9466079295154186, + "grad_norm": 1.6872132733494793, + "learning_rate": 3.970683098172723e-08, + "loss": 0.49638503789901733, + "step": 5524 + }, + { + "epoch": 1.9469603524229075, + "grad_norm": 2.095719754969683, + "learning_rate": 3.9189821839600294e-08, + "loss": 0.6484041213989258, + "step": 5525 + }, + { + "epoch": 1.9473127753303965, + "grad_norm": 1.7587272240429226, + "learning_rate": 3.8676194082537535e-08, + "loss": 0.5522493124008179, + "step": 5526 + }, + { + "epoch": 1.9476651982378854, + "grad_norm": 
1.8834504959770908, + "learning_rate": 3.8165947884898356e-08, + "loss": 0.5875294208526611, + "step": 5527 + }, + { + "epoch": 1.9480176211453744, + "grad_norm": 1.8990167388470667, + "learning_rate": 3.765908341989644e-08, + "loss": 0.5725122690200806, + "step": 5528 + }, + { + "epoch": 1.9483700440528633, + "grad_norm": 1.7744908913216453, + "learning_rate": 3.7155600859595243e-08, + "loss": 0.5198935866355896, + "step": 5529 + }, + { + "epoch": 1.9487224669603525, + "grad_norm": 1.8236927705658619, + "learning_rate": 3.665550037491361e-08, + "loss": 0.6396631598472595, + "step": 5530 + }, + { + "epoch": 1.9490748898678414, + "grad_norm": 1.8879612013695581, + "learning_rate": 3.6158782135617965e-08, + "loss": 0.666089653968811, + "step": 5531 + }, + { + "epoch": 1.9494273127753305, + "grad_norm": 1.9912413735248546, + "learning_rate": 3.5665446310330087e-08, + "loss": 0.6818836331367493, + "step": 5532 + }, + { + "epoch": 1.9497797356828195, + "grad_norm": 2.04266783813749, + "learning_rate": 3.517549306652157e-08, + "loss": 0.533860981464386, + "step": 5533 + }, + { + "epoch": 1.9501321585903084, + "grad_norm": 2.011493253926506, + "learning_rate": 3.468892257051493e-08, + "loss": 0.6174973249435425, + "step": 5534 + }, + { + "epoch": 1.9504845814977974, + "grad_norm": 2.07102768257305, + "learning_rate": 3.4205734987488027e-08, + "loss": 0.6010403037071228, + "step": 5535 + }, + { + "epoch": 1.9508370044052863, + "grad_norm": 1.8654722728182422, + "learning_rate": 3.372593048146744e-08, + "loss": 0.6475502252578735, + "step": 5536 + }, + { + "epoch": 1.9511894273127752, + "grad_norm": 2.080853183455891, + "learning_rate": 3.3249509215330653e-08, + "loss": 0.5625165700912476, + "step": 5537 + }, + { + "epoch": 1.9515418502202642, + "grad_norm": 2.0303262611818336, + "learning_rate": 3.277647135080941e-08, + "loss": 0.6504719257354736, + "step": 5538 + }, + { + "epoch": 1.9518942731277533, + "grad_norm": 1.7964243534988884, + "learning_rate": 
3.230681704848415e-08, + "loss": 0.6217454671859741, + "step": 5539 + }, + { + "epoch": 1.9522466960352423, + "grad_norm": 1.975881803401868, + "learning_rate": 3.1840546467788445e-08, + "loss": 0.5804678201675415, + "step": 5540 + }, + { + "epoch": 1.9525991189427314, + "grad_norm": 1.7644690968017507, + "learning_rate": 3.1377659767006795e-08, + "loss": 0.6133759617805481, + "step": 5541 + }, + { + "epoch": 1.9529515418502204, + "grad_norm": 1.736020484111057, + "learning_rate": 3.0918157103273506e-08, + "loss": 0.508539080619812, + "step": 5542 + }, + { + "epoch": 1.9533039647577093, + "grad_norm": 2.115379893074018, + "learning_rate": 3.0462038632577126e-08, + "loss": 0.5682996511459351, + "step": 5543 + }, + { + "epoch": 1.9536563876651982, + "grad_norm": 2.0360556708735276, + "learning_rate": 3.000930450975603e-08, + "loss": 0.7072808742523193, + "step": 5544 + }, + { + "epoch": 1.9540088105726872, + "grad_norm": 2.092981328238059, + "learning_rate": 2.9559954888497278e-08, + "loss": 0.5948976278305054, + "step": 5545 + }, + { + "epoch": 1.954361233480176, + "grad_norm": 1.827038503098094, + "learning_rate": 2.911398992134218e-08, + "loss": 0.5111032128334045, + "step": 5546 + }, + { + "epoch": 1.954713656387665, + "grad_norm": 1.8278152391313893, + "learning_rate": 2.8671409759681858e-08, + "loss": 0.553802490234375, + "step": 5547 + }, + { + "epoch": 1.9550660792951542, + "grad_norm": 1.685843539181356, + "learning_rate": 2.8232214553759462e-08, + "loss": 0.5091711282730103, + "step": 5548 + }, + { + "epoch": 1.9554185022026431, + "grad_norm": 1.4871983076237012, + "learning_rate": 2.7796404452666847e-08, + "loss": 0.47025251388549805, + "step": 5549 + }, + { + "epoch": 1.9557709251101323, + "grad_norm": 2.031516899140332, + "learning_rate": 2.7363979604349e-08, + "loss": 0.6174348592758179, + "step": 5550 + }, + { + "epoch": 1.9561233480176212, + "grad_norm": 1.8902471541583934, + "learning_rate": 2.69349401555985e-08, + "loss": 0.5516685247421265, + 
"step": 5551 + }, + { + "epoch": 1.9564757709251102, + "grad_norm": 2.1329834880360563, + "learning_rate": 2.6509286252063282e-08, + "loss": 0.6272131204605103, + "step": 5552 + }, + { + "epoch": 1.956828193832599, + "grad_norm": 1.796045915873636, + "learning_rate": 2.6087018038239987e-08, + "loss": 0.5913189649581909, + "step": 5553 + }, + { + "epoch": 1.957180616740088, + "grad_norm": 1.8863252927172953, + "learning_rate": 2.5668135657472835e-08, + "loss": 0.6802668571472168, + "step": 5554 + }, + { + "epoch": 1.957533039647577, + "grad_norm": 1.9442650959080303, + "learning_rate": 2.525263925196142e-08, + "loss": 0.5829865336418152, + "step": 5555 + }, + { + "epoch": 1.9578854625550661, + "grad_norm": 2.0474932427098627, + "learning_rate": 2.4840528962752907e-08, + "loss": 0.6400870680809021, + "step": 5556 + }, + { + "epoch": 1.958237885462555, + "grad_norm": 2.043080792800152, + "learning_rate": 2.4431804929746506e-08, + "loss": 0.48432302474975586, + "step": 5557 + }, + { + "epoch": 1.958590308370044, + "grad_norm": 1.9639599818265998, + "learning_rate": 2.4026467291691223e-08, + "loss": 0.5494402647018433, + "step": 5558 + }, + { + "epoch": 1.9589427312775332, + "grad_norm": 1.800709765694371, + "learning_rate": 2.3624516186186996e-08, + "loss": 0.5393223762512207, + "step": 5559 + }, + { + "epoch": 1.959295154185022, + "grad_norm": 1.868096905678952, + "learning_rate": 2.322595174968245e-08, + "loss": 0.5500867962837219, + "step": 5560 + }, + { + "epoch": 1.959647577092511, + "grad_norm": 1.986290631971783, + "learning_rate": 2.283077411747825e-08, + "loss": 0.5618818998336792, + "step": 5561 + }, + { + "epoch": 1.96, + "grad_norm": 2.0174876429391526, + "learning_rate": 2.243898342372597e-08, + "loss": 0.5681769251823425, + "step": 5562 + }, + { + "epoch": 1.960352422907489, + "grad_norm": 2.160298007931608, + "learning_rate": 2.2050579801424777e-08, + "loss": 0.8009706139564514, + "step": 5563 + }, + { + "epoch": 1.9607048458149778, + "grad_norm": 
2.2076681264311517, + "learning_rate": 2.1665563382426978e-08, + "loss": 0.5609455704689026, + "step": 5564 + }, + { + "epoch": 1.961057268722467, + "grad_norm": 1.6584397285315808, + "learning_rate": 2.1283934297432472e-08, + "loss": 0.5615163445472717, + "step": 5565 + }, + { + "epoch": 1.961409691629956, + "grad_norm": 2.4819954064616265, + "learning_rate": 2.0905692675993182e-08, + "loss": 0.4442581832408905, + "step": 5566 + }, + { + "epoch": 1.961762114537445, + "grad_norm": 2.0037139303731344, + "learning_rate": 2.0530838646510842e-08, + "loss": 0.6557266116142273, + "step": 5567 + }, + { + "epoch": 1.962114537444934, + "grad_norm": 1.851215643338071, + "learning_rate": 2.0159372336235884e-08, + "loss": 0.5911799669265747, + "step": 5568 + }, + { + "epoch": 1.962466960352423, + "grad_norm": 2.0920087166052057, + "learning_rate": 1.9791293871269656e-08, + "loss": 0.5480202436447144, + "step": 5569 + }, + { + "epoch": 1.962819383259912, + "grad_norm": 2.0350633249337795, + "learning_rate": 1.9426603376563325e-08, + "loss": 0.6489467620849609, + "step": 5570 + }, + { + "epoch": 1.9631718061674008, + "grad_norm": 1.8480180634522771, + "learning_rate": 1.9065300975917856e-08, + "loss": 0.4699944853782654, + "step": 5571 + }, + { + "epoch": 1.9635242290748898, + "grad_norm": 1.8923901172350763, + "learning_rate": 1.8707386791985137e-08, + "loss": 0.6684885025024414, + "step": 5572 + }, + { + "epoch": 1.9638766519823787, + "grad_norm": 2.2169126358939413, + "learning_rate": 1.835286094626576e-08, + "loss": 0.5847122073173523, + "step": 5573 + }, + { + "epoch": 1.9642290748898679, + "grad_norm": 1.801041360244202, + "learning_rate": 1.8001723559109007e-08, + "loss": 0.5427859425544739, + "step": 5574 + }, + { + "epoch": 1.9645814977973568, + "grad_norm": 2.032431019918, + "learning_rate": 1.7653974749715087e-08, + "loss": 0.6545590758323669, + "step": 5575 + }, + { + "epoch": 1.964933920704846, + "grad_norm": 1.785624619961358, + "learning_rate": 
1.730961463613512e-08, + "loss": 0.6369475722312927, + "step": 5576 + }, + { + "epoch": 1.965286343612335, + "grad_norm": 1.989892215094852, + "learning_rate": 1.696864333526893e-08, + "loss": 0.5165325403213501, + "step": 5577 + }, + { + "epoch": 1.9656387665198238, + "grad_norm": 2.147184198038496, + "learning_rate": 1.6631060962863933e-08, + "loss": 0.5651812553405762, + "step": 5578 + }, + { + "epoch": 1.9659911894273128, + "grad_norm": 1.6839108762220567, + "learning_rate": 1.6296867633519563e-08, + "loss": 0.5249905586242676, + "step": 5579 + }, + { + "epoch": 1.9663436123348017, + "grad_norm": 1.8723453129570697, + "learning_rate": 1.5966063460683967e-08, + "loss": 0.6748663783073425, + "step": 5580 + }, + { + "epoch": 1.9666960352422906, + "grad_norm": 1.654472064493344, + "learning_rate": 1.5638648556656198e-08, + "loss": 0.5276468992233276, + "step": 5581 + }, + { + "epoch": 1.9670484581497796, + "grad_norm": 1.7910399914217132, + "learning_rate": 1.5314623032581798e-08, + "loss": 0.5778729319572449, + "step": 5582 + }, + { + "epoch": 1.9674008810572687, + "grad_norm": 1.8564203677999862, + "learning_rate": 1.4993986998457223e-08, + "loss": 0.5805479288101196, + "step": 5583 + }, + { + "epoch": 1.9677533039647577, + "grad_norm": 1.9817945876697571, + "learning_rate": 1.4676740563129843e-08, + "loss": 0.6213263273239136, + "step": 5584 + }, + { + "epoch": 1.9681057268722468, + "grad_norm": 1.8037978918771924, + "learning_rate": 1.4362883834294627e-08, + "loss": 0.5081031322479248, + "step": 5585 + }, + { + "epoch": 1.9684581497797358, + "grad_norm": 1.71465121106617, + "learning_rate": 1.4052416918495237e-08, + "loss": 0.5605350136756897, + "step": 5586 + }, + { + "epoch": 1.9688105726872247, + "grad_norm": 2.1182297496689877, + "learning_rate": 1.3745339921126255e-08, + "loss": 0.701635479927063, + "step": 5587 + }, + { + "epoch": 1.9691629955947136, + "grad_norm": 4.768978361346767, + "learning_rate": 1.344165294642985e-08, + "loss": 0.5537668466567993, 
+ "step": 5588 + }, + { + "epoch": 1.9695154185022026, + "grad_norm": 1.9636754875619487, + "learning_rate": 1.3141356097500225e-08, + "loss": 0.6395033597946167, + "step": 5589 + }, + { + "epoch": 1.9698678414096915, + "grad_norm": 2.0129419054377355, + "learning_rate": 1.2844449476276943e-08, + "loss": 0.549985408782959, + "step": 5590 + }, + { + "epoch": 1.9702202643171807, + "grad_norm": 1.5684457658919975, + "learning_rate": 1.2550933183550496e-08, + "loss": 0.4503220021724701, + "step": 5591 + }, + { + "epoch": 1.9705726872246696, + "grad_norm": 1.842567825609057, + "learning_rate": 1.2260807318962286e-08, + "loss": 0.6369946599006653, + "step": 5592 + }, + { + "epoch": 1.9709251101321585, + "grad_norm": 1.9389149649481725, + "learning_rate": 1.197407198099909e-08, + "loss": 0.547295093536377, + "step": 5593 + }, + { + "epoch": 1.9712775330396477, + "grad_norm": 1.9379429852476115, + "learning_rate": 1.1690727267000823e-08, + "loss": 0.578770101070404, + "step": 5594 + }, + { + "epoch": 1.9716299559471366, + "grad_norm": 1.7979041690440398, + "learning_rate": 1.1410773273151654e-08, + "loss": 0.5992920398712158, + "step": 5595 + }, + { + "epoch": 1.9719823788546256, + "grad_norm": 2.0358089708846503, + "learning_rate": 1.1134210094488896e-08, + "loss": 0.5912446975708008, + "step": 5596 + }, + { + "epoch": 1.9723348017621145, + "grad_norm": 1.9956728807231137, + "learning_rate": 1.0861037824896337e-08, + "loss": 0.6539223194122314, + "step": 5597 + }, + { + "epoch": 1.9726872246696034, + "grad_norm": 1.6995757910859364, + "learning_rate": 1.0591256557108686e-08, + "loss": 0.6487923860549927, + "step": 5598 + }, + { + "epoch": 1.9730396475770924, + "grad_norm": 2.0265831695223384, + "learning_rate": 1.0324866382707133e-08, + "loss": 0.7950254678726196, + "step": 5599 + }, + { + "epoch": 1.9733920704845815, + "grad_norm": 1.7028165277673737, + "learning_rate": 1.006186739212267e-08, + "loss": 0.4941173195838928, + "step": 5600 + } + ], + "logging_steps": 1, + 
"max_steps": 5676, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1731731137609728.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-5600/training_args.bin b/checkpoint-5600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3d16cae9f8126645d9b722fd466525457b2f8a90 --- /dev/null +++ b/checkpoint-5600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db5e3f64fea9062d775ce1214f2b31fbf79ffdfb10af7998752ce02faa3d3dd5 +size 6968 diff --git a/checkpoint-5600/zero_to_fp32.py b/checkpoint-5600/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..5995d6e6f04e43b989587aa9022a3aef0c66d694 --- /dev/null +++ b/checkpoint-5600/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . 
output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = 
sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, 
ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." 
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. + Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info("Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info("Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/checkpoint-5676/README.md b/checkpoint-5676/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4005c4d8e7a819833408da4794e4e74d2ced6553 --- /dev/null +++ b/checkpoint-5676/README.md @@ -0,0 +1,208 @@ +--- +base_model: Qwen/Qwen2.5-VL-7B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. 
More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-5676/adapter_config.json b/checkpoint-5676/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8224bce2b962b82e55c954c000a28629995b1870 --- /dev/null +++ b/checkpoint-5676/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + 
"layers.8.mlp.gate_proj", + "layers.20.mlp.up_proj", + "layers.25.mlp.down_proj", + "layers.21.mlp.down_proj", + "layers.22.mlp.gate_proj", + "layers.23.mlp.gate_proj", + "layers.11.mlp.gate_proj", + "layers.13.mlp.down_proj", + "layers.8.mlp.down_proj", + "layers.18.mlp.gate_proj", + "layers.7.mlp.down_proj", + "layers.1.mlp.down_proj", + "layers.20.mlp.gate_proj", + "layers.16.mlp.down_proj", + "layers.23.mlp.down_proj", + "layers.12.mlp.gate_proj", + "layers.12.mlp.down_proj", + "layers.1.mlp.up_proj", + "layers.2.mlp.gate_proj", + "layers.15.mlp.down_proj", + "layers.25.mlp.up_proj", + "layers.27.mlp.down_proj", + "layers.19.mlp.down_proj", + "layers.14.mlp.gate_proj", + "layers.27.mlp.gate_proj", + "layers.18.mlp.up_proj", + "layers.2.mlp.up_proj", + "layers.0.mlp.gate_proj", + "q_proj", + "layers.10.mlp.down_proj", + "layers.11.mlp.up_proj", + "layers.15.mlp.up_proj", + "layers.7.mlp.up_proj", + "layers.8.mlp.up_proj", + "layers.3.mlp.up_proj", + "layers.10.mlp.up_proj", + "layers.23.mlp.up_proj", + "layers.19.mlp.gate_proj", + "layers.18.mlp.down_proj", + "layers.0.mlp.up_proj", + "layers.12.mlp.up_proj", + "layers.13.mlp.up_proj", + "o_proj", + "layers.5.mlp.down_proj", + "layers.14.mlp.down_proj", + "layers.2.mlp.down_proj", + "layers.6.mlp.down_proj", + "layers.6.mlp.gate_proj", + "layers.25.mlp.gate_proj", + "layers.3.mlp.down_proj", + "k_proj", + "layers.17.mlp.gate_proj", + "layers.24.mlp.gate_proj", + "layers.1.mlp.gate_proj", + "layers.4.mlp.down_proj", + "layers.9.mlp.down_proj", + "layers.0.mlp.down_proj", + "layers.9.mlp.gate_proj", + "layers.5.mlp.gate_proj", + "layers.24.mlp.up_proj", + "layers.20.mlp.down_proj", + "layers.16.mlp.gate_proj", + "layers.26.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.22.mlp.down_proj", + "layers.16.mlp.up_proj", + "v_proj", + "layers.27.mlp.up_proj", + "layers.13.mlp.gate_proj", + "layers.21.mlp.up_proj", + "layers.4.mlp.gate_proj", + "layers.9.mlp.up_proj", + "layers.3.mlp.gate_proj", + 
"layers.7.mlp.gate_proj", + "layers.4.mlp.up_proj", + "layers.24.mlp.down_proj", + "layers.26.mlp.gate_proj", + "layers.22.mlp.up_proj", + "layers.11.mlp.down_proj", + "layers.10.mlp.gate_proj", + "layers.5.mlp.up_proj", + "layers.21.mlp.gate_proj", + "layers.6.mlp.up_proj", + "layers.19.mlp.up_proj", + "layers.14.mlp.up_proj", + "layers.26.mlp.down_proj", + "layers.17.mlp.up_proj", + "layers.15.mlp.gate_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-5676/adapter_model.safetensors b/checkpoint-5676/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a21515a12bad5a118247ac5cba120ba0c896fe7c --- /dev/null +++ b/checkpoint-5676/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96d3fb56e6df5a684a21c26f774fc41f3263352eede9bf1cdb9bf42fec9c1296 +size 323020440 diff --git a/checkpoint-5676/chat_template.jinja b/checkpoint-5676/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/checkpoint-5676/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% 
set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-5676/global_step5676/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-5676/global_step5676/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e9a85a10780235072e99dd2a390f5e11cb30ca9 --- /dev/null +++ b/checkpoint-5676/global_step5676/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0b8040d0d148d6bdbde5e07fe56072e3bc950a790cea6bf5d216c56e779eda6 +size 1937772272 diff --git a/checkpoint-5676/global_step5676/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-5676/global_step5676/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e14d355756c46c29a2df37a7707afee162be264a --- /dev/null +++ b/checkpoint-5676/global_step5676/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9759ca7147074e2933d5cb05b5ce55c376f6c97f9a44c902783f939d2f58abe +size 460630 diff --git a/checkpoint-5676/latest b/checkpoint-5676/latest new file mode 100644 index 0000000000000000000000000000000000000000..b2eef6176150b9917c1011d6dd006e2da5fcb32a --- /dev/null +++ b/checkpoint-5676/latest @@ -0,0 +1 @@ +global_step5676 \ No newline at end of file diff --git a/checkpoint-5676/processor_config.json b/checkpoint-5676/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e717d9bf475c411369034636e82e48cf79108a8 --- /dev/null +++ b/checkpoint-5676/processor_config.json @@ -0,0 +1,63 @@ +{ + "image_processor": { + "data_format": 
"channels_first", + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "merge_size": 2, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 + }, + "processor_class": "Qwen2_5_VLProcessor", + "video_processor": { + "data_format": "channels_first", + "default_to_square": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": false, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "max_frames": 768, + "merge_size": 2, + "min_frames": 4, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2, + "video_processor_type": "Qwen2VLVideoProcessor" + } +} diff --git a/checkpoint-5676/rng_state.pth b/checkpoint-5676/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..283631cdbb978622ff2dcd38ca063a7c08b57e9a --- /dev/null +++ b/checkpoint-5676/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed3d581078810c5e8741dd4e1ca81c13a3ba12206a02f3e748783d836dc07c42 +size 14244 diff --git a/checkpoint-5676/scheduler.pt b/checkpoint-5676/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cef8f8eac5ebd9ad6b1c1cefd2673989c0bdd404 --- /dev/null +++ b/checkpoint-5676/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8219226d17ebb94ea6bc757ce75ca07943241606db0e06c3ed46d991c02b8a7 +size 
1000 diff --git a/checkpoint-5676/tokenizer.json b/checkpoint-5676/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/checkpoint-5676/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/checkpoint-5676/tokenizer_config.json b/checkpoint-5676/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/checkpoint-5676/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/checkpoint-5676/trainer_state.json b/checkpoint-5676/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..46be8fadeed979184197053f1fe84e6e8b2263c8 --- /dev/null +++ b/checkpoint-5676/trainer_state.json @@ -0,0 +1,39766 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 5676, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0003524229074889868, + "grad_norm": 1.6512674233185107, + 
"learning_rate": 0.0, + "loss": 1.493973731994629, + "step": 1 + }, + { + "epoch": 0.0007048458149779736, + "grad_norm": 1.4463228571593894, + "learning_rate": 7.042253521126761e-08, + "loss": 1.3692013025283813, + "step": 2 + }, + { + "epoch": 0.0010572687224669603, + "grad_norm": 1.4036766254408197, + "learning_rate": 1.4084507042253522e-07, + "loss": 1.3996260166168213, + "step": 3 + }, + { + "epoch": 0.0014096916299559472, + "grad_norm": 1.29446596506829, + "learning_rate": 2.1126760563380284e-07, + "loss": 1.3011515140533447, + "step": 4 + }, + { + "epoch": 0.001762114537444934, + "grad_norm": 1.5130555881795185, + "learning_rate": 2.8169014084507043e-07, + "loss": 1.3736083507537842, + "step": 5 + }, + { + "epoch": 0.0021145374449339205, + "grad_norm": 1.247517750517551, + "learning_rate": 3.521126760563381e-07, + "loss": 1.051241159439087, + "step": 6 + }, + { + "epoch": 0.0024669603524229075, + "grad_norm": 1.611437944890658, + "learning_rate": 4.225352112676057e-07, + "loss": 1.2594621181488037, + "step": 7 + }, + { + "epoch": 0.0028193832599118945, + "grad_norm": 1.4604380967241444, + "learning_rate": 4.929577464788733e-07, + "loss": 1.0498416423797607, + "step": 8 + }, + { + "epoch": 0.003171806167400881, + "grad_norm": 1.367174801368101, + "learning_rate": 5.633802816901409e-07, + "loss": 1.3313459157943726, + "step": 9 + }, + { + "epoch": 0.003524229074889868, + "grad_norm": 1.4378623823320218, + "learning_rate": 6.338028169014085e-07, + "loss": 1.2484922409057617, + "step": 10 + }, + { + "epoch": 0.0038766519823788545, + "grad_norm": 1.197911167360161, + "learning_rate": 7.042253521126762e-07, + "loss": 1.097194790840149, + "step": 11 + }, + { + "epoch": 0.004229074889867841, + "grad_norm": 1.3767897701080816, + "learning_rate": 7.746478873239437e-07, + "loss": 1.3065136671066284, + "step": 12 + }, + { + "epoch": 0.0045814977973568285, + "grad_norm": 1.2501177622273731, + "learning_rate": 8.450704225352114e-07, + "loss": 1.1574026346206665, + "step": 
13 + }, + { + "epoch": 0.004933920704845815, + "grad_norm": 1.3002699887597202, + "learning_rate": 9.154929577464789e-07, + "loss": 1.1509445905685425, + "step": 14 + }, + { + "epoch": 0.0052863436123348016, + "grad_norm": 1.3458236321153771, + "learning_rate": 9.859154929577465e-07, + "loss": 1.069403886795044, + "step": 15 + }, + { + "epoch": 0.005638766519823789, + "grad_norm": 1.52712721337833, + "learning_rate": 1.0563380281690142e-06, + "loss": 1.1731287240982056, + "step": 16 + }, + { + "epoch": 0.0059911894273127755, + "grad_norm": 1.5628075837505453, + "learning_rate": 1.1267605633802817e-06, + "loss": 0.9314254522323608, + "step": 17 + }, + { + "epoch": 0.006343612334801762, + "grad_norm": 1.3686084350519343, + "learning_rate": 1.1971830985915492e-06, + "loss": 1.2915008068084717, + "step": 18 + }, + { + "epoch": 0.006696035242290749, + "grad_norm": 1.2653916141417434, + "learning_rate": 1.267605633802817e-06, + "loss": 1.1088309288024902, + "step": 19 + }, + { + "epoch": 0.007048458149779736, + "grad_norm": 1.362753082153478, + "learning_rate": 1.3380281690140844e-06, + "loss": 1.21511709690094, + "step": 20 + }, + { + "epoch": 0.0074008810572687225, + "grad_norm": 1.3054604275805306, + "learning_rate": 1.4084507042253523e-06, + "loss": 1.241409420967102, + "step": 21 + }, + { + "epoch": 0.007753303964757709, + "grad_norm": 1.3646723208790772, + "learning_rate": 1.4788732394366198e-06, + "loss": 1.2170014381408691, + "step": 22 + }, + { + "epoch": 0.008105726872246696, + "grad_norm": 1.424586503093174, + "learning_rate": 1.5492957746478873e-06, + "loss": 1.1405870914459229, + "step": 23 + }, + { + "epoch": 0.008458149779735682, + "grad_norm": 1.429368633092772, + "learning_rate": 1.6197183098591552e-06, + "loss": 1.122542381286621, + "step": 24 + }, + { + "epoch": 0.00881057268722467, + "grad_norm": 1.2201478884239083, + "learning_rate": 1.6901408450704227e-06, + "loss": 1.1686937808990479, + "step": 25 + }, + { + "epoch": 0.009162995594713657, + 
"grad_norm": 1.4065678272985154, + "learning_rate": 1.7605633802816902e-06, + "loss": 1.215955376625061, + "step": 26 + }, + { + "epoch": 0.009515418502202643, + "grad_norm": 1.3879787249393913, + "learning_rate": 1.8309859154929579e-06, + "loss": 1.075179100036621, + "step": 27 + }, + { + "epoch": 0.00986784140969163, + "grad_norm": 1.2313632017619234, + "learning_rate": 1.9014084507042254e-06, + "loss": 1.198237419128418, + "step": 28 + }, + { + "epoch": 0.010220264317180617, + "grad_norm": 1.6833211669458825, + "learning_rate": 1.971830985915493e-06, + "loss": 1.2356700897216797, + "step": 29 + }, + { + "epoch": 0.010572687224669603, + "grad_norm": 1.3637967517131555, + "learning_rate": 2.0422535211267608e-06, + "loss": 1.2373592853546143, + "step": 30 + }, + { + "epoch": 0.01092511013215859, + "grad_norm": 1.377232613936239, + "learning_rate": 2.1126760563380285e-06, + "loss": 1.1857718229293823, + "step": 31 + }, + { + "epoch": 0.011277533039647578, + "grad_norm": 1.3566319214936433, + "learning_rate": 2.1830985915492958e-06, + "loss": 1.1844017505645752, + "step": 32 + }, + { + "epoch": 0.011629955947136564, + "grad_norm": 1.2486508447822717, + "learning_rate": 2.2535211267605635e-06, + "loss": 1.275226354598999, + "step": 33 + }, + { + "epoch": 0.011982378854625551, + "grad_norm": 1.3044888735575617, + "learning_rate": 2.323943661971831e-06, + "loss": 1.169473648071289, + "step": 34 + }, + { + "epoch": 0.012334801762114538, + "grad_norm": 1.2608655384056326, + "learning_rate": 2.3943661971830984e-06, + "loss": 1.2182841300964355, + "step": 35 + }, + { + "epoch": 0.012687224669603524, + "grad_norm": 1.3780698009940295, + "learning_rate": 2.4647887323943666e-06, + "loss": 1.2110469341278076, + "step": 36 + }, + { + "epoch": 0.01303964757709251, + "grad_norm": 1.3829042894220551, + "learning_rate": 2.535211267605634e-06, + "loss": 1.2886571884155273, + "step": 37 + }, + { + "epoch": 0.013392070484581497, + "grad_norm": 1.2954566526081723, + "learning_rate": 
2.6056338028169015e-06, + "loss": 1.0740901231765747, + "step": 38 + }, + { + "epoch": 0.013744493392070485, + "grad_norm": 1.2079072281757672, + "learning_rate": 2.676056338028169e-06, + "loss": 1.0119279623031616, + "step": 39 + }, + { + "epoch": 0.014096916299559472, + "grad_norm": 1.1460333237155051, + "learning_rate": 2.746478873239437e-06, + "loss": 1.0752044916152954, + "step": 40 + }, + { + "epoch": 0.014449339207048459, + "grad_norm": 1.3690776364650978, + "learning_rate": 2.8169014084507046e-06, + "loss": 1.345343828201294, + "step": 41 + }, + { + "epoch": 0.014801762114537445, + "grad_norm": 1.0813865739605455, + "learning_rate": 2.887323943661972e-06, + "loss": 1.102332353591919, + "step": 42 + }, + { + "epoch": 0.015154185022026432, + "grad_norm": 1.1643083589428873, + "learning_rate": 2.9577464788732396e-06, + "loss": 1.006919264793396, + "step": 43 + }, + { + "epoch": 0.015506607929515418, + "grad_norm": 1.1582412568670832, + "learning_rate": 3.0281690140845073e-06, + "loss": 1.104026436805725, + "step": 44 + }, + { + "epoch": 0.015859030837004406, + "grad_norm": 1.3060563783851553, + "learning_rate": 3.0985915492957746e-06, + "loss": 1.299152135848999, + "step": 45 + }, + { + "epoch": 0.01621145374449339, + "grad_norm": 1.4304085919726754, + "learning_rate": 3.1690140845070427e-06, + "loss": 1.1075072288513184, + "step": 46 + }, + { + "epoch": 0.01656387665198238, + "grad_norm": 0.9865545367526579, + "learning_rate": 3.2394366197183104e-06, + "loss": 1.0296107530593872, + "step": 47 + }, + { + "epoch": 0.016916299559471364, + "grad_norm": 1.1960961939132708, + "learning_rate": 3.3098591549295777e-06, + "loss": 1.1097803115844727, + "step": 48 + }, + { + "epoch": 0.017268722466960353, + "grad_norm": 1.0974682037636356, + "learning_rate": 3.3802816901408454e-06, + "loss": 0.945678174495697, + "step": 49 + }, + { + "epoch": 0.01762114537444934, + "grad_norm": 0.9924343523024514, + "learning_rate": 3.4507042253521127e-06, + "loss": 1.075556993484497, + 
"step": 50 + }, + { + "epoch": 0.017973568281938326, + "grad_norm": 1.0849849170905757, + "learning_rate": 3.5211267605633804e-06, + "loss": 1.0790367126464844, + "step": 51 + }, + { + "epoch": 0.018325991189427314, + "grad_norm": 1.220415189867698, + "learning_rate": 3.5915492957746485e-06, + "loss": 1.2567799091339111, + "step": 52 + }, + { + "epoch": 0.0186784140969163, + "grad_norm": 1.1058732491316554, + "learning_rate": 3.6619718309859158e-06, + "loss": 1.1437780857086182, + "step": 53 + }, + { + "epoch": 0.019030837004405287, + "grad_norm": 1.0871981925234313, + "learning_rate": 3.7323943661971835e-06, + "loss": 1.0962307453155518, + "step": 54 + }, + { + "epoch": 0.019383259911894272, + "grad_norm": 0.9603250960542756, + "learning_rate": 3.8028169014084508e-06, + "loss": 1.0149122476577759, + "step": 55 + }, + { + "epoch": 0.01973568281938326, + "grad_norm": 0.9630324155849409, + "learning_rate": 3.873239436619718e-06, + "loss": 0.9029096364974976, + "step": 56 + }, + { + "epoch": 0.02008810572687225, + "grad_norm": 1.1449327271146603, + "learning_rate": 3.943661971830986e-06, + "loss": 1.1290819644927979, + "step": 57 + }, + { + "epoch": 0.020440528634361233, + "grad_norm": 1.1046082203063978, + "learning_rate": 4.014084507042254e-06, + "loss": 1.0965365171432495, + "step": 58 + }, + { + "epoch": 0.02079295154185022, + "grad_norm": 1.2553158733514387, + "learning_rate": 4.0845070422535216e-06, + "loss": 1.2854020595550537, + "step": 59 + }, + { + "epoch": 0.021145374449339206, + "grad_norm": 1.0484971235480365, + "learning_rate": 4.154929577464789e-06, + "loss": 1.0303996801376343, + "step": 60 + }, + { + "epoch": 0.021497797356828195, + "grad_norm": 0.9670460326314384, + "learning_rate": 4.225352112676057e-06, + "loss": 1.0811198949813843, + "step": 61 + }, + { + "epoch": 0.02185022026431718, + "grad_norm": 0.992548164971829, + "learning_rate": 4.295774647887324e-06, + "loss": 1.1373648643493652, + "step": 62 + }, + { + "epoch": 0.022202643171806168, + 
"grad_norm": 1.009220008285868, + "learning_rate": 4.3661971830985915e-06, + "loss": 0.8717563152313232, + "step": 63 + }, + { + "epoch": 0.022555066079295156, + "grad_norm": 0.9171432664885892, + "learning_rate": 4.43661971830986e-06, + "loss": 0.9939290881156921, + "step": 64 + }, + { + "epoch": 0.02290748898678414, + "grad_norm": 1.139248361968882, + "learning_rate": 4.507042253521127e-06, + "loss": 1.1776926517486572, + "step": 65 + }, + { + "epoch": 0.02325991189427313, + "grad_norm": 0.8971048282009709, + "learning_rate": 4.577464788732395e-06, + "loss": 0.9149726629257202, + "step": 66 + }, + { + "epoch": 0.023612334801762114, + "grad_norm": 0.9597323965843616, + "learning_rate": 4.647887323943662e-06, + "loss": 0.996609091758728, + "step": 67 + }, + { + "epoch": 0.023964757709251102, + "grad_norm": 1.0074979173506051, + "learning_rate": 4.71830985915493e-06, + "loss": 1.102593183517456, + "step": 68 + }, + { + "epoch": 0.024317180616740087, + "grad_norm": 0.8938780612317906, + "learning_rate": 4.788732394366197e-06, + "loss": 1.0912048816680908, + "step": 69 + }, + { + "epoch": 0.024669603524229075, + "grad_norm": 0.936561005612989, + "learning_rate": 4.859154929577465e-06, + "loss": 1.1192498207092285, + "step": 70 + }, + { + "epoch": 0.025022026431718063, + "grad_norm": 1.059387656590118, + "learning_rate": 4.929577464788733e-06, + "loss": 1.0358459949493408, + "step": 71 + }, + { + "epoch": 0.025374449339207048, + "grad_norm": 0.9588756664450253, + "learning_rate": 5e-06, + "loss": 1.076169490814209, + "step": 72 + }, + { + "epoch": 0.025726872246696036, + "grad_norm": 1.355361750045824, + "learning_rate": 5.070422535211268e-06, + "loss": 0.9906084537506104, + "step": 73 + }, + { + "epoch": 0.02607929515418502, + "grad_norm": 1.187443908189842, + "learning_rate": 5.140845070422536e-06, + "loss": 0.8163654804229736, + "step": 74 + }, + { + "epoch": 0.02643171806167401, + "grad_norm": 0.9764403954844053, + "learning_rate": 5.211267605633803e-06, + "loss": 
1.140099048614502, + "step": 75 + }, + { + "epoch": 0.026784140969162994, + "grad_norm": 0.863156257130764, + "learning_rate": 5.28169014084507e-06, + "loss": 0.7654916048049927, + "step": 76 + }, + { + "epoch": 0.027136563876651983, + "grad_norm": 1.0935626721226286, + "learning_rate": 5.352112676056338e-06, + "loss": 0.9476499557495117, + "step": 77 + }, + { + "epoch": 0.02748898678414097, + "grad_norm": 1.0875377862843238, + "learning_rate": 5.422535211267607e-06, + "loss": 1.120811939239502, + "step": 78 + }, + { + "epoch": 0.027841409691629956, + "grad_norm": 0.9578356099138406, + "learning_rate": 5.492957746478874e-06, + "loss": 0.9745736122131348, + "step": 79 + }, + { + "epoch": 0.028193832599118944, + "grad_norm": 1.074452545035149, + "learning_rate": 5.563380281690142e-06, + "loss": 1.086181879043579, + "step": 80 + }, + { + "epoch": 0.02854625550660793, + "grad_norm": 0.9510910839018534, + "learning_rate": 5.633802816901409e-06, + "loss": 0.9904681444168091, + "step": 81 + }, + { + "epoch": 0.028898678414096917, + "grad_norm": 1.0958191892945044, + "learning_rate": 5.7042253521126766e-06, + "loss": 1.0311436653137207, + "step": 82 + }, + { + "epoch": 0.029251101321585902, + "grad_norm": 1.066849780091366, + "learning_rate": 5.774647887323944e-06, + "loss": 0.996998131275177, + "step": 83 + }, + { + "epoch": 0.02960352422907489, + "grad_norm": 1.1397250117300832, + "learning_rate": 5.845070422535212e-06, + "loss": 1.1526594161987305, + "step": 84 + }, + { + "epoch": 0.029955947136563875, + "grad_norm": 1.145115830862378, + "learning_rate": 5.915492957746479e-06, + "loss": 1.0914695262908936, + "step": 85 + }, + { + "epoch": 0.030308370044052863, + "grad_norm": 1.1551265503738541, + "learning_rate": 5.9859154929577465e-06, + "loss": 0.9558745622634888, + "step": 86 + }, + { + "epoch": 0.03066079295154185, + "grad_norm": 1.0734015706063305, + "learning_rate": 6.056338028169015e-06, + "loss": 0.9668983221054077, + "step": 87 + }, + { + "epoch": 
0.031013215859030836, + "grad_norm": 1.2231010171085557, + "learning_rate": 6.126760563380282e-06, + "loss": 1.0132758617401123, + "step": 88 + }, + { + "epoch": 0.03136563876651982, + "grad_norm": 0.9911917466596063, + "learning_rate": 6.197183098591549e-06, + "loss": 1.0816935300827026, + "step": 89 + }, + { + "epoch": 0.03171806167400881, + "grad_norm": 1.1311803004782939, + "learning_rate": 6.267605633802818e-06, + "loss": 1.03245210647583, + "step": 90 + }, + { + "epoch": 0.0320704845814978, + "grad_norm": 1.0820347157611818, + "learning_rate": 6.3380281690140855e-06, + "loss": 0.9812602400779724, + "step": 91 + }, + { + "epoch": 0.03242290748898678, + "grad_norm": 1.0039329578342324, + "learning_rate": 6.408450704225353e-06, + "loss": 0.9303219318389893, + "step": 92 + }, + { + "epoch": 0.032775330396475774, + "grad_norm": 1.0308477549900932, + "learning_rate": 6.478873239436621e-06, + "loss": 1.175403356552124, + "step": 93 + }, + { + "epoch": 0.03312775330396476, + "grad_norm": 1.0822064194108554, + "learning_rate": 6.549295774647888e-06, + "loss": 1.1863958835601807, + "step": 94 + }, + { + "epoch": 0.033480176211453744, + "grad_norm": 1.0560683839166303, + "learning_rate": 6.619718309859155e-06, + "loss": 0.8630557060241699, + "step": 95 + }, + { + "epoch": 0.03383259911894273, + "grad_norm": 1.1683225259655636, + "learning_rate": 6.690140845070423e-06, + "loss": 1.0499619245529175, + "step": 96 + }, + { + "epoch": 0.03418502202643172, + "grad_norm": 0.9766019012274652, + "learning_rate": 6.760563380281691e-06, + "loss": 0.9443086981773376, + "step": 97 + }, + { + "epoch": 0.034537444933920705, + "grad_norm": 1.1779098792527396, + "learning_rate": 6.830985915492958e-06, + "loss": 1.0011450052261353, + "step": 98 + }, + { + "epoch": 0.03488986784140969, + "grad_norm": 1.0941166094999715, + "learning_rate": 6.901408450704225e-06, + "loss": 1.0239083766937256, + "step": 99 + }, + { + "epoch": 0.03524229074889868, + "grad_norm": 1.1605003575433563, + 
"learning_rate": 6.9718309859154935e-06, + "loss": 1.1335347890853882, + "step": 100 + }, + { + "epoch": 0.035594713656387666, + "grad_norm": 1.1410420869639502, + "learning_rate": 7.042253521126761e-06, + "loss": 0.9650854468345642, + "step": 101 + }, + { + "epoch": 0.03594713656387665, + "grad_norm": 1.0366491839089684, + "learning_rate": 7.112676056338029e-06, + "loss": 0.9284406900405884, + "step": 102 + }, + { + "epoch": 0.036299559471365636, + "grad_norm": 1.0368314441443032, + "learning_rate": 7.183098591549297e-06, + "loss": 0.989676296710968, + "step": 103 + }, + { + "epoch": 0.03665198237885463, + "grad_norm": 1.0475480945800932, + "learning_rate": 7.253521126760564e-06, + "loss": 0.9149842262268066, + "step": 104 + }, + { + "epoch": 0.03700440528634361, + "grad_norm": 1.0115568298427282, + "learning_rate": 7.3239436619718316e-06, + "loss": 0.9793657064437866, + "step": 105 + }, + { + "epoch": 0.0373568281938326, + "grad_norm": 1.0923401558071288, + "learning_rate": 7.3943661971831e-06, + "loss": 0.9508543014526367, + "step": 106 + }, + { + "epoch": 0.03770925110132159, + "grad_norm": 1.123373083563155, + "learning_rate": 7.464788732394367e-06, + "loss": 1.0623283386230469, + "step": 107 + }, + { + "epoch": 0.038061674008810574, + "grad_norm": 1.0472469474411819, + "learning_rate": 7.535211267605634e-06, + "loss": 1.0039314031600952, + "step": 108 + }, + { + "epoch": 0.03841409691629956, + "grad_norm": 1.1301029490219276, + "learning_rate": 7.6056338028169015e-06, + "loss": 0.9315502643585205, + "step": 109 + }, + { + "epoch": 0.038766519823788544, + "grad_norm": 0.9958020122553335, + "learning_rate": 7.67605633802817e-06, + "loss": 0.936677098274231, + "step": 110 + }, + { + "epoch": 0.039118942731277535, + "grad_norm": 1.0684105284421879, + "learning_rate": 7.746478873239436e-06, + "loss": 0.9373410940170288, + "step": 111 + }, + { + "epoch": 0.03947136563876652, + "grad_norm": 1.155598878121798, + "learning_rate": 7.816901408450704e-06, + "loss": 
1.01617431640625, + "step": 112 + }, + { + "epoch": 0.039823788546255505, + "grad_norm": 1.1889006080727076, + "learning_rate": 7.887323943661972e-06, + "loss": 1.1713547706604004, + "step": 113 + }, + { + "epoch": 0.0401762114537445, + "grad_norm": 1.1256223667919436, + "learning_rate": 7.95774647887324e-06, + "loss": 0.8982350826263428, + "step": 114 + }, + { + "epoch": 0.04052863436123348, + "grad_norm": 1.0914199985412718, + "learning_rate": 8.028169014084509e-06, + "loss": 0.8123869895935059, + "step": 115 + }, + { + "epoch": 0.040881057268722466, + "grad_norm": 1.1505365914239516, + "learning_rate": 8.098591549295775e-06, + "loss": 1.0762536525726318, + "step": 116 + }, + { + "epoch": 0.04123348017621145, + "grad_norm": 1.0367170014557934, + "learning_rate": 8.169014084507043e-06, + "loss": 1.004841923713684, + "step": 117 + }, + { + "epoch": 0.04158590308370044, + "grad_norm": 1.0966724197265187, + "learning_rate": 8.239436619718311e-06, + "loss": 0.9237936735153198, + "step": 118 + }, + { + "epoch": 0.04193832599118943, + "grad_norm": 1.0785540239343763, + "learning_rate": 8.309859154929578e-06, + "loss": 0.9038913249969482, + "step": 119 + }, + { + "epoch": 0.04229074889867841, + "grad_norm": 1.1784096317090726, + "learning_rate": 8.380281690140846e-06, + "loss": 0.9488446712493896, + "step": 120 + }, + { + "epoch": 0.042643171806167404, + "grad_norm": 1.1559534491366574, + "learning_rate": 8.450704225352114e-06, + "loss": 1.0862706899642944, + "step": 121 + }, + { + "epoch": 0.04299559471365639, + "grad_norm": 1.5143452874154766, + "learning_rate": 8.52112676056338e-06, + "loss": 0.8882313966751099, + "step": 122 + }, + { + "epoch": 0.043348017621145374, + "grad_norm": 1.1412568707979918, + "learning_rate": 8.591549295774648e-06, + "loss": 0.9125900268554688, + "step": 123 + }, + { + "epoch": 0.04370044052863436, + "grad_norm": 1.403727281403332, + "learning_rate": 8.661971830985915e-06, + "loss": 0.944568395614624, + "step": 124 + }, + { + "epoch": 
0.04405286343612335, + "grad_norm": 1.2993905510610635, + "learning_rate": 8.732394366197183e-06, + "loss": 0.9303089380264282, + "step": 125 + }, + { + "epoch": 0.044405286343612335, + "grad_norm": 1.1184314169128153, + "learning_rate": 8.802816901408451e-06, + "loss": 1.0983362197875977, + "step": 126 + }, + { + "epoch": 0.04475770925110132, + "grad_norm": 1.40811546312751, + "learning_rate": 8.87323943661972e-06, + "loss": 1.002477765083313, + "step": 127 + }, + { + "epoch": 0.04511013215859031, + "grad_norm": 1.1638063617076078, + "learning_rate": 8.943661971830987e-06, + "loss": 0.9994120001792908, + "step": 128 + }, + { + "epoch": 0.045462555066079297, + "grad_norm": 1.2118035451866538, + "learning_rate": 9.014084507042254e-06, + "loss": 1.0785832405090332, + "step": 129 + }, + { + "epoch": 0.04581497797356828, + "grad_norm": 1.0820277493757582, + "learning_rate": 9.084507042253522e-06, + "loss": 0.779441237449646, + "step": 130 + }, + { + "epoch": 0.046167400881057266, + "grad_norm": 1.1766256779195974, + "learning_rate": 9.15492957746479e-06, + "loss": 1.0052348375320435, + "step": 131 + }, + { + "epoch": 0.04651982378854626, + "grad_norm": 1.0771619013639089, + "learning_rate": 9.225352112676057e-06, + "loss": 1.0327996015548706, + "step": 132 + }, + { + "epoch": 0.04687224669603524, + "grad_norm": 1.501276619683034, + "learning_rate": 9.295774647887325e-06, + "loss": 1.0643246173858643, + "step": 133 + }, + { + "epoch": 0.04722466960352423, + "grad_norm": 1.1427145785080848, + "learning_rate": 9.366197183098593e-06, + "loss": 0.8449216485023499, + "step": 134 + }, + { + "epoch": 0.04757709251101322, + "grad_norm": 1.2684019730338143, + "learning_rate": 9.43661971830986e-06, + "loss": 0.8867055177688599, + "step": 135 + }, + { + "epoch": 0.047929515418502204, + "grad_norm": 1.4156875615017863, + "learning_rate": 9.507042253521127e-06, + "loss": 1.048499584197998, + "step": 136 + }, + { + "epoch": 0.04828193832599119, + "grad_norm": 1.2120768691141688, + 
"learning_rate": 9.577464788732394e-06, + "loss": 1.0548617839813232, + "step": 137 + }, + { + "epoch": 0.048634361233480174, + "grad_norm": 1.0679337780928526, + "learning_rate": 9.647887323943664e-06, + "loss": 0.8882845044136047, + "step": 138 + }, + { + "epoch": 0.048986784140969165, + "grad_norm": 1.622342973826323, + "learning_rate": 9.71830985915493e-06, + "loss": 1.032647967338562, + "step": 139 + }, + { + "epoch": 0.04933920704845815, + "grad_norm": 1.058782348686911, + "learning_rate": 9.788732394366198e-06, + "loss": 1.039523959159851, + "step": 140 + }, + { + "epoch": 0.049691629955947135, + "grad_norm": 1.1234982994751406, + "learning_rate": 9.859154929577466e-06, + "loss": 0.8451036214828491, + "step": 141 + }, + { + "epoch": 0.05004405286343613, + "grad_norm": 1.1376284406077708, + "learning_rate": 9.929577464788733e-06, + "loss": 0.8285897970199585, + "step": 142 + }, + { + "epoch": 0.05039647577092511, + "grad_norm": 1.2057497016168632, + "learning_rate": 1e-05, + "loss": 0.9998278021812439, + "step": 143 + }, + { + "epoch": 0.050748898678414096, + "grad_norm": 1.4528082359287422, + "learning_rate": 1.0070422535211269e-05, + "loss": 0.9782301187515259, + "step": 144 + }, + { + "epoch": 0.05110132158590308, + "grad_norm": 1.1663459671948497, + "learning_rate": 1.0140845070422535e-05, + "loss": 1.0557070970535278, + "step": 145 + }, + { + "epoch": 0.05145374449339207, + "grad_norm": 1.159146071512081, + "learning_rate": 1.0211267605633803e-05, + "loss": 0.9516133069992065, + "step": 146 + }, + { + "epoch": 0.05180616740088106, + "grad_norm": 1.1694686158556986, + "learning_rate": 1.0281690140845072e-05, + "loss": 0.8965041637420654, + "step": 147 + }, + { + "epoch": 0.05215859030837004, + "grad_norm": 1.2713520268346183, + "learning_rate": 1.0352112676056338e-05, + "loss": 0.8627057075500488, + "step": 148 + }, + { + "epoch": 0.052511013215859034, + "grad_norm": 1.0456048049111641, + "learning_rate": 1.0422535211267606e-05, + "loss": 
0.7627567648887634, + "step": 149 + }, + { + "epoch": 0.05286343612334802, + "grad_norm": 1.2332422487154633, + "learning_rate": 1.0492957746478873e-05, + "loss": 0.8522504568099976, + "step": 150 + }, + { + "epoch": 0.053215859030837004, + "grad_norm": 1.1106957565365498, + "learning_rate": 1.056338028169014e-05, + "loss": 0.7164312601089478, + "step": 151 + }, + { + "epoch": 0.05356828193832599, + "grad_norm": 1.0487512934158103, + "learning_rate": 1.0633802816901409e-05, + "loss": 0.9141941070556641, + "step": 152 + }, + { + "epoch": 0.05392070484581498, + "grad_norm": 1.5228596875919753, + "learning_rate": 1.0704225352112675e-05, + "loss": 0.9145504832267761, + "step": 153 + }, + { + "epoch": 0.054273127753303965, + "grad_norm": 1.19745569358961, + "learning_rate": 1.0774647887323943e-05, + "loss": 0.9851646423339844, + "step": 154 + }, + { + "epoch": 0.05462555066079295, + "grad_norm": 1.1547769204431162, + "learning_rate": 1.0845070422535213e-05, + "loss": 0.9319474697113037, + "step": 155 + }, + { + "epoch": 0.05497797356828194, + "grad_norm": 1.242055483054837, + "learning_rate": 1.0915492957746481e-05, + "loss": 0.995783269405365, + "step": 156 + }, + { + "epoch": 0.05533039647577093, + "grad_norm": 1.270129466753014, + "learning_rate": 1.0985915492957748e-05, + "loss": 0.8636226654052734, + "step": 157 + }, + { + "epoch": 0.05568281938325991, + "grad_norm": 1.26388911778751, + "learning_rate": 1.1056338028169016e-05, + "loss": 0.8860869407653809, + "step": 158 + }, + { + "epoch": 0.056035242290748896, + "grad_norm": 1.24911279543244, + "learning_rate": 1.1126760563380284e-05, + "loss": 0.9256196618080139, + "step": 159 + }, + { + "epoch": 0.05638766519823789, + "grad_norm": 1.1684473229538663, + "learning_rate": 1.119718309859155e-05, + "loss": 0.8217915296554565, + "step": 160 + }, + { + "epoch": 0.05674008810572687, + "grad_norm": 1.2938227991615623, + "learning_rate": 1.1267605633802819e-05, + "loss": 0.9808465838432312, + "step": 161 + }, + { + 
"epoch": 0.05709251101321586, + "grad_norm": 1.2234654171305366, + "learning_rate": 1.1338028169014087e-05, + "loss": 0.7733014822006226, + "step": 162 + }, + { + "epoch": 0.05744493392070485, + "grad_norm": 1.1428802626649461, + "learning_rate": 1.1408450704225353e-05, + "loss": 0.8581304550170898, + "step": 163 + }, + { + "epoch": 0.057797356828193834, + "grad_norm": 1.3252890457476052, + "learning_rate": 1.1478873239436621e-05, + "loss": 0.9242054224014282, + "step": 164 + }, + { + "epoch": 0.05814977973568282, + "grad_norm": 1.3695567443378234, + "learning_rate": 1.1549295774647888e-05, + "loss": 1.0302021503448486, + "step": 165 + }, + { + "epoch": 0.058502202643171804, + "grad_norm": 1.2950143159958714, + "learning_rate": 1.1619718309859156e-05, + "loss": 0.8954275846481323, + "step": 166 + }, + { + "epoch": 0.058854625550660795, + "grad_norm": 1.1779404187828553, + "learning_rate": 1.1690140845070424e-05, + "loss": 0.891846776008606, + "step": 167 + }, + { + "epoch": 0.05920704845814978, + "grad_norm": 1.1837706775348158, + "learning_rate": 1.176056338028169e-05, + "loss": 0.887005627155304, + "step": 168 + }, + { + "epoch": 0.059559471365638765, + "grad_norm": 1.289448297537656, + "learning_rate": 1.1830985915492958e-05, + "loss": 0.9020301103591919, + "step": 169 + }, + { + "epoch": 0.05991189427312775, + "grad_norm": 1.2185831955131692, + "learning_rate": 1.1901408450704227e-05, + "loss": 0.7925454378128052, + "step": 170 + }, + { + "epoch": 0.06026431718061674, + "grad_norm": 1.31750363404193, + "learning_rate": 1.1971830985915493e-05, + "loss": 0.8058332800865173, + "step": 171 + }, + { + "epoch": 0.060616740088105726, + "grad_norm": 1.2435062872951204, + "learning_rate": 1.2042253521126761e-05, + "loss": 0.892992377281189, + "step": 172 + }, + { + "epoch": 0.06096916299559471, + "grad_norm": 1.0835922361658872, + "learning_rate": 1.211267605633803e-05, + "loss": 0.8482734560966492, + "step": 173 + }, + { + "epoch": 0.0613215859030837, + "grad_norm": 
1.2806384537102478, + "learning_rate": 1.2183098591549296e-05, + "loss": 0.8652878999710083, + "step": 174 + }, + { + "epoch": 0.06167400881057269, + "grad_norm": 1.183930720799068, + "learning_rate": 1.2253521126760564e-05, + "loss": 0.8590051531791687, + "step": 175 + }, + { + "epoch": 0.06202643171806167, + "grad_norm": 1.1264180921527844, + "learning_rate": 1.232394366197183e-05, + "loss": 0.7106916904449463, + "step": 176 + }, + { + "epoch": 0.06237885462555066, + "grad_norm": 1.5304901042334342, + "learning_rate": 1.2394366197183098e-05, + "loss": 0.9298936128616333, + "step": 177 + }, + { + "epoch": 0.06273127753303964, + "grad_norm": 1.3380597134261425, + "learning_rate": 1.2464788732394367e-05, + "loss": 1.027758240699768, + "step": 178 + }, + { + "epoch": 0.06308370044052863, + "grad_norm": 1.4071851827143296, + "learning_rate": 1.2535211267605636e-05, + "loss": 0.9576354026794434, + "step": 179 + }, + { + "epoch": 0.06343612334801763, + "grad_norm": 1.476054189108656, + "learning_rate": 1.2605633802816903e-05, + "loss": 0.6881245374679565, + "step": 180 + }, + { + "epoch": 0.0637885462555066, + "grad_norm": 1.736658600923819, + "learning_rate": 1.2676056338028171e-05, + "loss": 0.9629781246185303, + "step": 181 + }, + { + "epoch": 0.0641409691629956, + "grad_norm": 1.3990061114909895, + "learning_rate": 1.2746478873239439e-05, + "loss": 0.849892258644104, + "step": 182 + }, + { + "epoch": 0.06449339207048459, + "grad_norm": 1.2776276047787312, + "learning_rate": 1.2816901408450705e-05, + "loss": 0.9294229745864868, + "step": 183 + }, + { + "epoch": 0.06484581497797356, + "grad_norm": 1.346185395248099, + "learning_rate": 1.2887323943661974e-05, + "loss": 0.9534600973129272, + "step": 184 + }, + { + "epoch": 0.06519823788546256, + "grad_norm": 1.2547825941083024, + "learning_rate": 1.2957746478873242e-05, + "loss": 0.7937755584716797, + "step": 185 + }, + { + "epoch": 0.06555066079295155, + "grad_norm": 1.215372024356157, + "learning_rate": 
1.3028169014084508e-05, + "loss": 0.9188590049743652, + "step": 186 + }, + { + "epoch": 0.06590308370044053, + "grad_norm": 1.3372931395210206, + "learning_rate": 1.3098591549295776e-05, + "loss": 0.8775123357772827, + "step": 187 + }, + { + "epoch": 0.06625550660792952, + "grad_norm": 1.2703292803517752, + "learning_rate": 1.3169014084507044e-05, + "loss": 0.8562190532684326, + "step": 188 + }, + { + "epoch": 0.0666079295154185, + "grad_norm": 1.1593142823065046, + "learning_rate": 1.323943661971831e-05, + "loss": 0.9427295327186584, + "step": 189 + }, + { + "epoch": 0.06696035242290749, + "grad_norm": 1.1080518257913534, + "learning_rate": 1.3309859154929579e-05, + "loss": 0.6142286062240601, + "step": 190 + }, + { + "epoch": 0.06731277533039648, + "grad_norm": 1.416041365414943, + "learning_rate": 1.3380281690140845e-05, + "loss": 0.7480863332748413, + "step": 191 + }, + { + "epoch": 0.06766519823788546, + "grad_norm": 1.6287312517465182, + "learning_rate": 1.3450704225352114e-05, + "loss": 0.898857593536377, + "step": 192 + }, + { + "epoch": 0.06801762114537445, + "grad_norm": 1.4737642135415263, + "learning_rate": 1.3521126760563382e-05, + "loss": 0.8584127426147461, + "step": 193 + }, + { + "epoch": 0.06837004405286344, + "grad_norm": 1.2178631494207084, + "learning_rate": 1.3591549295774648e-05, + "loss": 0.9400655031204224, + "step": 194 + }, + { + "epoch": 0.06872246696035242, + "grad_norm": 1.2698602238237462, + "learning_rate": 1.3661971830985916e-05, + "loss": 0.7750787734985352, + "step": 195 + }, + { + "epoch": 0.06907488986784141, + "grad_norm": 1.2474557266398312, + "learning_rate": 1.3732394366197184e-05, + "loss": 0.8530284762382507, + "step": 196 + }, + { + "epoch": 0.0694273127753304, + "grad_norm": 1.3191630227557989, + "learning_rate": 1.380281690140845e-05, + "loss": 0.9019994735717773, + "step": 197 + }, + { + "epoch": 0.06977973568281938, + "grad_norm": 1.1994310415476668, + "learning_rate": 1.3873239436619719e-05, + "loss": 
0.7749642133712769, + "step": 198 + }, + { + "epoch": 0.07013215859030837, + "grad_norm": 1.3060142025317714, + "learning_rate": 1.3943661971830987e-05, + "loss": 0.956200122833252, + "step": 199 + }, + { + "epoch": 0.07048458149779736, + "grad_norm": 1.3510407726181874, + "learning_rate": 1.4014084507042253e-05, + "loss": 0.8544470071792603, + "step": 200 + }, + { + "epoch": 0.07083700440528634, + "grad_norm": 1.447521091304659, + "learning_rate": 1.4084507042253522e-05, + "loss": 0.8776387572288513, + "step": 201 + }, + { + "epoch": 0.07118942731277533, + "grad_norm": 1.5340123254246993, + "learning_rate": 1.4154929577464788e-05, + "loss": 0.9949591755867004, + "step": 202 + }, + { + "epoch": 0.07154185022026431, + "grad_norm": 1.306920931788941, + "learning_rate": 1.4225352112676058e-05, + "loss": 0.9616764783859253, + "step": 203 + }, + { + "epoch": 0.0718942731277533, + "grad_norm": 1.3490978686730206, + "learning_rate": 1.4295774647887326e-05, + "loss": 0.9247175455093384, + "step": 204 + }, + { + "epoch": 0.0722466960352423, + "grad_norm": 1.4241509312853966, + "learning_rate": 1.4366197183098594e-05, + "loss": 0.7946479320526123, + "step": 205 + }, + { + "epoch": 0.07259911894273127, + "grad_norm": 1.3949991357763207, + "learning_rate": 1.443661971830986e-05, + "loss": 0.7929860353469849, + "step": 206 + }, + { + "epoch": 0.07295154185022026, + "grad_norm": 1.3725430537583514, + "learning_rate": 1.4507042253521129e-05, + "loss": 0.9215391874313354, + "step": 207 + }, + { + "epoch": 0.07330396475770926, + "grad_norm": 1.4247014676365253, + "learning_rate": 1.4577464788732397e-05, + "loss": 0.8767607808113098, + "step": 208 + }, + { + "epoch": 0.07365638766519823, + "grad_norm": 1.3691339839746066, + "learning_rate": 1.4647887323943663e-05, + "loss": 0.8586276769638062, + "step": 209 + }, + { + "epoch": 0.07400881057268723, + "grad_norm": 1.3252388254138234, + "learning_rate": 1.4718309859154931e-05, + "loss": 0.8680851459503174, + "step": 210 + }, + { + 
"epoch": 0.07436123348017622, + "grad_norm": 1.2834178375463614, + "learning_rate": 1.47887323943662e-05, + "loss": 0.8887720108032227, + "step": 211 + }, + { + "epoch": 0.0747136563876652, + "grad_norm": 1.4918681608584679, + "learning_rate": 1.4859154929577466e-05, + "loss": 0.8887100219726562, + "step": 212 + }, + { + "epoch": 0.07506607929515419, + "grad_norm": 1.247870788657092, + "learning_rate": 1.4929577464788734e-05, + "loss": 0.9257807731628418, + "step": 213 + }, + { + "epoch": 0.07541850220264318, + "grad_norm": 1.2922967878533598, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.8107355833053589, + "step": 214 + }, + { + "epoch": 0.07577092511013216, + "grad_norm": 1.390091314994072, + "learning_rate": 1.5070422535211269e-05, + "loss": 0.8765913844108582, + "step": 215 + }, + { + "epoch": 0.07612334801762115, + "grad_norm": 1.3936279931065536, + "learning_rate": 1.5140845070422537e-05, + "loss": 0.8973524570465088, + "step": 216 + }, + { + "epoch": 0.07647577092511013, + "grad_norm": 1.310665112588589, + "learning_rate": 1.5211267605633803e-05, + "loss": 0.9194613695144653, + "step": 217 + }, + { + "epoch": 0.07682819383259912, + "grad_norm": 1.4152279415932816, + "learning_rate": 1.528169014084507e-05, + "loss": 0.8832643032073975, + "step": 218 + }, + { + "epoch": 0.07718061674008811, + "grad_norm": 1.465705079678902, + "learning_rate": 1.535211267605634e-05, + "loss": 0.9575356245040894, + "step": 219 + }, + { + "epoch": 0.07753303964757709, + "grad_norm": 1.2268114727867823, + "learning_rate": 1.5422535211267607e-05, + "loss": 0.8302342891693115, + "step": 220 + }, + { + "epoch": 0.07788546255506608, + "grad_norm": 1.2978917843344704, + "learning_rate": 1.5492957746478872e-05, + "loss": 0.7999966144561768, + "step": 221 + }, + { + "epoch": 0.07823788546255507, + "grad_norm": 1.271952593735668, + "learning_rate": 1.556338028169014e-05, + "loss": 0.8201859593391418, + "step": 222 + }, + { + "epoch": 0.07859030837004405, + "grad_norm": 
1.635464665304201, + "learning_rate": 1.563380281690141e-05, + "loss": 0.872761607170105, + "step": 223 + }, + { + "epoch": 0.07894273127753304, + "grad_norm": 1.7544850567681591, + "learning_rate": 1.5704225352112677e-05, + "loss": 0.8695409297943115, + "step": 224 + }, + { + "epoch": 0.07929515418502203, + "grad_norm": 1.2478131333285527, + "learning_rate": 1.5774647887323945e-05, + "loss": 0.8532050848007202, + "step": 225 + }, + { + "epoch": 0.07964757709251101, + "grad_norm": 1.5276196879895285, + "learning_rate": 1.5845070422535213e-05, + "loss": 0.7875121235847473, + "step": 226 + }, + { + "epoch": 0.08, + "grad_norm": 1.5837485275916963, + "learning_rate": 1.591549295774648e-05, + "loss": 0.7131509780883789, + "step": 227 + }, + { + "epoch": 0.080352422907489, + "grad_norm": 1.4681482709870555, + "learning_rate": 1.598591549295775e-05, + "loss": 0.9758431911468506, + "step": 228 + }, + { + "epoch": 0.08070484581497797, + "grad_norm": 1.4451165548552447, + "learning_rate": 1.6056338028169017e-05, + "loss": 0.7894232273101807, + "step": 229 + }, + { + "epoch": 0.08105726872246696, + "grad_norm": 1.2417235745587356, + "learning_rate": 1.6126760563380285e-05, + "loss": 0.9933483600616455, + "step": 230 + }, + { + "epoch": 0.08140969162995594, + "grad_norm": 1.4745298800972837, + "learning_rate": 1.619718309859155e-05, + "loss": 0.8424056768417358, + "step": 231 + }, + { + "epoch": 0.08176211453744493, + "grad_norm": 1.4626597398090972, + "learning_rate": 1.6267605633802818e-05, + "loss": 0.7957695126533508, + "step": 232 + }, + { + "epoch": 0.08211453744493392, + "grad_norm": 1.243843455131114, + "learning_rate": 1.6338028169014086e-05, + "loss": 0.8491722345352173, + "step": 233 + }, + { + "epoch": 0.0824669603524229, + "grad_norm": 1.407640698868158, + "learning_rate": 1.6408450704225354e-05, + "loss": 0.7010964751243591, + "step": 234 + }, + { + "epoch": 0.0828193832599119, + "grad_norm": 1.4584433632361322, + "learning_rate": 1.6478873239436623e-05, + 
"loss": 0.8713864088058472, + "step": 235 + }, + { + "epoch": 0.08317180616740089, + "grad_norm": 1.261328425360657, + "learning_rate": 1.6549295774647887e-05, + "loss": 0.6724761128425598, + "step": 236 + }, + { + "epoch": 0.08352422907488986, + "grad_norm": 1.219837126653021, + "learning_rate": 1.6619718309859155e-05, + "loss": 0.8612109422683716, + "step": 237 + }, + { + "epoch": 0.08387665198237886, + "grad_norm": 1.4745868727167897, + "learning_rate": 1.6690140845070424e-05, + "loss": 0.5697110891342163, + "step": 238 + }, + { + "epoch": 0.08422907488986785, + "grad_norm": 1.2506294676144012, + "learning_rate": 1.676056338028169e-05, + "loss": 0.7877228260040283, + "step": 239 + }, + { + "epoch": 0.08458149779735682, + "grad_norm": 1.1492235860181979, + "learning_rate": 1.683098591549296e-05, + "loss": 0.8751014471054077, + "step": 240 + }, + { + "epoch": 0.08493392070484582, + "grad_norm": 1.527957574033417, + "learning_rate": 1.6901408450704228e-05, + "loss": 0.8731381893157959, + "step": 241 + }, + { + "epoch": 0.08528634361233481, + "grad_norm": 1.291362512763109, + "learning_rate": 1.6971830985915493e-05, + "loss": 0.831383228302002, + "step": 242 + }, + { + "epoch": 0.08563876651982379, + "grad_norm": 1.2699070733171296, + "learning_rate": 1.704225352112676e-05, + "loss": 0.792934238910675, + "step": 243 + }, + { + "epoch": 0.08599118942731278, + "grad_norm": 1.1592748972292606, + "learning_rate": 1.711267605633803e-05, + "loss": 0.6723657846450806, + "step": 244 + }, + { + "epoch": 0.08634361233480176, + "grad_norm": 1.4796981905185658, + "learning_rate": 1.7183098591549297e-05, + "loss": 0.8377546072006226, + "step": 245 + }, + { + "epoch": 0.08669603524229075, + "grad_norm": 1.2727987522874769, + "learning_rate": 1.7253521126760565e-05, + "loss": 0.8073972463607788, + "step": 246 + }, + { + "epoch": 0.08704845814977974, + "grad_norm": 1.6240304260373406, + "learning_rate": 1.732394366197183e-05, + "loss": 0.8913615942001343, + "step": 247 + }, + { + 
"epoch": 0.08740088105726872, + "grad_norm": 1.4436852067854697, + "learning_rate": 1.7394366197183098e-05, + "loss": 0.9133341312408447, + "step": 248 + }, + { + "epoch": 0.08775330396475771, + "grad_norm": 1.6098073633875791, + "learning_rate": 1.7464788732394366e-05, + "loss": 0.7593938112258911, + "step": 249 + }, + { + "epoch": 0.0881057268722467, + "grad_norm": 1.456505700957212, + "learning_rate": 1.7535211267605638e-05, + "loss": 0.8049266934394836, + "step": 250 + }, + { + "epoch": 0.08845814977973568, + "grad_norm": 1.44397678174898, + "learning_rate": 1.7605633802816902e-05, + "loss": 0.9065679311752319, + "step": 251 + }, + { + "epoch": 0.08881057268722467, + "grad_norm": 1.5285644429403964, + "learning_rate": 1.767605633802817e-05, + "loss": 0.9309085011482239, + "step": 252 + }, + { + "epoch": 0.08916299559471366, + "grad_norm": 1.3367293223358285, + "learning_rate": 1.774647887323944e-05, + "loss": 0.7846949100494385, + "step": 253 + }, + { + "epoch": 0.08951541850220264, + "grad_norm": 1.4721492627949804, + "learning_rate": 1.7816901408450707e-05, + "loss": 0.9153063297271729, + "step": 254 + }, + { + "epoch": 0.08986784140969163, + "grad_norm": 1.2843813691966974, + "learning_rate": 1.7887323943661975e-05, + "loss": 0.7743638157844543, + "step": 255 + }, + { + "epoch": 0.09022026431718062, + "grad_norm": 1.6034162783223496, + "learning_rate": 1.7957746478873243e-05, + "loss": 0.887751579284668, + "step": 256 + }, + { + "epoch": 0.0905726872246696, + "grad_norm": 1.2387435479452011, + "learning_rate": 1.8028169014084508e-05, + "loss": 0.8072899580001831, + "step": 257 + }, + { + "epoch": 0.09092511013215859, + "grad_norm": 1.3642448388425203, + "learning_rate": 1.8098591549295776e-05, + "loss": 0.8275943994522095, + "step": 258 + }, + { + "epoch": 0.09127753303964757, + "grad_norm": 1.3287842865535133, + "learning_rate": 1.8169014084507044e-05, + "loss": 0.8300620913505554, + "step": 259 + }, + { + "epoch": 0.09162995594713656, + "grad_norm": 
1.26616505669333, + "learning_rate": 1.8239436619718312e-05, + "loss": 0.6886857748031616, + "step": 260 + }, + { + "epoch": 0.09198237885462555, + "grad_norm": 2.689833624979495, + "learning_rate": 1.830985915492958e-05, + "loss": 0.8190158605575562, + "step": 261 + }, + { + "epoch": 0.09233480176211453, + "grad_norm": 1.3392491700180422, + "learning_rate": 1.8380281690140845e-05, + "loss": 0.8500730991363525, + "step": 262 + }, + { + "epoch": 0.09268722466960352, + "grad_norm": 1.499663410513064, + "learning_rate": 1.8450704225352113e-05, + "loss": 0.8340811729431152, + "step": 263 + }, + { + "epoch": 0.09303964757709252, + "grad_norm": 1.3031308803407857, + "learning_rate": 1.852112676056338e-05, + "loss": 0.8055675029754639, + "step": 264 + }, + { + "epoch": 0.0933920704845815, + "grad_norm": 1.410218243221954, + "learning_rate": 1.859154929577465e-05, + "loss": 0.7956680059432983, + "step": 265 + }, + { + "epoch": 0.09374449339207049, + "grad_norm": 1.4181751660111779, + "learning_rate": 1.8661971830985917e-05, + "loss": 0.8232501745223999, + "step": 266 + }, + { + "epoch": 0.09409691629955948, + "grad_norm": 1.472224530959967, + "learning_rate": 1.8732394366197186e-05, + "loss": 0.8808565139770508, + "step": 267 + }, + { + "epoch": 0.09444933920704845, + "grad_norm": 1.5113548411958122, + "learning_rate": 1.880281690140845e-05, + "loss": 0.885380744934082, + "step": 268 + }, + { + "epoch": 0.09480176211453745, + "grad_norm": 1.5009611452094687, + "learning_rate": 1.887323943661972e-05, + "loss": 0.8408790826797485, + "step": 269 + }, + { + "epoch": 0.09515418502202644, + "grad_norm": 1.395810517840328, + "learning_rate": 1.8943661971830987e-05, + "loss": 0.7089993953704834, + "step": 270 + }, + { + "epoch": 0.09550660792951542, + "grad_norm": 1.280231938177333, + "learning_rate": 1.9014084507042255e-05, + "loss": 0.7941038608551025, + "step": 271 + }, + { + "epoch": 0.09585903083700441, + "grad_norm": 1.5210768015450882, + "learning_rate": 
1.9084507042253523e-05, + "loss": 0.8269138932228088, + "step": 272 + }, + { + "epoch": 0.09621145374449339, + "grad_norm": 1.5053903060638305, + "learning_rate": 1.9154929577464788e-05, + "loss": 0.8206192255020142, + "step": 273 + }, + { + "epoch": 0.09656387665198238, + "grad_norm": 1.49737615599854, + "learning_rate": 1.922535211267606e-05, + "loss": 0.9146496653556824, + "step": 274 + }, + { + "epoch": 0.09691629955947137, + "grad_norm": 1.1755726979972605, + "learning_rate": 1.9295774647887327e-05, + "loss": 0.6738560199737549, + "step": 275 + }, + { + "epoch": 0.09726872246696035, + "grad_norm": 1.3169911381980228, + "learning_rate": 1.9366197183098595e-05, + "loss": 0.934916615486145, + "step": 276 + }, + { + "epoch": 0.09762114537444934, + "grad_norm": 1.357245739203775, + "learning_rate": 1.943661971830986e-05, + "loss": 0.8952134847640991, + "step": 277 + }, + { + "epoch": 0.09797356828193833, + "grad_norm": 1.3423178147772294, + "learning_rate": 1.9507042253521128e-05, + "loss": 0.9346420764923096, + "step": 278 + }, + { + "epoch": 0.09832599118942731, + "grad_norm": 1.5698833191970427, + "learning_rate": 1.9577464788732396e-05, + "loss": 0.8781993985176086, + "step": 279 + }, + { + "epoch": 0.0986784140969163, + "grad_norm": 1.4703395142125208, + "learning_rate": 1.9647887323943664e-05, + "loss": 0.8283448219299316, + "step": 280 + }, + { + "epoch": 0.09903083700440529, + "grad_norm": 1.2650765439550704, + "learning_rate": 1.9718309859154933e-05, + "loss": 0.8010722398757935, + "step": 281 + }, + { + "epoch": 0.09938325991189427, + "grad_norm": 1.3576050403922397, + "learning_rate": 1.97887323943662e-05, + "loss": 0.8697119951248169, + "step": 282 + }, + { + "epoch": 0.09973568281938326, + "grad_norm": 1.098837792765385, + "learning_rate": 1.9859154929577465e-05, + "loss": 0.6448882818222046, + "step": 283 + }, + { + "epoch": 0.10008810572687225, + "grad_norm": 1.5101908618325302, + "learning_rate": 1.9929577464788734e-05, + "loss": 0.7782007455825806, 
+ "step": 284 + }, + { + "epoch": 0.10044052863436123, + "grad_norm": 1.455658231417001, + "learning_rate": 2e-05, + "loss": 0.8131508827209473, + "step": 285 + }, + { + "epoch": 0.10079295154185022, + "grad_norm": 1.4413777660177336, + "learning_rate": 1.999999830265561e-05, + "loss": 0.8592134714126587, + "step": 286 + }, + { + "epoch": 0.1011453744493392, + "grad_norm": 1.5671417589518397, + "learning_rate": 1.9999993210623002e-05, + "loss": 0.9374675750732422, + "step": 287 + }, + { + "epoch": 0.10149779735682819, + "grad_norm": 1.5499152824954487, + "learning_rate": 1.9999984723903913e-05, + "loss": 0.8416328430175781, + "step": 288 + }, + { + "epoch": 0.10185022026431718, + "grad_norm": 1.267360297703748, + "learning_rate": 1.9999972842501218e-05, + "loss": 0.7587184906005859, + "step": 289 + }, + { + "epoch": 0.10220264317180616, + "grad_norm": 1.4783535336356979, + "learning_rate": 1.9999957566418956e-05, + "loss": 1.010494351387024, + "step": 290 + }, + { + "epoch": 0.10255506607929515, + "grad_norm": 1.3092025632301814, + "learning_rate": 1.999993889566231e-05, + "loss": 0.7942835092544556, + "step": 291 + }, + { + "epoch": 0.10290748898678415, + "grad_norm": 1.4620379458028798, + "learning_rate": 1.999991683023762e-05, + "loss": 0.9069477915763855, + "step": 292 + }, + { + "epoch": 0.10325991189427312, + "grad_norm": 1.781963673155629, + "learning_rate": 1.9999891370152375e-05, + "loss": 0.8776397705078125, + "step": 293 + }, + { + "epoch": 0.10361233480176212, + "grad_norm": 1.3409879305652028, + "learning_rate": 1.9999862515415216e-05, + "loss": 0.8560416102409363, + "step": 294 + }, + { + "epoch": 0.10396475770925111, + "grad_norm": 1.601676543787724, + "learning_rate": 1.9999830266035942e-05, + "loss": 0.9177321195602417, + "step": 295 + }, + { + "epoch": 0.10431718061674009, + "grad_norm": 1.621521883940329, + "learning_rate": 1.99997946220255e-05, + "loss": 0.8830884695053101, + "step": 296 + }, + { + "epoch": 0.10466960352422908, + "grad_norm": 
1.5076951372471592, + "learning_rate": 1.9999755583395987e-05, + "loss": 0.913659930229187, + "step": 297 + }, + { + "epoch": 0.10502202643171807, + "grad_norm": 1.48724181087663, + "learning_rate": 1.999971315016066e-05, + "loss": 0.773309588432312, + "step": 298 + }, + { + "epoch": 0.10537444933920705, + "grad_norm": 1.4640758198016095, + "learning_rate": 1.9999667322333916e-05, + "loss": 0.8432563543319702, + "step": 299 + }, + { + "epoch": 0.10572687224669604, + "grad_norm": 1.5419897004531282, + "learning_rate": 1.999961809993132e-05, + "loss": 0.9632397890090942, + "step": 300 + }, + { + "epoch": 0.10607929515418502, + "grad_norm": 1.4657018761848883, + "learning_rate": 1.999956548296958e-05, + "loss": 0.8205600380897522, + "step": 301 + }, + { + "epoch": 0.10643171806167401, + "grad_norm": 1.2908123355748096, + "learning_rate": 1.9999509471466557e-05, + "loss": 0.8789785504341125, + "step": 302 + }, + { + "epoch": 0.106784140969163, + "grad_norm": 1.4062841050093677, + "learning_rate": 1.999945006544126e-05, + "loss": 0.8445791006088257, + "step": 303 + }, + { + "epoch": 0.10713656387665198, + "grad_norm": 1.3201850616961108, + "learning_rate": 1.9999387264913865e-05, + "loss": 0.8025245666503906, + "step": 304 + }, + { + "epoch": 0.10748898678414097, + "grad_norm": 1.3596018005437036, + "learning_rate": 1.9999321069905688e-05, + "loss": 0.9271318912506104, + "step": 305 + }, + { + "epoch": 0.10784140969162996, + "grad_norm": 1.167387591378785, + "learning_rate": 1.999925148043919e-05, + "loss": 0.809894859790802, + "step": 306 + }, + { + "epoch": 0.10819383259911894, + "grad_norm": 1.4267923203712158, + "learning_rate": 1.999917849653801e-05, + "loss": 0.8940669298171997, + "step": 307 + }, + { + "epoch": 0.10854625550660793, + "grad_norm": 1.466148592973388, + "learning_rate": 1.9999102118226912e-05, + "loss": 0.9301233887672424, + "step": 308 + }, + { + "epoch": 0.10889867841409692, + "grad_norm": 1.271175959298383, + "learning_rate": 
1.9999022345531834e-05, + "loss": 0.6429216861724854, + "step": 309 + }, + { + "epoch": 0.1092511013215859, + "grad_norm": 1.3392816449794738, + "learning_rate": 1.999893917847985e-05, + "loss": 0.7199009656906128, + "step": 310 + }, + { + "epoch": 0.10960352422907489, + "grad_norm": 1.2732787140894477, + "learning_rate": 1.999885261709919e-05, + "loss": 0.8312395811080933, + "step": 311 + }, + { + "epoch": 0.10995594713656388, + "grad_norm": 1.4809957988420102, + "learning_rate": 1.999876266141924e-05, + "loss": 0.8187745213508606, + "step": 312 + }, + { + "epoch": 0.11030837004405286, + "grad_norm": 1.2638906346778362, + "learning_rate": 1.9998669311470546e-05, + "loss": 0.8632344603538513, + "step": 313 + }, + { + "epoch": 0.11066079295154185, + "grad_norm": 1.5651718256034985, + "learning_rate": 1.9998572567284787e-05, + "loss": 0.8789447546005249, + "step": 314 + }, + { + "epoch": 0.11101321585903083, + "grad_norm": 1.4657438576086577, + "learning_rate": 1.999847242889481e-05, + "loss": 0.7647864818572998, + "step": 315 + }, + { + "epoch": 0.11136563876651982, + "grad_norm": 1.2962284510646964, + "learning_rate": 1.9998368896334606e-05, + "loss": 0.872633695602417, + "step": 316 + }, + { + "epoch": 0.11171806167400881, + "grad_norm": 1.4704185501053861, + "learning_rate": 1.9998261969639324e-05, + "loss": 0.8249840140342712, + "step": 317 + }, + { + "epoch": 0.11207048458149779, + "grad_norm": 1.6298830469717174, + "learning_rate": 1.999815164884526e-05, + "loss": 0.7558056116104126, + "step": 318 + }, + { + "epoch": 0.11242290748898678, + "grad_norm": 1.3075257157183537, + "learning_rate": 1.9998037933989866e-05, + "loss": 0.7447441220283508, + "step": 319 + }, + { + "epoch": 0.11277533039647578, + "grad_norm": 1.4956646267919036, + "learning_rate": 1.9997920825111743e-05, + "loss": 0.8260442018508911, + "step": 320 + }, + { + "epoch": 0.11312775330396475, + "grad_norm": 1.2866274072297625, + "learning_rate": 1.999780032225065e-05, + "loss": 
0.7916134595870972, + "step": 321 + }, + { + "epoch": 0.11348017621145375, + "grad_norm": 1.3548711592442237, + "learning_rate": 1.9997676425447486e-05, + "loss": 0.7460259199142456, + "step": 322 + }, + { + "epoch": 0.11383259911894274, + "grad_norm": 1.4664419676620792, + "learning_rate": 1.9997549134744318e-05, + "loss": 0.9739946126937866, + "step": 323 + }, + { + "epoch": 0.11418502202643172, + "grad_norm": 1.3133090693965692, + "learning_rate": 1.9997418450184352e-05, + "loss": 0.7242900133132935, + "step": 324 + }, + { + "epoch": 0.1145374449339207, + "grad_norm": 1.7023646414032152, + "learning_rate": 1.9997284371811955e-05, + "loss": 0.7645323276519775, + "step": 325 + }, + { + "epoch": 0.1148898678414097, + "grad_norm": 1.3437215758424148, + "learning_rate": 1.9997146899672638e-05, + "loss": 0.7377017736434937, + "step": 326 + }, + { + "epoch": 0.11524229074889868, + "grad_norm": 1.3608732999796416, + "learning_rate": 1.9997006033813076e-05, + "loss": 0.7117934226989746, + "step": 327 + }, + { + "epoch": 0.11559471365638767, + "grad_norm": 1.485158034808982, + "learning_rate": 1.999686177428108e-05, + "loss": 0.8517680168151855, + "step": 328 + }, + { + "epoch": 0.11594713656387665, + "grad_norm": 1.3118416735480631, + "learning_rate": 1.9996714121125626e-05, + "loss": 0.7099400758743286, + "step": 329 + }, + { + "epoch": 0.11629955947136564, + "grad_norm": 1.3949559553781739, + "learning_rate": 1.9996563074396838e-05, + "loss": 0.8581711053848267, + "step": 330 + }, + { + "epoch": 0.11665198237885463, + "grad_norm": 1.322464822656225, + "learning_rate": 1.9996408634145994e-05, + "loss": 0.7841953635215759, + "step": 331 + }, + { + "epoch": 0.11700440528634361, + "grad_norm": 1.2580468593989962, + "learning_rate": 1.9996250800425515e-05, + "loss": 0.7376754879951477, + "step": 332 + }, + { + "epoch": 0.1173568281938326, + "grad_norm": 1.3538742269891202, + "learning_rate": 1.9996089573288985e-05, + "loss": 0.8934558033943176, + "step": 333 + }, + { + 
"epoch": 0.11770925110132159, + "grad_norm": 1.4597310886631008, + "learning_rate": 1.999592495279113e-05, + "loss": 0.7870250940322876, + "step": 334 + }, + { + "epoch": 0.11806167400881057, + "grad_norm": 1.5788273084375275, + "learning_rate": 1.9995756938987846e-05, + "loss": 0.7026203274726868, + "step": 335 + }, + { + "epoch": 0.11841409691629956, + "grad_norm": 2.206437289778364, + "learning_rate": 1.999558553193616e-05, + "loss": 1.0066381692886353, + "step": 336 + }, + { + "epoch": 0.11876651982378855, + "grad_norm": 1.349262918557434, + "learning_rate": 1.9995410731694255e-05, + "loss": 0.7860246896743774, + "step": 337 + }, + { + "epoch": 0.11911894273127753, + "grad_norm": 1.4261295710834618, + "learning_rate": 1.999523253832148e-05, + "loss": 0.8142588138580322, + "step": 338 + }, + { + "epoch": 0.11947136563876652, + "grad_norm": 1.403543131076251, + "learning_rate": 1.9995050951878317e-05, + "loss": 0.9737639427185059, + "step": 339 + }, + { + "epoch": 0.1198237885462555, + "grad_norm": 1.2538473699838193, + "learning_rate": 1.999486597242642e-05, + "loss": 0.6165765523910522, + "step": 340 + }, + { + "epoch": 0.12017621145374449, + "grad_norm": 1.4403971646421685, + "learning_rate": 1.999467760002857e-05, + "loss": 0.8553996086120605, + "step": 341 + }, + { + "epoch": 0.12052863436123348, + "grad_norm": 1.579218034733104, + "learning_rate": 1.9994485834748725e-05, + "loss": 0.9291022419929504, + "step": 342 + }, + { + "epoch": 0.12088105726872246, + "grad_norm": 1.3583147087232978, + "learning_rate": 1.9994290676651977e-05, + "loss": 0.8309136629104614, + "step": 343 + }, + { + "epoch": 0.12123348017621145, + "grad_norm": 1.2343518052190974, + "learning_rate": 1.999409212580458e-05, + "loss": 0.6963932514190674, + "step": 344 + }, + { + "epoch": 0.12158590308370044, + "grad_norm": 1.126432291251887, + "learning_rate": 1.9993890182273932e-05, + "loss": 0.8220632076263428, + "step": 345 + }, + { + "epoch": 0.12193832599118942, + "grad_norm": 
1.5283410369228738, + "learning_rate": 1.9993684846128588e-05, + "loss": 0.8407794237136841, + "step": 346 + }, + { + "epoch": 0.12229074889867841, + "grad_norm": 1.479739244816861, + "learning_rate": 1.9993476117438257e-05, + "loss": 0.795718789100647, + "step": 347 + }, + { + "epoch": 0.1226431718061674, + "grad_norm": 1.3466106447402244, + "learning_rate": 1.9993263996273792e-05, + "loss": 0.7482223510742188, + "step": 348 + }, + { + "epoch": 0.12299559471365638, + "grad_norm": 1.4606743428798505, + "learning_rate": 1.99930484827072e-05, + "loss": 0.814468264579773, + "step": 349 + }, + { + "epoch": 0.12334801762114538, + "grad_norm": 1.5345713664893856, + "learning_rate": 1.9992829576811648e-05, + "loss": 0.8105748891830444, + "step": 350 + }, + { + "epoch": 0.12370044052863437, + "grad_norm": 1.6869192314100032, + "learning_rate": 1.9992607278661437e-05, + "loss": 0.8756073713302612, + "step": 351 + }, + { + "epoch": 0.12405286343612335, + "grad_norm": 1.228330868948225, + "learning_rate": 1.9992381588332043e-05, + "loss": 0.8643946647644043, + "step": 352 + }, + { + "epoch": 0.12440528634361234, + "grad_norm": 1.1468400313164093, + "learning_rate": 1.9992152505900067e-05, + "loss": 0.7691172361373901, + "step": 353 + }, + { + "epoch": 0.12475770925110131, + "grad_norm": 1.3198644948783926, + "learning_rate": 1.9991920031443288e-05, + "loss": 0.716686487197876, + "step": 354 + }, + { + "epoch": 0.12511013215859032, + "grad_norm": 1.39334404424432, + "learning_rate": 1.9991684165040616e-05, + "loss": 0.697482705116272, + "step": 355 + }, + { + "epoch": 0.12546255506607928, + "grad_norm": 1.5087579956634654, + "learning_rate": 1.999144490677212e-05, + "loss": 0.8039460182189941, + "step": 356 + }, + { + "epoch": 0.12581497797356828, + "grad_norm": 1.3206582875495743, + "learning_rate": 1.9991202256719032e-05, + "loss": 0.872138261795044, + "step": 357 + }, + { + "epoch": 0.12616740088105727, + "grad_norm": 1.330801420963485, + "learning_rate": 
1.999095621496371e-05, + "loss": 0.8659502267837524, + "step": 358 + }, + { + "epoch": 0.12651982378854626, + "grad_norm": 1.2062023445068855, + "learning_rate": 1.9990706781589682e-05, + "loss": 0.7585660219192505, + "step": 359 + }, + { + "epoch": 0.12687224669603525, + "grad_norm": 1.349814688916852, + "learning_rate": 1.9990453956681626e-05, + "loss": 0.86381995677948, + "step": 360 + }, + { + "epoch": 0.12722466960352422, + "grad_norm": 1.3080210647965176, + "learning_rate": 1.9990197740325365e-05, + "loss": 0.7623461484909058, + "step": 361 + }, + { + "epoch": 0.1275770925110132, + "grad_norm": 1.4247026163468757, + "learning_rate": 1.9989938132607877e-05, + "loss": 0.8262917995452881, + "step": 362 + }, + { + "epoch": 0.1279295154185022, + "grad_norm": 1.3245955099655373, + "learning_rate": 1.9989675133617294e-05, + "loss": 0.7879630327224731, + "step": 363 + }, + { + "epoch": 0.1282819383259912, + "grad_norm": 1.5925116832241206, + "learning_rate": 1.9989408743442892e-05, + "loss": 0.8282565474510193, + "step": 364 + }, + { + "epoch": 0.12863436123348018, + "grad_norm": 1.151308483630064, + "learning_rate": 1.9989138962175105e-05, + "loss": 0.8358104228973389, + "step": 365 + }, + { + "epoch": 0.12898678414096917, + "grad_norm": 1.4831450607430074, + "learning_rate": 1.9988865789905513e-05, + "loss": 0.9111027121543884, + "step": 366 + }, + { + "epoch": 0.12933920704845814, + "grad_norm": 1.4181532995073547, + "learning_rate": 1.9988589226726847e-05, + "loss": 0.766915500164032, + "step": 367 + }, + { + "epoch": 0.12969162995594713, + "grad_norm": 1.3923253104774793, + "learning_rate": 1.9988309272733e-05, + "loss": 0.818048357963562, + "step": 368 + }, + { + "epoch": 0.13004405286343612, + "grad_norm": 1.2625645815303237, + "learning_rate": 1.9988025928019e-05, + "loss": 0.8188307285308838, + "step": 369 + }, + { + "epoch": 0.1303964757709251, + "grad_norm": 1.4656557007271924, + "learning_rate": 1.998773919268104e-05, + "loss": 0.88718181848526, + "step": 
370 + }, + { + "epoch": 0.1307488986784141, + "grad_norm": 1.3104922660776017, + "learning_rate": 1.998744906681645e-05, + "loss": 0.9173898696899414, + "step": 371 + }, + { + "epoch": 0.1311013215859031, + "grad_norm": 1.4305544884130297, + "learning_rate": 1.9987155550523725e-05, + "loss": 0.8025110960006714, + "step": 372 + }, + { + "epoch": 0.13145374449339206, + "grad_norm": 1.2328392002659898, + "learning_rate": 1.9986858643902502e-05, + "loss": 0.8931341767311096, + "step": 373 + }, + { + "epoch": 0.13180616740088105, + "grad_norm": 1.258415234092876, + "learning_rate": 1.9986558347053574e-05, + "loss": 0.8813796043395996, + "step": 374 + }, + { + "epoch": 0.13215859030837004, + "grad_norm": 1.3254702068923054, + "learning_rate": 1.9986254660078877e-05, + "loss": 0.8021976947784424, + "step": 375 + }, + { + "epoch": 0.13251101321585904, + "grad_norm": 1.3001638136254743, + "learning_rate": 1.9985947583081506e-05, + "loss": 0.8083860874176025, + "step": 376 + }, + { + "epoch": 0.13286343612334803, + "grad_norm": 1.2519881014381842, + "learning_rate": 1.9985637116165705e-05, + "loss": 0.7639983296394348, + "step": 377 + }, + { + "epoch": 0.133215859030837, + "grad_norm": 1.3308962501940544, + "learning_rate": 1.9985323259436874e-05, + "loss": 0.7775800228118896, + "step": 378 + }, + { + "epoch": 0.13356828193832598, + "grad_norm": 1.3822704707659155, + "learning_rate": 1.9985006013001545e-05, + "loss": 0.8892228603363037, + "step": 379 + }, + { + "epoch": 0.13392070484581498, + "grad_norm": 1.4007373611969895, + "learning_rate": 1.998468537696742e-05, + "loss": 0.9158765077590942, + "step": 380 + }, + { + "epoch": 0.13427312775330397, + "grad_norm": 1.2142103786325267, + "learning_rate": 1.9984361351443343e-05, + "loss": 0.7523722648620605, + "step": 381 + }, + { + "epoch": 0.13462555066079296, + "grad_norm": 1.5406874167870075, + "learning_rate": 1.998403393653932e-05, + "loss": 0.8052740693092346, + "step": 382 + }, + { + "epoch": 0.13497797356828195, + 
"grad_norm": 4.839014305582762, + "learning_rate": 1.9983703132366484e-05, + "loss": 0.8271476626396179, + "step": 383 + }, + { + "epoch": 0.13533039647577091, + "grad_norm": 1.3724243356768093, + "learning_rate": 1.998336893903714e-05, + "loss": 0.8904454112052917, + "step": 384 + }, + { + "epoch": 0.1356828193832599, + "grad_norm": 1.5086695454887955, + "learning_rate": 1.9983031356664733e-05, + "loss": 0.8705847263336182, + "step": 385 + }, + { + "epoch": 0.1360352422907489, + "grad_norm": 1.3562221939291232, + "learning_rate": 1.9982690385363867e-05, + "loss": 0.8269569873809814, + "step": 386 + }, + { + "epoch": 0.1363876651982379, + "grad_norm": 1.6156870918588995, + "learning_rate": 1.998234602525029e-05, + "loss": 0.9796818494796753, + "step": 387 + }, + { + "epoch": 0.13674008810572688, + "grad_norm": 1.5268638185003427, + "learning_rate": 1.9981998276440892e-05, + "loss": 0.8276596665382385, + "step": 388 + }, + { + "epoch": 0.13709251101321585, + "grad_norm": 1.1979978409172833, + "learning_rate": 1.9981647139053737e-05, + "loss": 0.8739231824874878, + "step": 389 + }, + { + "epoch": 0.13744493392070484, + "grad_norm": 1.517970302113154, + "learning_rate": 1.9981292613208018e-05, + "loss": 0.677521824836731, + "step": 390 + }, + { + "epoch": 0.13779735682819383, + "grad_norm": 1.483399153515808, + "learning_rate": 1.9980934699024084e-05, + "loss": 0.744938313961029, + "step": 391 + }, + { + "epoch": 0.13814977973568282, + "grad_norm": 1.423178346498717, + "learning_rate": 1.998057339662344e-05, + "loss": 0.8367065787315369, + "step": 392 + }, + { + "epoch": 0.1385022026431718, + "grad_norm": 1.6714277386990386, + "learning_rate": 1.9980208706128733e-05, + "loss": 0.775547981262207, + "step": 393 + }, + { + "epoch": 0.1388546255506608, + "grad_norm": 1.244274379470138, + "learning_rate": 1.9979840627663764e-05, + "loss": 0.8287982940673828, + "step": 394 + }, + { + "epoch": 0.13920704845814977, + "grad_norm": 1.429588244120958, + "learning_rate": 
1.997946916135349e-05, + "loss": 0.7582247257232666, + "step": 395 + }, + { + "epoch": 0.13955947136563876, + "grad_norm": 1.309709423857836, + "learning_rate": 1.997909430732401e-05, + "loss": 0.968267560005188, + "step": 396 + }, + { + "epoch": 0.13991189427312775, + "grad_norm": 1.4247483192434738, + "learning_rate": 1.9978716065702566e-05, + "loss": 0.8850257396697998, + "step": 397 + }, + { + "epoch": 0.14026431718061674, + "grad_norm": 1.1261344584223945, + "learning_rate": 1.9978334436617574e-05, + "loss": 0.7206246852874756, + "step": 398 + }, + { + "epoch": 0.14061674008810573, + "grad_norm": 1.2702546976441136, + "learning_rate": 1.9977949420198576e-05, + "loss": 0.7833065986633301, + "step": 399 + }, + { + "epoch": 0.14096916299559473, + "grad_norm": 1.2940706461552187, + "learning_rate": 1.9977561016576275e-05, + "loss": 0.7199673652648926, + "step": 400 + }, + { + "epoch": 0.1413215859030837, + "grad_norm": 1.3300807823897647, + "learning_rate": 1.9977169225882522e-05, + "loss": 0.7544811367988586, + "step": 401 + }, + { + "epoch": 0.14167400881057268, + "grad_norm": 1.3500860064281444, + "learning_rate": 1.9976774048250317e-05, + "loss": 0.7528219819068909, + "step": 402 + }, + { + "epoch": 0.14202643171806167, + "grad_norm": 1.230028309495833, + "learning_rate": 1.9976375483813814e-05, + "loss": 0.8025565147399902, + "step": 403 + }, + { + "epoch": 0.14237885462555067, + "grad_norm": 1.271700071603726, + "learning_rate": 1.997597353270831e-05, + "loss": 0.6553962230682373, + "step": 404 + }, + { + "epoch": 0.14273127753303966, + "grad_norm": 1.195900427449374, + "learning_rate": 1.9975568195070253e-05, + "loss": 0.7070015072822571, + "step": 405 + }, + { + "epoch": 0.14308370044052862, + "grad_norm": 1.238996854756085, + "learning_rate": 1.9975159471037247e-05, + "loss": 0.7454725503921509, + "step": 406 + }, + { + "epoch": 0.1434361233480176, + "grad_norm": 1.5517260528670263, + "learning_rate": 1.9974747360748038e-05, + "loss": 0.7074518799781799, 
+ "step": 407 + }, + { + "epoch": 0.1437885462555066, + "grad_norm": 1.4240478656973132, + "learning_rate": 1.9974331864342527e-05, + "loss": 0.6870182752609253, + "step": 408 + }, + { + "epoch": 0.1441409691629956, + "grad_norm": 1.5514938206230895, + "learning_rate": 1.9973912981961763e-05, + "loss": 0.826898455619812, + "step": 409 + }, + { + "epoch": 0.1444933920704846, + "grad_norm": 1.483679538302774, + "learning_rate": 1.997349071374794e-05, + "loss": 0.7244436740875244, + "step": 410 + }, + { + "epoch": 0.14484581497797358, + "grad_norm": 1.2681717185328807, + "learning_rate": 1.9973065059844404e-05, + "loss": 0.6885448694229126, + "step": 411 + }, + { + "epoch": 0.14519823788546254, + "grad_norm": 1.3797417122455713, + "learning_rate": 1.9972636020395653e-05, + "loss": 0.8477644920349121, + "step": 412 + }, + { + "epoch": 0.14555066079295154, + "grad_norm": 1.5051840849568912, + "learning_rate": 1.9972203595547334e-05, + "loss": 0.9432111382484436, + "step": 413 + }, + { + "epoch": 0.14590308370044053, + "grad_norm": 1.351618505603555, + "learning_rate": 1.9971767785446243e-05, + "loss": 1.0101501941680908, + "step": 414 + }, + { + "epoch": 0.14625550660792952, + "grad_norm": 1.421926997117087, + "learning_rate": 1.997132859024032e-05, + "loss": 0.8174984455108643, + "step": 415 + }, + { + "epoch": 0.1466079295154185, + "grad_norm": 1.1573592385577054, + "learning_rate": 1.997088601007866e-05, + "loss": 0.6857198476791382, + "step": 416 + }, + { + "epoch": 0.14696035242290748, + "grad_norm": 1.1795540078822444, + "learning_rate": 1.9970440045111505e-05, + "loss": 0.7742792367935181, + "step": 417 + }, + { + "epoch": 0.14731277533039647, + "grad_norm": 1.783143700583216, + "learning_rate": 1.996999069549025e-05, + "loss": 0.7489269971847534, + "step": 418 + }, + { + "epoch": 0.14766519823788546, + "grad_norm": 1.4327273961807123, + "learning_rate": 1.9969537961367423e-05, + "loss": 0.7362021207809448, + "step": 419 + }, + { + "epoch": 0.14801762114537445, + 
"grad_norm": 1.3763810595433905, + "learning_rate": 1.996908184289673e-05, + "loss": 0.7596213221549988, + "step": 420 + }, + { + "epoch": 0.14837004405286344, + "grad_norm": 1.3357573192960268, + "learning_rate": 1.9968622340232993e-05, + "loss": 0.7739163637161255, + "step": 421 + }, + { + "epoch": 0.14872246696035243, + "grad_norm": 1.2890109075687697, + "learning_rate": 1.9968159453532215e-05, + "loss": 0.9059790372848511, + "step": 422 + }, + { + "epoch": 0.1490748898678414, + "grad_norm": 1.4830814966077062, + "learning_rate": 1.9967693182951516e-05, + "loss": 0.7298871278762817, + "step": 423 + }, + { + "epoch": 0.1494273127753304, + "grad_norm": 1.3303231094936145, + "learning_rate": 1.9967223528649194e-05, + "loss": 0.7218194007873535, + "step": 424 + }, + { + "epoch": 0.14977973568281938, + "grad_norm": 1.3738677080017252, + "learning_rate": 1.996675049078467e-05, + "loss": 0.8031259179115295, + "step": 425 + }, + { + "epoch": 0.15013215859030837, + "grad_norm": 1.402915539690338, + "learning_rate": 1.9966274069518533e-05, + "loss": 0.8583194613456726, + "step": 426 + }, + { + "epoch": 0.15048458149779737, + "grad_norm": 1.5081794718854693, + "learning_rate": 1.9965794265012514e-05, + "loss": 0.7829155921936035, + "step": 427 + }, + { + "epoch": 0.15083700440528636, + "grad_norm": 1.3040065928659967, + "learning_rate": 1.9965311077429484e-05, + "loss": 0.709203839302063, + "step": 428 + }, + { + "epoch": 0.15118942731277532, + "grad_norm": 1.324153309243564, + "learning_rate": 1.996482450693348e-05, + "loss": 0.7515710592269897, + "step": 429 + }, + { + "epoch": 0.1515418502202643, + "grad_norm": 1.5966034920450463, + "learning_rate": 1.9964334553689674e-05, + "loss": 0.8552615642547607, + "step": 430 + }, + { + "epoch": 0.1518942731277533, + "grad_norm": 1.3833039246024212, + "learning_rate": 1.9963841217864385e-05, + "loss": 0.7946224808692932, + "step": 431 + }, + { + "epoch": 0.1522466960352423, + "grad_norm": 1.351342046961, + "learning_rate": 
1.9963344499625087e-05, + "loss": 0.7117756605148315, + "step": 432 + }, + { + "epoch": 0.1525991189427313, + "grad_norm": 1.5677032677150589, + "learning_rate": 1.9962844399140405e-05, + "loss": 0.8892849683761597, + "step": 433 + }, + { + "epoch": 0.15295154185022025, + "grad_norm": 1.6682742006947457, + "learning_rate": 1.9962340916580105e-05, + "loss": 0.9037783145904541, + "step": 434 + }, + { + "epoch": 0.15330396475770924, + "grad_norm": 1.3178590359087465, + "learning_rate": 1.9961834052115104e-05, + "loss": 0.7419179677963257, + "step": 435 + }, + { + "epoch": 0.15365638766519824, + "grad_norm": 1.500659178246394, + "learning_rate": 1.9961323805917464e-05, + "loss": 0.847285270690918, + "step": 436 + }, + { + "epoch": 0.15400881057268723, + "grad_norm": 1.520891708486689, + "learning_rate": 1.99608101781604e-05, + "loss": 0.793263852596283, + "step": 437 + }, + { + "epoch": 0.15436123348017622, + "grad_norm": 1.2927327484478677, + "learning_rate": 1.9960293169018276e-05, + "loss": 0.6600923538208008, + "step": 438 + }, + { + "epoch": 0.1547136563876652, + "grad_norm": 1.178823428760428, + "learning_rate": 1.9959772778666592e-05, + "loss": 0.7642164826393127, + "step": 439 + }, + { + "epoch": 0.15506607929515417, + "grad_norm": 1.4230767051116806, + "learning_rate": 1.995924900728201e-05, + "loss": 0.897221565246582, + "step": 440 + }, + { + "epoch": 0.15541850220264317, + "grad_norm": 1.3912415328195475, + "learning_rate": 1.9958721855042338e-05, + "loss": 0.830953061580658, + "step": 441 + }, + { + "epoch": 0.15577092511013216, + "grad_norm": 1.3683790024985447, + "learning_rate": 1.995819132212652e-05, + "loss": 0.7514863014221191, + "step": 442 + }, + { + "epoch": 0.15612334801762115, + "grad_norm": 1.3179910502987273, + "learning_rate": 1.995765740871466e-05, + "loss": 0.7039257287979126, + "step": 443 + }, + { + "epoch": 0.15647577092511014, + "grad_norm": 1.5017230130600239, + "learning_rate": 1.9957120114988e-05, + "loss": 0.810503363609314, + 
"step": 444 + }, + { + "epoch": 0.1568281938325991, + "grad_norm": 1.4050071397488821, + "learning_rate": 1.9956579441128942e-05, + "loss": 0.616968035697937, + "step": 445 + }, + { + "epoch": 0.1571806167400881, + "grad_norm": 1.3149075420166694, + "learning_rate": 1.9956035387321024e-05, + "loss": 0.7008740901947021, + "step": 446 + }, + { + "epoch": 0.1575330396475771, + "grad_norm": 1.4992101173925434, + "learning_rate": 1.995548795374893e-05, + "loss": 0.847025454044342, + "step": 447 + }, + { + "epoch": 0.15788546255506608, + "grad_norm": 1.3763555067673139, + "learning_rate": 1.9954937140598506e-05, + "loss": 0.7788053750991821, + "step": 448 + }, + { + "epoch": 0.15823788546255507, + "grad_norm": 1.301728118921247, + "learning_rate": 1.9954382948056735e-05, + "loss": 0.7592896819114685, + "step": 449 + }, + { + "epoch": 0.15859030837004406, + "grad_norm": 1.6001158206313053, + "learning_rate": 1.995382537631174e-05, + "loss": 0.9458491802215576, + "step": 450 + }, + { + "epoch": 0.15894273127753303, + "grad_norm": 1.3218132869761372, + "learning_rate": 1.9953264425552804e-05, + "loss": 0.8069632053375244, + "step": 451 + }, + { + "epoch": 0.15929515418502202, + "grad_norm": 1.316918406992957, + "learning_rate": 1.9952700095970357e-05, + "loss": 0.7876379489898682, + "step": 452 + }, + { + "epoch": 0.159647577092511, + "grad_norm": 1.5440089355741875, + "learning_rate": 1.9952132387755965e-05, + "loss": 0.796333909034729, + "step": 453 + }, + { + "epoch": 0.16, + "grad_norm": 1.243828269503452, + "learning_rate": 1.9951561301102348e-05, + "loss": 0.7171634435653687, + "step": 454 + }, + { + "epoch": 0.160352422907489, + "grad_norm": 1.429835470120866, + "learning_rate": 1.9950986836203374e-05, + "loss": 0.8312792778015137, + "step": 455 + }, + { + "epoch": 0.160704845814978, + "grad_norm": 1.4333167021702193, + "learning_rate": 1.995040899325406e-05, + "loss": 0.7496857643127441, + "step": 456 + }, + { + "epoch": 0.16105726872246695, + "grad_norm": 
1.2513531381670333, + "learning_rate": 1.9949827772450555e-05, + "loss": 0.89504075050354, + "step": 457 + }, + { + "epoch": 0.16140969162995594, + "grad_norm": 1.5536951579594835, + "learning_rate": 1.9949243173990172e-05, + "loss": 0.7580761313438416, + "step": 458 + }, + { + "epoch": 0.16176211453744493, + "grad_norm": 1.6782383396512721, + "learning_rate": 1.9948655198071365e-05, + "loss": 0.7826676368713379, + "step": 459 + }, + { + "epoch": 0.16211453744493393, + "grad_norm": 1.5979456835427475, + "learning_rate": 1.9948063844893733e-05, + "loss": 0.7591372728347778, + "step": 460 + }, + { + "epoch": 0.16246696035242292, + "grad_norm": 1.394749193132719, + "learning_rate": 1.994746911465802e-05, + "loss": 0.7366905808448792, + "step": 461 + }, + { + "epoch": 0.16281938325991188, + "grad_norm": 1.2449236570155473, + "learning_rate": 1.9946871007566116e-05, + "loss": 0.7152266502380371, + "step": 462 + }, + { + "epoch": 0.16317180616740087, + "grad_norm": 1.475247855733958, + "learning_rate": 1.994626952382107e-05, + "loss": 0.8411930799484253, + "step": 463 + }, + { + "epoch": 0.16352422907488987, + "grad_norm": 1.1709525471997975, + "learning_rate": 1.9945664663627054e-05, + "loss": 0.6689857244491577, + "step": 464 + }, + { + "epoch": 0.16387665198237886, + "grad_norm": 1.3007920668059838, + "learning_rate": 1.9945056427189408e-05, + "loss": 0.6474499106407166, + "step": 465 + }, + { + "epoch": 0.16422907488986785, + "grad_norm": 1.397646475804827, + "learning_rate": 1.9944444814714604e-05, + "loss": 0.7861372232437134, + "step": 466 + }, + { + "epoch": 0.16458149779735684, + "grad_norm": 1.4072541980161448, + "learning_rate": 1.9943829826410273e-05, + "loss": 0.8301665186882019, + "step": 467 + }, + { + "epoch": 0.1649339207048458, + "grad_norm": 1.1473159016242473, + "learning_rate": 1.9943211462485176e-05, + "loss": 0.661811888217926, + "step": 468 + }, + { + "epoch": 0.1652863436123348, + "grad_norm": 1.4009911983471504, + "learning_rate": 
1.9942589723149233e-05, + "loss": 0.7768537402153015, + "step": 469 + }, + { + "epoch": 0.1656387665198238, + "grad_norm": 1.209922489625636, + "learning_rate": 1.9941964608613503e-05, + "loss": 0.6139112710952759, + "step": 470 + }, + { + "epoch": 0.16599118942731278, + "grad_norm": 1.3814257371396368, + "learning_rate": 1.9941336119090193e-05, + "loss": 0.8284693956375122, + "step": 471 + }, + { + "epoch": 0.16634361233480177, + "grad_norm": 1.2594577624707568, + "learning_rate": 1.9940704254792655e-05, + "loss": 0.7281739711761475, + "step": 472 + }, + { + "epoch": 0.16669603524229074, + "grad_norm": 1.4773463672265492, + "learning_rate": 1.994006901593539e-05, + "loss": 0.687767744064331, + "step": 473 + }, + { + "epoch": 0.16704845814977973, + "grad_norm": 1.3067539084660165, + "learning_rate": 1.9939430402734046e-05, + "loss": 0.7553595304489136, + "step": 474 + }, + { + "epoch": 0.16740088105726872, + "grad_norm": 1.5537103296420662, + "learning_rate": 1.99387884154054e-05, + "loss": 0.9263294339179993, + "step": 475 + }, + { + "epoch": 0.1677533039647577, + "grad_norm": 1.5514792381885942, + "learning_rate": 1.9938143054167397e-05, + "loss": 0.7014337182044983, + "step": 476 + }, + { + "epoch": 0.1681057268722467, + "grad_norm": 1.1598559513797833, + "learning_rate": 1.9937494319239112e-05, + "loss": 0.6454538106918335, + "step": 477 + }, + { + "epoch": 0.1684581497797357, + "grad_norm": 1.3402764899565285, + "learning_rate": 1.9936842210840775e-05, + "loss": 0.7792352437973022, + "step": 478 + }, + { + "epoch": 0.16881057268722466, + "grad_norm": 1.481603380133959, + "learning_rate": 1.9936186729193753e-05, + "loss": 0.8773127794265747, + "step": 479 + }, + { + "epoch": 0.16916299559471365, + "grad_norm": 1.3472965431143242, + "learning_rate": 1.993552787452056e-05, + "loss": 0.892439603805542, + "step": 480 + }, + { + "epoch": 0.16951541850220264, + "grad_norm": 1.5839752051025837, + "learning_rate": 1.993486564704486e-05, + "loss": 0.89835524559021, + 
"step": 481 + }, + { + "epoch": 0.16986784140969163, + "grad_norm": 1.4593777249036533, + "learning_rate": 1.9934200046991453e-05, + "loss": 0.8013701438903809, + "step": 482 + }, + { + "epoch": 0.17022026431718063, + "grad_norm": 1.5168797838116639, + "learning_rate": 1.9933531074586296e-05, + "loss": 0.8086763620376587, + "step": 483 + }, + { + "epoch": 0.17057268722466962, + "grad_norm": 1.4399310447978144, + "learning_rate": 1.9932858730056486e-05, + "loss": 0.7736518383026123, + "step": 484 + }, + { + "epoch": 0.17092511013215858, + "grad_norm": 1.2982542574143365, + "learning_rate": 1.9932183013630257e-05, + "loss": 0.6247539520263672, + "step": 485 + }, + { + "epoch": 0.17127753303964757, + "grad_norm": 1.519445958865324, + "learning_rate": 1.9931503925536996e-05, + "loss": 0.7172006368637085, + "step": 486 + }, + { + "epoch": 0.17162995594713656, + "grad_norm": 1.3043787656359138, + "learning_rate": 1.993082146600723e-05, + "loss": 0.7854465246200562, + "step": 487 + }, + { + "epoch": 0.17198237885462556, + "grad_norm": 1.2038371426907561, + "learning_rate": 1.9930135635272637e-05, + "loss": 0.7018419504165649, + "step": 488 + }, + { + "epoch": 0.17233480176211455, + "grad_norm": 1.2578522146284077, + "learning_rate": 1.9929446433566033e-05, + "loss": 0.783660352230072, + "step": 489 + }, + { + "epoch": 0.1726872246696035, + "grad_norm": 1.4288043068768257, + "learning_rate": 1.992875386112138e-05, + "loss": 1.0166207551956177, + "step": 490 + }, + { + "epoch": 0.1730396475770925, + "grad_norm": 1.5208280960226344, + "learning_rate": 1.9928057918173786e-05, + "loss": 0.7692895531654358, + "step": 491 + }, + { + "epoch": 0.1733920704845815, + "grad_norm": 1.3733404774184526, + "learning_rate": 1.9927358604959503e-05, + "loss": 0.8005259037017822, + "step": 492 + }, + { + "epoch": 0.1737444933920705, + "grad_norm": 1.3189354109245792, + "learning_rate": 1.9926655921715924e-05, + "loss": 0.6780292987823486, + "step": 493 + }, + { + "epoch": 
0.17409691629955948, + "grad_norm": 1.2272422506889333, + "learning_rate": 1.9925949868681587e-05, + "loss": 0.6501175165176392, + "step": 494 + }, + { + "epoch": 0.17444933920704847, + "grad_norm": 1.3095934443108421, + "learning_rate": 1.9925240446096176e-05, + "loss": 0.781839907169342, + "step": 495 + }, + { + "epoch": 0.17480176211453743, + "grad_norm": 1.4508599784840917, + "learning_rate": 1.992452765420052e-05, + "loss": 0.7617994546890259, + "step": 496 + }, + { + "epoch": 0.17515418502202643, + "grad_norm": 1.2324738440312524, + "learning_rate": 1.992381149323659e-05, + "loss": 0.8019097447395325, + "step": 497 + }, + { + "epoch": 0.17550660792951542, + "grad_norm": 1.3071824216187324, + "learning_rate": 1.9923091963447496e-05, + "loss": 0.7526847124099731, + "step": 498 + }, + { + "epoch": 0.1758590308370044, + "grad_norm": 1.340463358272731, + "learning_rate": 1.9922369065077497e-05, + "loss": 0.7101150751113892, + "step": 499 + }, + { + "epoch": 0.1762114537444934, + "grad_norm": 1.396850141714641, + "learning_rate": 1.9921642798372e-05, + "loss": 0.8519806861877441, + "step": 500 + }, + { + "epoch": 0.17656387665198237, + "grad_norm": 1.5427241760761283, + "learning_rate": 1.9920913163577542e-05, + "loss": 0.774759829044342, + "step": 501 + }, + { + "epoch": 0.17691629955947136, + "grad_norm": 1.4501760642130928, + "learning_rate": 1.992018016094182e-05, + "loss": 0.8597595691680908, + "step": 502 + }, + { + "epoch": 0.17726872246696035, + "grad_norm": 1.6336800938277667, + "learning_rate": 1.9919443790713658e-05, + "loss": 0.7023826241493225, + "step": 503 + }, + { + "epoch": 0.17762114537444934, + "grad_norm": 1.8758125980343456, + "learning_rate": 1.991870405314303e-05, + "loss": 0.8290892839431763, + "step": 504 + }, + { + "epoch": 0.17797356828193833, + "grad_norm": 1.368620384992611, + "learning_rate": 1.9917960948481062e-05, + "loss": 0.9240517020225525, + "step": 505 + }, + { + "epoch": 0.17832599118942732, + "grad_norm": 1.4203507781601712, + 
"learning_rate": 1.9917214476980012e-05, + "loss": 0.8247153759002686, + "step": 506 + }, + { + "epoch": 0.1786784140969163, + "grad_norm": 1.5364946844029868, + "learning_rate": 1.991646463889328e-05, + "loss": 0.9101368188858032, + "step": 507 + }, + { + "epoch": 0.17903083700440528, + "grad_norm": 1.3883082747026767, + "learning_rate": 1.9915711434475416e-05, + "loss": 0.7688114643096924, + "step": 508 + }, + { + "epoch": 0.17938325991189427, + "grad_norm": 1.41173691792053, + "learning_rate": 1.9914954863982106e-05, + "loss": 0.820112943649292, + "step": 509 + }, + { + "epoch": 0.17973568281938326, + "grad_norm": 1.2372115494246672, + "learning_rate": 1.9914194927670186e-05, + "loss": 0.6393542289733887, + "step": 510 + }, + { + "epoch": 0.18008810572687226, + "grad_norm": 1.5514274082803117, + "learning_rate": 1.991343162579763e-05, + "loss": 0.9463154673576355, + "step": 511 + }, + { + "epoch": 0.18044052863436125, + "grad_norm": 1.2818287593652882, + "learning_rate": 1.9912664958623556e-05, + "loss": 0.9498215913772583, + "step": 512 + }, + { + "epoch": 0.1807929515418502, + "grad_norm": 1.3538150363158374, + "learning_rate": 1.991189492640822e-05, + "loss": 0.7659052014350891, + "step": 513 + }, + { + "epoch": 0.1811453744493392, + "grad_norm": 1.3014303918670855, + "learning_rate": 1.9911121529413028e-05, + "loss": 0.9946317672729492, + "step": 514 + }, + { + "epoch": 0.1814977973568282, + "grad_norm": 1.2888096801517381, + "learning_rate": 1.991034476790052e-05, + "loss": 0.762086033821106, + "step": 515 + }, + { + "epoch": 0.18185022026431719, + "grad_norm": 1.2685969775930512, + "learning_rate": 1.990956464213438e-05, + "loss": 0.7507720589637756, + "step": 516 + }, + { + "epoch": 0.18220264317180618, + "grad_norm": 1.2567492686992259, + "learning_rate": 1.990878115237945e-05, + "loss": 0.7859716415405273, + "step": 517 + }, + { + "epoch": 0.18255506607929514, + "grad_norm": 1.3199744761398897, + "learning_rate": 1.9907994298901688e-05, + "loss": 
0.8585234880447388, + "step": 518 + }, + { + "epoch": 0.18290748898678413, + "grad_norm": 1.2014345702103446, + "learning_rate": 1.990720408196821e-05, + "loss": 0.8569823503494263, + "step": 519 + }, + { + "epoch": 0.18325991189427313, + "grad_norm": 1.4066812868889107, + "learning_rate": 1.990641050184727e-05, + "loss": 0.8297367095947266, + "step": 520 + }, + { + "epoch": 0.18361233480176212, + "grad_norm": 1.4158335601181062, + "learning_rate": 1.9905613558808262e-05, + "loss": 0.7918041348457336, + "step": 521 + }, + { + "epoch": 0.1839647577092511, + "grad_norm": 1.3066639133280875, + "learning_rate": 1.9904813253121727e-05, + "loss": 0.8322931528091431, + "step": 522 + }, + { + "epoch": 0.1843171806167401, + "grad_norm": 1.600997340162295, + "learning_rate": 1.990400958505934e-05, + "loss": 0.6822292804718018, + "step": 523 + }, + { + "epoch": 0.18466960352422906, + "grad_norm": 1.344951810567012, + "learning_rate": 1.9903202554893925e-05, + "loss": 0.8989835977554321, + "step": 524 + }, + { + "epoch": 0.18502202643171806, + "grad_norm": 1.4644963211452282, + "learning_rate": 1.990239216289944e-05, + "loss": 0.671294093132019, + "step": 525 + }, + { + "epoch": 0.18537444933920705, + "grad_norm": 1.3104672306859468, + "learning_rate": 1.990157840935099e-05, + "loss": 0.9045379161834717, + "step": 526 + }, + { + "epoch": 0.18572687224669604, + "grad_norm": 1.2000125993399395, + "learning_rate": 1.990076129452482e-05, + "loss": 0.7117471694946289, + "step": 527 + }, + { + "epoch": 0.18607929515418503, + "grad_norm": 1.406356072194557, + "learning_rate": 1.9899940818698315e-05, + "loss": 0.890752911567688, + "step": 528 + }, + { + "epoch": 0.186431718061674, + "grad_norm": 1.3199977159633904, + "learning_rate": 1.9899116982149994e-05, + "loss": 0.7209222316741943, + "step": 529 + }, + { + "epoch": 0.186784140969163, + "grad_norm": 1.4346812218183875, + "learning_rate": 1.9898289785159534e-05, + "loss": 0.6912863254547119, + "step": 530 + }, + { + "epoch": 
0.18713656387665198, + "grad_norm": 1.4271479463954384, + "learning_rate": 1.9897459228007736e-05, + "loss": 0.7060319185256958, + "step": 531 + }, + { + "epoch": 0.18748898678414097, + "grad_norm": 1.2685161281492263, + "learning_rate": 1.9896625310976553e-05, + "loss": 0.6975364685058594, + "step": 532 + }, + { + "epoch": 0.18784140969162996, + "grad_norm": 1.513572022269192, + "learning_rate": 1.989578803434907e-05, + "loss": 0.8576006293296814, + "step": 533 + }, + { + "epoch": 0.18819383259911895, + "grad_norm": 1.3324082350150075, + "learning_rate": 1.9894947398409516e-05, + "loss": 0.7182095646858215, + "step": 534 + }, + { + "epoch": 0.18854625550660792, + "grad_norm": 1.4808328503550712, + "learning_rate": 1.9894103403443265e-05, + "loss": 0.7546031475067139, + "step": 535 + }, + { + "epoch": 0.1888986784140969, + "grad_norm": 1.4412494852286755, + "learning_rate": 1.9893256049736824e-05, + "loss": 0.7083312273025513, + "step": 536 + }, + { + "epoch": 0.1892511013215859, + "grad_norm": 1.178231291011438, + "learning_rate": 1.9892405337577846e-05, + "loss": 0.5614915490150452, + "step": 537 + }, + { + "epoch": 0.1896035242290749, + "grad_norm": 1.440292679191453, + "learning_rate": 1.9891551267255114e-05, + "loss": 0.7647485733032227, + "step": 538 + }, + { + "epoch": 0.18995594713656389, + "grad_norm": 1.2459192275692494, + "learning_rate": 1.9890693839058566e-05, + "loss": 0.776042103767395, + "step": 539 + }, + { + "epoch": 0.19030837004405288, + "grad_norm": 1.4553672353845373, + "learning_rate": 1.9889833053279268e-05, + "loss": 0.7694810628890991, + "step": 540 + }, + { + "epoch": 0.19066079295154184, + "grad_norm": 1.3521577159056863, + "learning_rate": 1.9888968910209433e-05, + "loss": 0.6935995817184448, + "step": 541 + }, + { + "epoch": 0.19101321585903083, + "grad_norm": 1.7084028073476007, + "learning_rate": 1.988810141014241e-05, + "loss": 0.7538039088249207, + "step": 542 + }, + { + "epoch": 0.19136563876651982, + "grad_norm": 
1.305324041919721, + "learning_rate": 1.9887230553372686e-05, + "loss": 0.8149158954620361, + "step": 543 + }, + { + "epoch": 0.19171806167400882, + "grad_norm": 1.6378712299065388, + "learning_rate": 1.988635634019589e-05, + "loss": 0.7776780128479004, + "step": 544 + }, + { + "epoch": 0.1920704845814978, + "grad_norm": 1.48919568324374, + "learning_rate": 1.9885478770908793e-05, + "loss": 0.8527307510375977, + "step": 545 + }, + { + "epoch": 0.19242290748898677, + "grad_norm": 1.1804269388923583, + "learning_rate": 1.98845978458093e-05, + "loss": 0.7239484190940857, + "step": 546 + }, + { + "epoch": 0.19277533039647576, + "grad_norm": 1.4096556213691402, + "learning_rate": 1.9883713565196462e-05, + "loss": 0.6937836408615112, + "step": 547 + }, + { + "epoch": 0.19312775330396476, + "grad_norm": 1.3705456771921078, + "learning_rate": 1.9882825929370456e-05, + "loss": 0.8567923903465271, + "step": 548 + }, + { + "epoch": 0.19348017621145375, + "grad_norm": 1.2851787163283013, + "learning_rate": 1.9881934938632615e-05, + "loss": 0.7948861122131348, + "step": 549 + }, + { + "epoch": 0.19383259911894274, + "grad_norm": 1.2883171824741761, + "learning_rate": 1.9881040593285398e-05, + "loss": 0.6808983087539673, + "step": 550 + }, + { + "epoch": 0.19418502202643173, + "grad_norm": 1.349988333670182, + "learning_rate": 1.9880142893632412e-05, + "loss": 0.9089908599853516, + "step": 551 + }, + { + "epoch": 0.1945374449339207, + "grad_norm": 1.373721278775904, + "learning_rate": 1.9879241839978393e-05, + "loss": 0.7947918176651001, + "step": 552 + }, + { + "epoch": 0.1948898678414097, + "grad_norm": 1.1679807856929723, + "learning_rate": 1.9878337432629224e-05, + "loss": 0.880418598651886, + "step": 553 + }, + { + "epoch": 0.19524229074889868, + "grad_norm": 1.4208435552970164, + "learning_rate": 1.9877429671891917e-05, + "loss": 0.8845832347869873, + "step": 554 + }, + { + "epoch": 0.19559471365638767, + "grad_norm": 1.4905396338040395, + "learning_rate": 
1.9876518558074638e-05, + "loss": 0.7635341286659241, + "step": 555 + }, + { + "epoch": 0.19594713656387666, + "grad_norm": 1.3799865187677636, + "learning_rate": 1.9875604091486678e-05, + "loss": 0.9301069974899292, + "step": 556 + }, + { + "epoch": 0.19629955947136563, + "grad_norm": 1.255573262915276, + "learning_rate": 1.9874686272438467e-05, + "loss": 0.8788589239120483, + "step": 557 + }, + { + "epoch": 0.19665198237885462, + "grad_norm": 1.2418539833380446, + "learning_rate": 1.987376510124158e-05, + "loss": 0.7452565431594849, + "step": 558 + }, + { + "epoch": 0.1970044052863436, + "grad_norm": 1.5278788646328887, + "learning_rate": 1.9872840578208722e-05, + "loss": 0.819628119468689, + "step": 559 + }, + { + "epoch": 0.1973568281938326, + "grad_norm": 1.4844496784402743, + "learning_rate": 1.9871912703653744e-05, + "loss": 0.7807571291923523, + "step": 560 + }, + { + "epoch": 0.1977092511013216, + "grad_norm": 1.2611555469759475, + "learning_rate": 1.9870981477891626e-05, + "loss": 0.7091392278671265, + "step": 561 + }, + { + "epoch": 0.19806167400881058, + "grad_norm": 1.2433638561435678, + "learning_rate": 1.9870046901238496e-05, + "loss": 0.8174105882644653, + "step": 562 + }, + { + "epoch": 0.19841409691629955, + "grad_norm": 1.2352337461151273, + "learning_rate": 1.9869108974011607e-05, + "loss": 0.696865439414978, + "step": 563 + }, + { + "epoch": 0.19876651982378854, + "grad_norm": 1.4794543945089762, + "learning_rate": 1.986816769652936e-05, + "loss": 0.914303183555603, + "step": 564 + }, + { + "epoch": 0.19911894273127753, + "grad_norm": 1.2869835497381619, + "learning_rate": 1.986722306911129e-05, + "loss": 0.8397856950759888, + "step": 565 + }, + { + "epoch": 0.19947136563876652, + "grad_norm": 1.216082157504287, + "learning_rate": 1.9866275092078066e-05, + "loss": 0.7206380367279053, + "step": 566 + }, + { + "epoch": 0.19982378854625552, + "grad_norm": 1.3259146958291776, + "learning_rate": 1.98653237657515e-05, + "loss": 0.7017316818237305, + 
"step": 567 + }, + { + "epoch": 0.2001762114537445, + "grad_norm": 1.6365100661152858, + "learning_rate": 1.9864369090454538e-05, + "loss": 0.8797772526741028, + "step": 568 + }, + { + "epoch": 0.20052863436123347, + "grad_norm": 1.3948984288943356, + "learning_rate": 1.9863411066511257e-05, + "loss": 0.6643391847610474, + "step": 569 + }, + { + "epoch": 0.20088105726872246, + "grad_norm": 1.4631143705399865, + "learning_rate": 1.9862449694246878e-05, + "loss": 0.8662393093109131, + "step": 570 + }, + { + "epoch": 0.20123348017621145, + "grad_norm": 1.4103722629610054, + "learning_rate": 1.9861484973987762e-05, + "loss": 0.7766140699386597, + "step": 571 + }, + { + "epoch": 0.20158590308370045, + "grad_norm": 1.4422501075340284, + "learning_rate": 1.9860516906061397e-05, + "loss": 0.8582239151000977, + "step": 572 + }, + { + "epoch": 0.20193832599118944, + "grad_norm": 1.2359229208879663, + "learning_rate": 1.9859545490796414e-05, + "loss": 0.5838385820388794, + "step": 573 + }, + { + "epoch": 0.2022907488986784, + "grad_norm": 1.4256083108556754, + "learning_rate": 1.9858570728522573e-05, + "loss": 0.6715164184570312, + "step": 574 + }, + { + "epoch": 0.2026431718061674, + "grad_norm": 1.604413564730453, + "learning_rate": 1.9857592619570783e-05, + "loss": 0.7665218114852905, + "step": 575 + }, + { + "epoch": 0.20299559471365639, + "grad_norm": 1.3992633216102752, + "learning_rate": 1.985661116427308e-05, + "loss": 0.8060458898544312, + "step": 576 + }, + { + "epoch": 0.20334801762114538, + "grad_norm": 1.3647027340900928, + "learning_rate": 1.985562636296264e-05, + "loss": 0.8354060649871826, + "step": 577 + }, + { + "epoch": 0.20370044052863437, + "grad_norm": 1.61178503454425, + "learning_rate": 1.985463821597376e-05, + "loss": 0.8814351558685303, + "step": 578 + }, + { + "epoch": 0.20405286343612336, + "grad_norm": 1.3581614903846795, + "learning_rate": 1.9853646723641895e-05, + "loss": 0.9068918228149414, + "step": 579 + }, + { + "epoch": 0.20440528634361232, 
+ "grad_norm": 1.4217775001953692, + "learning_rate": 1.9852651886303624e-05, + "loss": 0.7671997547149658, + "step": 580 + }, + { + "epoch": 0.20475770925110132, + "grad_norm": 1.2987191699893856, + "learning_rate": 1.9851653704296664e-05, + "loss": 0.7906886339187622, + "step": 581 + }, + { + "epoch": 0.2051101321585903, + "grad_norm": 1.4550942850887114, + "learning_rate": 1.985065217795987e-05, + "loss": 0.8424232006072998, + "step": 582 + }, + { + "epoch": 0.2054625550660793, + "grad_norm": 1.2767538498679667, + "learning_rate": 1.984964730763322e-05, + "loss": 0.8335819244384766, + "step": 583 + }, + { + "epoch": 0.2058149779735683, + "grad_norm": 1.2913652769028938, + "learning_rate": 1.9848639093657844e-05, + "loss": 0.8340694308280945, + "step": 584 + }, + { + "epoch": 0.20616740088105726, + "grad_norm": 1.3161255240413319, + "learning_rate": 1.9847627536376e-05, + "loss": 0.9228274822235107, + "step": 585 + }, + { + "epoch": 0.20651982378854625, + "grad_norm": 1.548405161064148, + "learning_rate": 1.984661263613107e-05, + "loss": 0.7843449115753174, + "step": 586 + }, + { + "epoch": 0.20687224669603524, + "grad_norm": 1.3039537503613003, + "learning_rate": 1.9845594393267594e-05, + "loss": 0.7411990165710449, + "step": 587 + }, + { + "epoch": 0.20722466960352423, + "grad_norm": 1.3644443695047568, + "learning_rate": 1.9844572808131228e-05, + "loss": 0.7520540356636047, + "step": 588 + }, + { + "epoch": 0.20757709251101322, + "grad_norm": 1.2894133104841217, + "learning_rate": 1.9843547881068763e-05, + "loss": 0.795365571975708, + "step": 589 + }, + { + "epoch": 0.20792951541850221, + "grad_norm": 1.280356655308606, + "learning_rate": 1.984251961242814e-05, + "loss": 0.8415528535842896, + "step": 590 + }, + { + "epoch": 0.20828193832599118, + "grad_norm": 1.4654647998731167, + "learning_rate": 1.9841488002558416e-05, + "loss": 0.8555570244789124, + "step": 591 + }, + { + "epoch": 0.20863436123348017, + "grad_norm": 1.314593410908928, + "learning_rate": 
1.9840453051809792e-05, + "loss": 0.8214600086212158, + "step": 592 + }, + { + "epoch": 0.20898678414096916, + "grad_norm": 1.2598900623176714, + "learning_rate": 1.9839414760533607e-05, + "loss": 0.7746415138244629, + "step": 593 + }, + { + "epoch": 0.20933920704845815, + "grad_norm": 1.6285440778435663, + "learning_rate": 1.9838373129082325e-05, + "loss": 1.0861419439315796, + "step": 594 + }, + { + "epoch": 0.20969162995594715, + "grad_norm": 1.327372383451943, + "learning_rate": 1.9837328157809547e-05, + "loss": 0.7530953884124756, + "step": 595 + }, + { + "epoch": 0.21004405286343614, + "grad_norm": 1.420023169388647, + "learning_rate": 1.9836279847070004e-05, + "loss": 0.8811959624290466, + "step": 596 + }, + { + "epoch": 0.2103964757709251, + "grad_norm": 1.2274254083036087, + "learning_rate": 1.9835228197219573e-05, + "loss": 0.7956523299217224, + "step": 597 + }, + { + "epoch": 0.2107488986784141, + "grad_norm": 1.306015861681406, + "learning_rate": 1.9834173208615253e-05, + "loss": 0.8710414171218872, + "step": 598 + }, + { + "epoch": 0.21110132158590308, + "grad_norm": 1.303850147164254, + "learning_rate": 1.983311488161518e-05, + "loss": 0.9057297706604004, + "step": 599 + }, + { + "epoch": 0.21145374449339208, + "grad_norm": 1.2517049783711822, + "learning_rate": 1.983205321657862e-05, + "loss": 0.7531988024711609, + "step": 600 + }, + { + "epoch": 0.21180616740088107, + "grad_norm": 1.392455719061042, + "learning_rate": 1.983098821386598e-05, + "loss": 0.6508063077926636, + "step": 601 + }, + { + "epoch": 0.21215859030837003, + "grad_norm": 1.238668234857589, + "learning_rate": 1.9829919873838796e-05, + "loss": 0.7267025709152222, + "step": 602 + }, + { + "epoch": 0.21251101321585902, + "grad_norm": 1.2232739877442529, + "learning_rate": 1.9828848196859727e-05, + "loss": 0.6930510997772217, + "step": 603 + }, + { + "epoch": 0.21286343612334802, + "grad_norm": 1.4104259448916805, + "learning_rate": 1.9827773183292583e-05, + "loss": 0.7613120079040527, 
+ "step": 604 + }, + { + "epoch": 0.213215859030837, + "grad_norm": 1.2586328753898472, + "learning_rate": 1.9826694833502295e-05, + "loss": 0.763299822807312, + "step": 605 + }, + { + "epoch": 0.213568281938326, + "grad_norm": 1.4431352363644856, + "learning_rate": 1.9825613147854928e-05, + "loss": 0.7599194049835205, + "step": 606 + }, + { + "epoch": 0.213920704845815, + "grad_norm": 1.3487971590690426, + "learning_rate": 1.9824528126717687e-05, + "loss": 0.869399905204773, + "step": 607 + }, + { + "epoch": 0.21427312775330395, + "grad_norm": 1.3853231700631432, + "learning_rate": 1.9823439770458893e-05, + "loss": 0.733409583568573, + "step": 608 + }, + { + "epoch": 0.21462555066079295, + "grad_norm": 1.2766333009964275, + "learning_rate": 1.9822348079448014e-05, + "loss": 0.8302386999130249, + "step": 609 + }, + { + "epoch": 0.21497797356828194, + "grad_norm": 1.1872454682531661, + "learning_rate": 1.9821253054055645e-05, + "loss": 0.8234561681747437, + "step": 610 + }, + { + "epoch": 0.21533039647577093, + "grad_norm": 1.336729476582052, + "learning_rate": 1.9820154694653514e-05, + "loss": 0.81988525390625, + "step": 611 + }, + { + "epoch": 0.21568281938325992, + "grad_norm": 1.1619766622665528, + "learning_rate": 1.9819053001614478e-05, + "loss": 0.6437678933143616, + "step": 612 + }, + { + "epoch": 0.21603524229074889, + "grad_norm": 1.398835884660331, + "learning_rate": 1.9817947975312527e-05, + "loss": 0.8256562948226929, + "step": 613 + }, + { + "epoch": 0.21638766519823788, + "grad_norm": 1.4423824320045469, + "learning_rate": 1.9816839616122787e-05, + "loss": 0.8204725980758667, + "step": 614 + }, + { + "epoch": 0.21674008810572687, + "grad_norm": 1.4648639859051293, + "learning_rate": 1.9815727924421507e-05, + "loss": 0.7492775917053223, + "step": 615 + }, + { + "epoch": 0.21709251101321586, + "grad_norm": 1.4585481343848268, + "learning_rate": 1.9814612900586075e-05, + "loss": 0.629736065864563, + "step": 616 + }, + { + "epoch": 0.21744493392070485, + 
"grad_norm": 1.3908853161597456, + "learning_rate": 1.9813494544995e-05, + "loss": 0.7974159717559814, + "step": 617 + }, + { + "epoch": 0.21779735682819384, + "grad_norm": 1.4158223772493663, + "learning_rate": 1.981237285802794e-05, + "loss": 0.8367668390274048, + "step": 618 + }, + { + "epoch": 0.2181497797356828, + "grad_norm": 1.2051753792883582, + "learning_rate": 1.9811247840065667e-05, + "loss": 0.7942521572113037, + "step": 619 + }, + { + "epoch": 0.2185022026431718, + "grad_norm": 1.3827277102573685, + "learning_rate": 1.981011949149009e-05, + "loss": 0.7863545417785645, + "step": 620 + }, + { + "epoch": 0.2188546255506608, + "grad_norm": 1.3809343727942922, + "learning_rate": 1.9808987812684247e-05, + "loss": 0.8667019605636597, + "step": 621 + }, + { + "epoch": 0.21920704845814978, + "grad_norm": 1.5738475739563456, + "learning_rate": 1.9807852804032306e-05, + "loss": 0.8555353283882141, + "step": 622 + }, + { + "epoch": 0.21955947136563878, + "grad_norm": 1.244926951925701, + "learning_rate": 1.9806714465919573e-05, + "loss": 0.8170013427734375, + "step": 623 + }, + { + "epoch": 0.21991189427312777, + "grad_norm": 1.331256668600172, + "learning_rate": 1.9805572798732475e-05, + "loss": 0.9277342557907104, + "step": 624 + }, + { + "epoch": 0.22026431718061673, + "grad_norm": 1.4090219105247375, + "learning_rate": 1.980442780285857e-05, + "loss": 0.6536964178085327, + "step": 625 + }, + { + "epoch": 0.22061674008810572, + "grad_norm": 1.4088256669280743, + "learning_rate": 1.980327947868655e-05, + "loss": 0.7197799682617188, + "step": 626 + }, + { + "epoch": 0.22096916299559471, + "grad_norm": 1.1381025512945977, + "learning_rate": 1.980212782660624e-05, + "loss": 0.7558401823043823, + "step": 627 + }, + { + "epoch": 0.2213215859030837, + "grad_norm": 1.4031284519802554, + "learning_rate": 1.9800972847008586e-05, + "loss": 0.7918291091918945, + "step": 628 + }, + { + "epoch": 0.2216740088105727, + "grad_norm": 1.4810910878326864, + "learning_rate": 
1.979981454028567e-05, + "loss": 0.7159492373466492, + "step": 629 + }, + { + "epoch": 0.22202643171806166, + "grad_norm": 1.5418605472416471, + "learning_rate": 1.9798652906830694e-05, + "loss": 0.854686439037323, + "step": 630 + }, + { + "epoch": 0.22237885462555065, + "grad_norm": 1.6329149097762432, + "learning_rate": 1.9797487947038007e-05, + "loss": 0.736785888671875, + "step": 631 + }, + { + "epoch": 0.22273127753303965, + "grad_norm": 1.2749674694710476, + "learning_rate": 1.9796319661303065e-05, + "loss": 0.7092996835708618, + "step": 632 + }, + { + "epoch": 0.22308370044052864, + "grad_norm": 1.4592836621170417, + "learning_rate": 1.9795148050022477e-05, + "loss": 0.8890455961227417, + "step": 633 + }, + { + "epoch": 0.22343612334801763, + "grad_norm": 1.2618947600836363, + "learning_rate": 1.979397311359396e-05, + "loss": 0.7476855516433716, + "step": 634 + }, + { + "epoch": 0.22378854625550662, + "grad_norm": 1.4307363207113668, + "learning_rate": 1.979279485241637e-05, + "loss": 0.7810029983520508, + "step": 635 + }, + { + "epoch": 0.22414096916299558, + "grad_norm": 1.2070666788938549, + "learning_rate": 1.9791613266889688e-05, + "loss": 0.6679891347885132, + "step": 636 + }, + { + "epoch": 0.22449339207048458, + "grad_norm": 1.6320710320094325, + "learning_rate": 1.979042835741503e-05, + "loss": 0.809790849685669, + "step": 637 + }, + { + "epoch": 0.22484581497797357, + "grad_norm": 1.6737967848633384, + "learning_rate": 1.9789240124394638e-05, + "loss": 0.8347213268280029, + "step": 638 + }, + { + "epoch": 0.22519823788546256, + "grad_norm": 1.1935958187808327, + "learning_rate": 1.9788048568231875e-05, + "loss": 0.6620997190475464, + "step": 639 + }, + { + "epoch": 0.22555066079295155, + "grad_norm": 1.2898316066784317, + "learning_rate": 1.9786853689331235e-05, + "loss": 0.7727694511413574, + "step": 640 + }, + { + "epoch": 0.22590308370044052, + "grad_norm": 1.2854878709867101, + "learning_rate": 1.9785655488098348e-05, + "loss": 
0.7433278560638428, + "step": 641 + }, + { + "epoch": 0.2262555066079295, + "grad_norm": 1.3523753090224933, + "learning_rate": 1.9784453964939966e-05, + "loss": 0.7375571727752686, + "step": 642 + }, + { + "epoch": 0.2266079295154185, + "grad_norm": 1.3285668366741343, + "learning_rate": 1.9783249120263962e-05, + "loss": 0.5838407874107361, + "step": 643 + }, + { + "epoch": 0.2269603524229075, + "grad_norm": 1.3906475095958148, + "learning_rate": 1.978204095447935e-05, + "loss": 0.7120088934898376, + "step": 644 + }, + { + "epoch": 0.22731277533039648, + "grad_norm": 1.5058740006044322, + "learning_rate": 1.9780829467996262e-05, + "loss": 0.7668102383613586, + "step": 645 + }, + { + "epoch": 0.22766519823788547, + "grad_norm": 1.2993959173766831, + "learning_rate": 1.977961466122596e-05, + "loss": 0.748942494392395, + "step": 646 + }, + { + "epoch": 0.22801762114537444, + "grad_norm": 1.476253609353715, + "learning_rate": 1.9778396534580836e-05, + "loss": 0.7569374442100525, + "step": 647 + }, + { + "epoch": 0.22837004405286343, + "grad_norm": 1.352884217242173, + "learning_rate": 1.97771750884744e-05, + "loss": 0.7981363534927368, + "step": 648 + }, + { + "epoch": 0.22872246696035242, + "grad_norm": 1.5069792289976334, + "learning_rate": 1.97759503233213e-05, + "loss": 0.7501301765441895, + "step": 649 + }, + { + "epoch": 0.2290748898678414, + "grad_norm": 1.4079968546467614, + "learning_rate": 1.9774722239537305e-05, + "loss": 0.7880003452301025, + "step": 650 + }, + { + "epoch": 0.2294273127753304, + "grad_norm": 1.3141024886679253, + "learning_rate": 1.977349083753931e-05, + "loss": 0.9007930755615234, + "step": 651 + }, + { + "epoch": 0.2297797356828194, + "grad_norm": 1.1634171776911992, + "learning_rate": 1.9772256117745335e-05, + "loss": 0.6291126012802124, + "step": 652 + }, + { + "epoch": 0.23013215859030836, + "grad_norm": 1.1487631323898542, + "learning_rate": 1.9771018080574534e-05, + "loss": 0.8155031204223633, + "step": 653 + }, + { + "epoch": 
0.23048458149779735, + "grad_norm": 1.2941785819245946, + "learning_rate": 1.976977672644718e-05, + "loss": 0.7103240489959717, + "step": 654 + }, + { + "epoch": 0.23083700440528634, + "grad_norm": 1.4170836267106273, + "learning_rate": 1.9768532055784678e-05, + "loss": 0.8590278625488281, + "step": 655 + }, + { + "epoch": 0.23118942731277534, + "grad_norm": 1.6156852038452685, + "learning_rate": 1.9767284069009545e-05, + "loss": 0.7729001641273499, + "step": 656 + }, + { + "epoch": 0.23154185022026433, + "grad_norm": 1.543950265697803, + "learning_rate": 1.9766032766545445e-05, + "loss": 0.8287409543991089, + "step": 657 + }, + { + "epoch": 0.2318942731277533, + "grad_norm": 1.327581925526745, + "learning_rate": 1.9764778148817147e-05, + "loss": 0.8651477098464966, + "step": 658 + }, + { + "epoch": 0.23224669603524228, + "grad_norm": 1.3954780395501065, + "learning_rate": 1.976352021625056e-05, + "loss": 0.7582576274871826, + "step": 659 + }, + { + "epoch": 0.23259911894273128, + "grad_norm": 1.2510605377459358, + "learning_rate": 1.976225896927271e-05, + "loss": 0.6579675078392029, + "step": 660 + }, + { + "epoch": 0.23295154185022027, + "grad_norm": 1.4130234326235036, + "learning_rate": 1.9760994408311757e-05, + "loss": 0.8817700147628784, + "step": 661 + }, + { + "epoch": 0.23330396475770926, + "grad_norm": 1.3799441341137708, + "learning_rate": 1.9759726533796976e-05, + "loss": 0.7241606712341309, + "step": 662 + }, + { + "epoch": 0.23365638766519825, + "grad_norm": 1.2880787484904483, + "learning_rate": 1.9758455346158768e-05, + "loss": 0.7434183359146118, + "step": 663 + }, + { + "epoch": 0.23400881057268721, + "grad_norm": 1.3406860649308125, + "learning_rate": 1.9757180845828663e-05, + "loss": 0.632422685623169, + "step": 664 + }, + { + "epoch": 0.2343612334801762, + "grad_norm": 1.394213400542702, + "learning_rate": 1.9755903033239318e-05, + "loss": 0.7276040315628052, + "step": 665 + }, + { + "epoch": 0.2347136563876652, + "grad_norm": 
1.4191729622512466, + "learning_rate": 1.975462190882451e-05, + "loss": 0.8070325255393982, + "step": 666 + }, + { + "epoch": 0.2350660792951542, + "grad_norm": 1.505939347053283, + "learning_rate": 1.9753337473019133e-05, + "loss": 0.867915689945221, + "step": 667 + }, + { + "epoch": 0.23541850220264318, + "grad_norm": 1.2080841146883634, + "learning_rate": 1.9752049726259223e-05, + "loss": 0.7905307412147522, + "step": 668 + }, + { + "epoch": 0.23577092511013215, + "grad_norm": 1.3166867899458456, + "learning_rate": 1.9750758668981925e-05, + "loss": 0.7721420526504517, + "step": 669 + }, + { + "epoch": 0.23612334801762114, + "grad_norm": 1.3746426458674128, + "learning_rate": 1.9749464301625515e-05, + "loss": 0.7926005125045776, + "step": 670 + }, + { + "epoch": 0.23647577092511013, + "grad_norm": 1.387001164209418, + "learning_rate": 1.974816662462939e-05, + "loss": 0.7651785612106323, + "step": 671 + }, + { + "epoch": 0.23682819383259912, + "grad_norm": 1.3285492717471519, + "learning_rate": 1.974686563843407e-05, + "loss": 0.7548795938491821, + "step": 672 + }, + { + "epoch": 0.2371806167400881, + "grad_norm": 1.256836928643264, + "learning_rate": 1.9745561343481197e-05, + "loss": 0.5405399799346924, + "step": 673 + }, + { + "epoch": 0.2375330396475771, + "grad_norm": 1.429166434081011, + "learning_rate": 1.9744253740213542e-05, + "loss": 0.7561137080192566, + "step": 674 + }, + { + "epoch": 0.23788546255506607, + "grad_norm": 1.2880562459402407, + "learning_rate": 1.9742942829074993e-05, + "loss": 0.8809534907341003, + "step": 675 + }, + { + "epoch": 0.23823788546255506, + "grad_norm": 1.4170174919214424, + "learning_rate": 1.974162861051057e-05, + "loss": 0.750350832939148, + "step": 676 + }, + { + "epoch": 0.23859030837004405, + "grad_norm": 1.629083058939835, + "learning_rate": 1.9740311084966398e-05, + "loss": 0.89476478099823, + "step": 677 + }, + { + "epoch": 0.23894273127753304, + "grad_norm": 1.2576348651951754, + "learning_rate": 
1.9738990252889748e-05, + "loss": 0.8647176027297974, + "step": 678 + }, + { + "epoch": 0.23929515418502204, + "grad_norm": 1.4086313229573832, + "learning_rate": 1.9737666114728996e-05, + "loss": 0.7331727743148804, + "step": 679 + }, + { + "epoch": 0.239647577092511, + "grad_norm": 1.471872239566745, + "learning_rate": 1.9736338670933642e-05, + "loss": 0.7714364528656006, + "step": 680 + }, + { + "epoch": 0.24, + "grad_norm": 1.2246586432486557, + "learning_rate": 1.973500792195432e-05, + "loss": 0.7840908765792847, + "step": 681 + }, + { + "epoch": 0.24035242290748898, + "grad_norm": 1.5714154435783916, + "learning_rate": 1.9733673868242767e-05, + "loss": 0.8723878860473633, + "step": 682 + }, + { + "epoch": 0.24070484581497797, + "grad_norm": 1.3325473695906174, + "learning_rate": 1.9732336510251864e-05, + "loss": 0.782090425491333, + "step": 683 + }, + { + "epoch": 0.24105726872246697, + "grad_norm": 1.4114017797446734, + "learning_rate": 1.9730995848435594e-05, + "loss": 0.8000990152359009, + "step": 684 + }, + { + "epoch": 0.24140969162995596, + "grad_norm": 1.2098442989857856, + "learning_rate": 1.9729651883249075e-05, + "loss": 0.7499237060546875, + "step": 685 + }, + { + "epoch": 0.24176211453744492, + "grad_norm": 1.376086425817015, + "learning_rate": 1.972830461514854e-05, + "loss": 0.8786858916282654, + "step": 686 + }, + { + "epoch": 0.2421145374449339, + "grad_norm": 1.2058295584451697, + "learning_rate": 1.972695404459134e-05, + "loss": 0.7039557695388794, + "step": 687 + }, + { + "epoch": 0.2424669603524229, + "grad_norm": 1.2391412724176054, + "learning_rate": 1.9725600172035962e-05, + "loss": 0.6699448823928833, + "step": 688 + }, + { + "epoch": 0.2428193832599119, + "grad_norm": 1.4984585662906706, + "learning_rate": 1.9724242997941995e-05, + "loss": 0.6753977537155151, + "step": 689 + }, + { + "epoch": 0.2431718061674009, + "grad_norm": 1.465232022987203, + "learning_rate": 1.9722882522770163e-05, + "loss": 0.7139854431152344, + "step": 690 + 
}, + { + "epoch": 0.24352422907488988, + "grad_norm": 1.2814158831499989, + "learning_rate": 1.9721518746982296e-05, + "loss": 0.7894896864891052, + "step": 691 + }, + { + "epoch": 0.24387665198237884, + "grad_norm": 1.2615077213285395, + "learning_rate": 1.972015167104136e-05, + "loss": 0.5663755536079407, + "step": 692 + }, + { + "epoch": 0.24422907488986784, + "grad_norm": 2.058599574246893, + "learning_rate": 1.971878129541144e-05, + "loss": 0.8607856035232544, + "step": 693 + }, + { + "epoch": 0.24458149779735683, + "grad_norm": 1.351791839280567, + "learning_rate": 1.9717407620557724e-05, + "loss": 0.7384383678436279, + "step": 694 + }, + { + "epoch": 0.24493392070484582, + "grad_norm": 1.3580988060863546, + "learning_rate": 1.971603064694654e-05, + "loss": 0.6145502328872681, + "step": 695 + }, + { + "epoch": 0.2452863436123348, + "grad_norm": 1.216736398001555, + "learning_rate": 1.9714650375045328e-05, + "loss": 0.6758620738983154, + "step": 696 + }, + { + "epoch": 0.24563876651982378, + "grad_norm": 1.4471588548341505, + "learning_rate": 1.9713266805322643e-05, + "loss": 0.7416598200798035, + "step": 697 + }, + { + "epoch": 0.24599118942731277, + "grad_norm": 1.5476710427855191, + "learning_rate": 1.9711879938248163e-05, + "loss": 0.7603555917739868, + "step": 698 + }, + { + "epoch": 0.24634361233480176, + "grad_norm": 1.442293220466076, + "learning_rate": 1.9710489774292692e-05, + "loss": 0.9119949340820312, + "step": 699 + }, + { + "epoch": 0.24669603524229075, + "grad_norm": 1.3843099449438452, + "learning_rate": 1.9709096313928144e-05, + "loss": 0.6884537935256958, + "step": 700 + }, + { + "epoch": 0.24704845814977974, + "grad_norm": 1.618333940643818, + "learning_rate": 1.9707699557627554e-05, + "loss": 0.7928721904754639, + "step": 701 + }, + { + "epoch": 0.24740088105726873, + "grad_norm": 1.593414442103489, + "learning_rate": 1.970629950586508e-05, + "loss": 0.888218104839325, + "step": 702 + }, + { + "epoch": 0.2477533039647577, + "grad_norm": 
1.484965940613647, + "learning_rate": 1.9704896159115997e-05, + "loss": 0.7949875593185425, + "step": 703 + }, + { + "epoch": 0.2481057268722467, + "grad_norm": 1.5094809465076762, + "learning_rate": 1.970348951785669e-05, + "loss": 0.9031823873519897, + "step": 704 + }, + { + "epoch": 0.24845814977973568, + "grad_norm": 1.4099687182713576, + "learning_rate": 1.9702079582564682e-05, + "loss": 0.636865496635437, + "step": 705 + }, + { + "epoch": 0.24881057268722467, + "grad_norm": 1.5392719282626255, + "learning_rate": 1.9700666353718593e-05, + "loss": 0.731717586517334, + "step": 706 + }, + { + "epoch": 0.24916299559471367, + "grad_norm": 1.5878589631749256, + "learning_rate": 1.9699249831798172e-05, + "loss": 0.7571220397949219, + "step": 707 + }, + { + "epoch": 0.24951541850220263, + "grad_norm": 1.5180992539956903, + "learning_rate": 1.969783001728429e-05, + "loss": 0.6112762689590454, + "step": 708 + }, + { + "epoch": 0.24986784140969162, + "grad_norm": 1.3651864060041954, + "learning_rate": 1.9696406910658918e-05, + "loss": 0.6737902164459229, + "step": 709 + }, + { + "epoch": 0.25022026431718064, + "grad_norm": 1.328645038543607, + "learning_rate": 1.9694980512405167e-05, + "loss": 0.6525848507881165, + "step": 710 + }, + { + "epoch": 0.2505726872246696, + "grad_norm": 1.302186292631501, + "learning_rate": 1.9693550823007248e-05, + "loss": 0.9107403755187988, + "step": 711 + }, + { + "epoch": 0.25092511013215857, + "grad_norm": 1.5423262639437814, + "learning_rate": 1.96921178429505e-05, + "loss": 0.7373934984207153, + "step": 712 + }, + { + "epoch": 0.25127753303964756, + "grad_norm": 1.4043304459804222, + "learning_rate": 1.9690681572721377e-05, + "loss": 0.6383399963378906, + "step": 713 + }, + { + "epoch": 0.25162995594713655, + "grad_norm": 1.3203935888344693, + "learning_rate": 1.9689242012807442e-05, + "loss": 0.6600236296653748, + "step": 714 + }, + { + "epoch": 0.25198237885462554, + "grad_norm": 1.6489156261044324, + "learning_rate": 
1.9687799163697386e-05, + "loss": 0.9195891618728638, + "step": 715 + }, + { + "epoch": 0.25233480176211454, + "grad_norm": 1.300868905936819, + "learning_rate": 1.968635302588101e-05, + "loss": 0.7122433185577393, + "step": 716 + }, + { + "epoch": 0.2526872246696035, + "grad_norm": 1.467731789065586, + "learning_rate": 1.968490359984923e-05, + "loss": 0.7601606845855713, + "step": 717 + }, + { + "epoch": 0.2530396475770925, + "grad_norm": 1.2967441771844141, + "learning_rate": 1.9683450886094087e-05, + "loss": 0.8216352462768555, + "step": 718 + }, + { + "epoch": 0.2533920704845815, + "grad_norm": 1.4134852768930402, + "learning_rate": 1.9681994885108727e-05, + "loss": 0.8783165216445923, + "step": 719 + }, + { + "epoch": 0.2537444933920705, + "grad_norm": 1.5566095938184208, + "learning_rate": 1.9680535597387416e-05, + "loss": 0.7323269844055176, + "step": 720 + }, + { + "epoch": 0.2540969162995595, + "grad_norm": 1.6250423495927373, + "learning_rate": 1.9679073023425542e-05, + "loss": 0.93906170129776, + "step": 721 + }, + { + "epoch": 0.25444933920704843, + "grad_norm": 1.3857164700730882, + "learning_rate": 1.96776071637196e-05, + "loss": 0.774397611618042, + "step": 722 + }, + { + "epoch": 0.2548017621145374, + "grad_norm": 1.3653604324598565, + "learning_rate": 1.9676138018767204e-05, + "loss": 0.6634535789489746, + "step": 723 + }, + { + "epoch": 0.2551541850220264, + "grad_norm": 1.3364894441034205, + "learning_rate": 1.9674665589067082e-05, + "loss": 0.7705625295639038, + "step": 724 + }, + { + "epoch": 0.2555066079295154, + "grad_norm": 1.5708708799323368, + "learning_rate": 1.9673189875119082e-05, + "loss": 0.706364631652832, + "step": 725 + }, + { + "epoch": 0.2558590308370044, + "grad_norm": 1.2599963014034798, + "learning_rate": 1.9671710877424158e-05, + "loss": 0.7295894622802734, + "step": 726 + }, + { + "epoch": 0.2562114537444934, + "grad_norm": 1.6926806599843667, + "learning_rate": 1.9670228596484383e-05, + "loss": 0.8135089874267578, + "step": 
727 + }, + { + "epoch": 0.2565638766519824, + "grad_norm": 1.5978181657651334, + "learning_rate": 1.966874303280295e-05, + "loss": 0.801734209060669, + "step": 728 + }, + { + "epoch": 0.2569162995594714, + "grad_norm": 1.728546952239603, + "learning_rate": 1.9667254186884164e-05, + "loss": 0.8405104875564575, + "step": 729 + }, + { + "epoch": 0.25726872246696036, + "grad_norm": 1.2523029350782668, + "learning_rate": 1.9665762059233434e-05, + "loss": 0.8320014476776123, + "step": 730 + }, + { + "epoch": 0.25762114537444936, + "grad_norm": 1.2667340666882572, + "learning_rate": 1.96642666503573e-05, + "loss": 0.8701308965682983, + "step": 731 + }, + { + "epoch": 0.25797356828193835, + "grad_norm": 1.1982399130470203, + "learning_rate": 1.9662767960763394e-05, + "loss": 0.7980693578720093, + "step": 732 + }, + { + "epoch": 0.25832599118942734, + "grad_norm": 1.3765503313855298, + "learning_rate": 1.9661265990960486e-05, + "loss": 0.7258214950561523, + "step": 733 + }, + { + "epoch": 0.2586784140969163, + "grad_norm": 1.1683887680739682, + "learning_rate": 1.9659760741458444e-05, + "loss": 0.6860172748565674, + "step": 734 + }, + { + "epoch": 0.25903083700440527, + "grad_norm": 1.4034749748766104, + "learning_rate": 1.9658252212768252e-05, + "loss": 0.7438071370124817, + "step": 735 + }, + { + "epoch": 0.25938325991189426, + "grad_norm": 1.6140837506314978, + "learning_rate": 1.9656740405402007e-05, + "loss": 0.8680309057235718, + "step": 736 + }, + { + "epoch": 0.25973568281938325, + "grad_norm": 1.5365221656010954, + "learning_rate": 1.9655225319872925e-05, + "loss": 0.933163046836853, + "step": 737 + }, + { + "epoch": 0.26008810572687224, + "grad_norm": 1.3636194628802456, + "learning_rate": 1.9653706956695333e-05, + "loss": 0.8746597170829773, + "step": 738 + }, + { + "epoch": 0.26044052863436123, + "grad_norm": 1.31799671460777, + "learning_rate": 1.965218531638466e-05, + "loss": 0.857211709022522, + "step": 739 + }, + { + "epoch": 0.2607929515418502, + 
"grad_norm": 1.313241643085953, + "learning_rate": 1.965066039945746e-05, + "loss": 0.7837733030319214, + "step": 740 + }, + { + "epoch": 0.2611453744493392, + "grad_norm": 1.3527479757495662, + "learning_rate": 1.9649132206431395e-05, + "loss": 0.8401491641998291, + "step": 741 + }, + { + "epoch": 0.2614977973568282, + "grad_norm": 1.14302378839197, + "learning_rate": 1.9647600737825235e-05, + "loss": 0.7070307731628418, + "step": 742 + }, + { + "epoch": 0.2618502202643172, + "grad_norm": 1.756317003631787, + "learning_rate": 1.9646065994158873e-05, + "loss": 0.7649509310722351, + "step": 743 + }, + { + "epoch": 0.2622026431718062, + "grad_norm": 1.5152987231460182, + "learning_rate": 1.9644527975953302e-05, + "loss": 0.7759182453155518, + "step": 744 + }, + { + "epoch": 0.26255506607929513, + "grad_norm": 1.5151017458848213, + "learning_rate": 1.9642986683730626e-05, + "loss": 0.8176295757293701, + "step": 745 + }, + { + "epoch": 0.2629074889867841, + "grad_norm": 1.2974538182792636, + "learning_rate": 1.9641442118014078e-05, + "loss": 0.8406162261962891, + "step": 746 + }, + { + "epoch": 0.2632599118942731, + "grad_norm": 1.3410871141615202, + "learning_rate": 1.9639894279327985e-05, + "loss": 0.8064795732498169, + "step": 747 + }, + { + "epoch": 0.2636123348017621, + "grad_norm": 1.2769637989850176, + "learning_rate": 1.9638343168197784e-05, + "loss": 0.6662956476211548, + "step": 748 + }, + { + "epoch": 0.2639647577092511, + "grad_norm": 1.5105008685571195, + "learning_rate": 1.9636788785150037e-05, + "loss": 0.8747783899307251, + "step": 749 + }, + { + "epoch": 0.2643171806167401, + "grad_norm": 1.4261291763421449, + "learning_rate": 1.9635231130712406e-05, + "loss": 0.7893349528312683, + "step": 750 + }, + { + "epoch": 0.2646696035242291, + "grad_norm": 1.2907133964100823, + "learning_rate": 1.9633670205413665e-05, + "loss": 0.7380903959274292, + "step": 751 + }, + { + "epoch": 0.26502202643171807, + "grad_norm": 1.5293000163357584, + "learning_rate": 
1.96321060097837e-05, + "loss": 0.9164873957633972, + "step": 752 + }, + { + "epoch": 0.26537444933920706, + "grad_norm": 1.5448314355627197, + "learning_rate": 1.9630538544353505e-05, + "loss": 0.7664264440536499, + "step": 753 + }, + { + "epoch": 0.26572687224669606, + "grad_norm": 1.4037068281656377, + "learning_rate": 1.9628967809655187e-05, + "loss": 0.8117275238037109, + "step": 754 + }, + { + "epoch": 0.26607929515418505, + "grad_norm": 1.3044642797371147, + "learning_rate": 1.9627393806221967e-05, + "loss": 0.6203808784484863, + "step": 755 + }, + { + "epoch": 0.266431718061674, + "grad_norm": 1.5462507455011187, + "learning_rate": 1.9625816534588163e-05, + "loss": 0.8777878284454346, + "step": 756 + }, + { + "epoch": 0.266784140969163, + "grad_norm": 1.2883365910622429, + "learning_rate": 1.9624235995289212e-05, + "loss": 0.6984438300132751, + "step": 757 + }, + { + "epoch": 0.26713656387665197, + "grad_norm": 1.5746997664717406, + "learning_rate": 1.962265218886166e-05, + "loss": 0.7806228399276733, + "step": 758 + }, + { + "epoch": 0.26748898678414096, + "grad_norm": 1.318579751564355, + "learning_rate": 1.9621065115843155e-05, + "loss": 0.6924373507499695, + "step": 759 + }, + { + "epoch": 0.26784140969162995, + "grad_norm": 1.2867883287922122, + "learning_rate": 1.9619474776772462e-05, + "loss": 0.6809841394424438, + "step": 760 + }, + { + "epoch": 0.26819383259911894, + "grad_norm": 1.3766475304418688, + "learning_rate": 1.961788117218945e-05, + "loss": 0.8346723318099976, + "step": 761 + }, + { + "epoch": 0.26854625550660793, + "grad_norm": 1.3717126814625271, + "learning_rate": 1.96162843026351e-05, + "loss": 0.8000205755233765, + "step": 762 + }, + { + "epoch": 0.2688986784140969, + "grad_norm": 1.256040752163899, + "learning_rate": 1.9614684168651504e-05, + "loss": 0.8026692271232605, + "step": 763 + }, + { + "epoch": 0.2692511013215859, + "grad_norm": 1.4850412299335856, + "learning_rate": 1.961308077078185e-05, + "loss": 0.921292781829834, + 
"step": 764 + }, + { + "epoch": 0.2696035242290749, + "grad_norm": 1.6577133714061814, + "learning_rate": 1.9611474109570446e-05, + "loss": 0.8018487095832825, + "step": 765 + }, + { + "epoch": 0.2699559471365639, + "grad_norm": 1.2440268554728864, + "learning_rate": 1.9609864185562698e-05, + "loss": 0.7400588989257812, + "step": 766 + }, + { + "epoch": 0.27030837004405284, + "grad_norm": 1.2094824954459686, + "learning_rate": 1.960825099930513e-05, + "loss": 0.6243399977684021, + "step": 767 + }, + { + "epoch": 0.27066079295154183, + "grad_norm": 1.415024134390762, + "learning_rate": 1.9606634551345373e-05, + "loss": 0.7680903673171997, + "step": 768 + }, + { + "epoch": 0.2710132158590308, + "grad_norm": 1.3126349106428246, + "learning_rate": 1.960501484223215e-05, + "loss": 0.8783930540084839, + "step": 769 + }, + { + "epoch": 0.2713656387665198, + "grad_norm": 1.4964756858010921, + "learning_rate": 1.9603391872515308e-05, + "loss": 0.7910561561584473, + "step": 770 + }, + { + "epoch": 0.2717180616740088, + "grad_norm": 1.4400527227532898, + "learning_rate": 1.9601765642745795e-05, + "loss": 0.7325295209884644, + "step": 771 + }, + { + "epoch": 0.2720704845814978, + "grad_norm": 1.3018158119605838, + "learning_rate": 1.9600136153475666e-05, + "loss": 0.7017170190811157, + "step": 772 + }, + { + "epoch": 0.2724229074889868, + "grad_norm": 1.5395904311410002, + "learning_rate": 1.959850340525808e-05, + "loss": 0.9281908273696899, + "step": 773 + }, + { + "epoch": 0.2727753303964758, + "grad_norm": 1.256408104414643, + "learning_rate": 1.95968673986473e-05, + "loss": 0.7421029806137085, + "step": 774 + }, + { + "epoch": 0.27312775330396477, + "grad_norm": 1.3171523536350294, + "learning_rate": 1.9595228134198708e-05, + "loss": 0.7474848031997681, + "step": 775 + }, + { + "epoch": 0.27348017621145376, + "grad_norm": 1.3683438241049553, + "learning_rate": 1.9593585612468776e-05, + "loss": 0.7267760038375854, + "step": 776 + }, + { + "epoch": 0.27383259911894275, + 
"grad_norm": 1.4883233103137832, + "learning_rate": 1.9591939834015096e-05, + "loss": 0.739683985710144, + "step": 777 + }, + { + "epoch": 0.2741850220264317, + "grad_norm": 1.2437408403604437, + "learning_rate": 1.9590290799396353e-05, + "loss": 0.6615399122238159, + "step": 778 + }, + { + "epoch": 0.2745374449339207, + "grad_norm": 1.5863201035209105, + "learning_rate": 1.9588638509172343e-05, + "loss": 0.8045977354049683, + "step": 779 + }, + { + "epoch": 0.2748898678414097, + "grad_norm": 1.5522608295626732, + "learning_rate": 1.958698296390397e-05, + "loss": 0.8760169744491577, + "step": 780 + }, + { + "epoch": 0.27524229074889867, + "grad_norm": 1.5297761597873432, + "learning_rate": 1.9585324164153236e-05, + "loss": 0.6676662564277649, + "step": 781 + }, + { + "epoch": 0.27559471365638766, + "grad_norm": 1.1706549585314092, + "learning_rate": 1.958366211048326e-05, + "loss": 0.6650630235671997, + "step": 782 + }, + { + "epoch": 0.27594713656387665, + "grad_norm": 1.157826702613003, + "learning_rate": 1.9581996803458248e-05, + "loss": 0.7399466037750244, + "step": 783 + }, + { + "epoch": 0.27629955947136564, + "grad_norm": 1.316772401506473, + "learning_rate": 1.9580328243643528e-05, + "loss": 0.6121753454208374, + "step": 784 + }, + { + "epoch": 0.27665198237885463, + "grad_norm": 1.334268754223845, + "learning_rate": 1.9578656431605515e-05, + "loss": 0.8562870025634766, + "step": 785 + }, + { + "epoch": 0.2770044052863436, + "grad_norm": 1.441421130314338, + "learning_rate": 1.9576981367911746e-05, + "loss": 0.717842161655426, + "step": 786 + }, + { + "epoch": 0.2773568281938326, + "grad_norm": 1.582720247126145, + "learning_rate": 1.9575303053130847e-05, + "loss": 0.802294135093689, + "step": 787 + }, + { + "epoch": 0.2777092511013216, + "grad_norm": 1.2234390313515955, + "learning_rate": 1.957362148783256e-05, + "loss": 0.6636664867401123, + "step": 788 + }, + { + "epoch": 0.2780616740088106, + "grad_norm": 1.3850288575091645, + "learning_rate": 
1.9571936672587718e-05, + "loss": 0.7177780866622925, + "step": 789 + }, + { + "epoch": 0.27841409691629954, + "grad_norm": 1.6181532263095582, + "learning_rate": 1.957024860796826e-05, + "loss": 0.8263623714447021, + "step": 790 + }, + { + "epoch": 0.27876651982378853, + "grad_norm": 1.4470113515398348, + "learning_rate": 1.9568557294547244e-05, + "loss": 0.7620534896850586, + "step": 791 + }, + { + "epoch": 0.2791189427312775, + "grad_norm": 1.5101791429302596, + "learning_rate": 1.956686273289881e-05, + "loss": 0.812814474105835, + "step": 792 + }, + { + "epoch": 0.2794713656387665, + "grad_norm": 1.2812553609430264, + "learning_rate": 1.956516492359821e-05, + "loss": 0.6494747400283813, + "step": 793 + }, + { + "epoch": 0.2798237885462555, + "grad_norm": 1.3413487769011592, + "learning_rate": 1.9563463867221793e-05, + "loss": 0.7152044773101807, + "step": 794 + }, + { + "epoch": 0.2801762114537445, + "grad_norm": 1.5718962936709213, + "learning_rate": 1.956175956434702e-05, + "loss": 0.7607219815254211, + "step": 795 + }, + { + "epoch": 0.2805286343612335, + "grad_norm": 1.4483911078118432, + "learning_rate": 1.9560052015552455e-05, + "loss": 0.8793845176696777, + "step": 796 + }, + { + "epoch": 0.2808810572687225, + "grad_norm": 1.509282266928049, + "learning_rate": 1.9558341221417744e-05, + "loss": 0.8314816951751709, + "step": 797 + }, + { + "epoch": 0.28123348017621147, + "grad_norm": 1.2634078342185056, + "learning_rate": 1.9556627182523656e-05, + "loss": 0.8195264339447021, + "step": 798 + }, + { + "epoch": 0.28158590308370046, + "grad_norm": 1.2808718319688324, + "learning_rate": 1.9554909899452055e-05, + "loss": 0.8079999685287476, + "step": 799 + }, + { + "epoch": 0.28193832599118945, + "grad_norm": 1.6867283155032318, + "learning_rate": 1.9553189372785903e-05, + "loss": 0.7614034414291382, + "step": 800 + }, + { + "epoch": 0.2822907488986784, + "grad_norm": 1.13179227916607, + "learning_rate": 1.9551465603109263e-05, + "loss": 0.6271458268165588, + 
"step": 801 + }, + { + "epoch": 0.2826431718061674, + "grad_norm": 1.5872328004173855, + "learning_rate": 1.9549738591007302e-05, + "loss": 0.8061915040016174, + "step": 802 + }, + { + "epoch": 0.2829955947136564, + "grad_norm": 1.3494758196376249, + "learning_rate": 1.9548008337066294e-05, + "loss": 0.663912296295166, + "step": 803 + }, + { + "epoch": 0.28334801762114536, + "grad_norm": 1.503624867364233, + "learning_rate": 1.9546274841873597e-05, + "loss": 0.7582170963287354, + "step": 804 + }, + { + "epoch": 0.28370044052863436, + "grad_norm": 1.3181032025931252, + "learning_rate": 1.9544538106017682e-05, + "loss": 0.7855465412139893, + "step": 805 + }, + { + "epoch": 0.28405286343612335, + "grad_norm": 1.4462567272335825, + "learning_rate": 1.9542798130088116e-05, + "loss": 0.6976481676101685, + "step": 806 + }, + { + "epoch": 0.28440528634361234, + "grad_norm": 1.8291330426153005, + "learning_rate": 1.954105491467557e-05, + "loss": 0.7678342461585999, + "step": 807 + }, + { + "epoch": 0.28475770925110133, + "grad_norm": 1.2407582020259869, + "learning_rate": 1.9539308460371812e-05, + "loss": 0.6238858699798584, + "step": 808 + }, + { + "epoch": 0.2851101321585903, + "grad_norm": 1.239785831064125, + "learning_rate": 1.95375587677697e-05, + "loss": 0.7756681442260742, + "step": 809 + }, + { + "epoch": 0.2854625550660793, + "grad_norm": 1.462836813794646, + "learning_rate": 1.953580583746321e-05, + "loss": 0.8908202648162842, + "step": 810 + }, + { + "epoch": 0.2858149779735683, + "grad_norm": 1.1795831445984086, + "learning_rate": 1.9534049670047402e-05, + "loss": 0.6769838929176331, + "step": 811 + }, + { + "epoch": 0.28616740088105724, + "grad_norm": 1.2674126564024601, + "learning_rate": 1.953229026611844e-05, + "loss": 0.8452527523040771, + "step": 812 + }, + { + "epoch": 0.28651982378854624, + "grad_norm": 1.1830287106246784, + "learning_rate": 1.9530527626273592e-05, + "loss": 0.7494348287582397, + "step": 813 + }, + { + "epoch": 0.2868722466960352, + 
"grad_norm": 1.399665317775642, + "learning_rate": 1.9528761751111215e-05, + "loss": 0.7691028714179993, + "step": 814 + }, + { + "epoch": 0.2872246696035242, + "grad_norm": 1.2077153417445161, + "learning_rate": 1.9526992641230768e-05, + "loss": 0.6854703426361084, + "step": 815 + }, + { + "epoch": 0.2875770925110132, + "grad_norm": 1.4832887577463363, + "learning_rate": 1.9525220297232815e-05, + "loss": 0.7520424127578735, + "step": 816 + }, + { + "epoch": 0.2879295154185022, + "grad_norm": 1.499896401145914, + "learning_rate": 1.9523444719719003e-05, + "loss": 0.7894444465637207, + "step": 817 + }, + { + "epoch": 0.2882819383259912, + "grad_norm": 1.4246285223246848, + "learning_rate": 1.952166590929209e-05, + "loss": 0.7835032939910889, + "step": 818 + }, + { + "epoch": 0.2886343612334802, + "grad_norm": 1.4284322886298129, + "learning_rate": 1.9519883866555928e-05, + "loss": 0.7932062149047852, + "step": 819 + }, + { + "epoch": 0.2889867841409692, + "grad_norm": 1.3689283839888975, + "learning_rate": 1.951809859211546e-05, + "loss": 0.7917006015777588, + "step": 820 + }, + { + "epoch": 0.28933920704845817, + "grad_norm": 1.1579342690806749, + "learning_rate": 1.9516310086576734e-05, + "loss": 0.5330606698989868, + "step": 821 + }, + { + "epoch": 0.28969162995594716, + "grad_norm": 1.3284680872988386, + "learning_rate": 1.9514518350546893e-05, + "loss": 0.7243788242340088, + "step": 822 + }, + { + "epoch": 0.2900440528634361, + "grad_norm": 1.5494348453743318, + "learning_rate": 1.9512723384634175e-05, + "loss": 0.7692278623580933, + "step": 823 + }, + { + "epoch": 0.2903964757709251, + "grad_norm": 1.4784711521599085, + "learning_rate": 1.9510925189447916e-05, + "loss": 0.7537804841995239, + "step": 824 + }, + { + "epoch": 0.2907488986784141, + "grad_norm": 1.6724318756605505, + "learning_rate": 1.9509123765598545e-05, + "loss": 0.9168751239776611, + "step": 825 + }, + { + "epoch": 0.2911013215859031, + "grad_norm": 1.3269710873120673, + "learning_rate": 
1.9507319113697592e-05, + "loss": 0.7863682508468628, + "step": 826 + }, + { + "epoch": 0.29145374449339206, + "grad_norm": 1.3629585622585614, + "learning_rate": 1.9505511234357677e-05, + "loss": 0.7119239568710327, + "step": 827 + }, + { + "epoch": 0.29180616740088106, + "grad_norm": 1.3362093903256012, + "learning_rate": 1.950370012819252e-05, + "loss": 0.6071019172668457, + "step": 828 + }, + { + "epoch": 0.29215859030837005, + "grad_norm": 1.479432309492857, + "learning_rate": 1.9501885795816937e-05, + "loss": 0.9750580787658691, + "step": 829 + }, + { + "epoch": 0.29251101321585904, + "grad_norm": 1.3797663030734688, + "learning_rate": 1.9500068237846837e-05, + "loss": 0.7465370297431946, + "step": 830 + }, + { + "epoch": 0.29286343612334803, + "grad_norm": 1.3385246660479724, + "learning_rate": 1.949824745489922e-05, + "loss": 0.7821183204650879, + "step": 831 + }, + { + "epoch": 0.293215859030837, + "grad_norm": 1.4671979426722186, + "learning_rate": 1.949642344759219e-05, + "loss": 0.7555009126663208, + "step": 832 + }, + { + "epoch": 0.293568281938326, + "grad_norm": 1.4661056896012497, + "learning_rate": 1.9494596216544942e-05, + "loss": 0.841058075428009, + "step": 833 + }, + { + "epoch": 0.29392070484581495, + "grad_norm": 1.4108089015351581, + "learning_rate": 1.9492765762377762e-05, + "loss": 0.737910270690918, + "step": 834 + }, + { + "epoch": 0.29427312775330394, + "grad_norm": 1.430769526790491, + "learning_rate": 1.9490932085712027e-05, + "loss": 0.6817367076873779, + "step": 835 + }, + { + "epoch": 0.29462555066079293, + "grad_norm": 1.4513808156166068, + "learning_rate": 1.9489095187170218e-05, + "loss": 0.6739218235015869, + "step": 836 + }, + { + "epoch": 0.2949779735682819, + "grad_norm": 1.3318980758968664, + "learning_rate": 1.9487255067375907e-05, + "loss": 0.8632504940032959, + "step": 837 + }, + { + "epoch": 0.2953303964757709, + "grad_norm": 1.4421404890889282, + "learning_rate": 1.9485411726953753e-05, + "loss": 0.6615850925445557, + 
"step": 838 + }, + { + "epoch": 0.2956828193832599, + "grad_norm": 1.5159755088266125, + "learning_rate": 1.9483565166529515e-05, + "loss": 0.8647087812423706, + "step": 839 + }, + { + "epoch": 0.2960352422907489, + "grad_norm": 1.0418453596772383, + "learning_rate": 1.9481715386730044e-05, + "loss": 0.5152087807655334, + "step": 840 + }, + { + "epoch": 0.2963876651982379, + "grad_norm": 1.265898832717726, + "learning_rate": 1.9479862388183283e-05, + "loss": 0.7942806482315063, + "step": 841 + }, + { + "epoch": 0.2967400881057269, + "grad_norm": 1.4728087242398047, + "learning_rate": 1.947800617151826e-05, + "loss": 0.6364283561706543, + "step": 842 + }, + { + "epoch": 0.2970925110132159, + "grad_norm": 1.322764194763318, + "learning_rate": 1.9476146737365112e-05, + "loss": 0.8278179168701172, + "step": 843 + }, + { + "epoch": 0.29744493392070487, + "grad_norm": 1.4629329991948483, + "learning_rate": 1.9474284086355057e-05, + "loss": 0.7369956970214844, + "step": 844 + }, + { + "epoch": 0.29779735682819386, + "grad_norm": 1.314820640789224, + "learning_rate": 1.9472418219120403e-05, + "loss": 0.6879928112030029, + "step": 845 + }, + { + "epoch": 0.2981497797356828, + "grad_norm": 1.357319714737328, + "learning_rate": 1.9470549136294554e-05, + "loss": 0.8312973976135254, + "step": 846 + }, + { + "epoch": 0.2985022026431718, + "grad_norm": 1.3920317025034097, + "learning_rate": 1.946867683851201e-05, + "loss": 0.8102964162826538, + "step": 847 + }, + { + "epoch": 0.2988546255506608, + "grad_norm": 1.502648854525568, + "learning_rate": 1.9466801326408355e-05, + "loss": 0.6136792898178101, + "step": 848 + }, + { + "epoch": 0.29920704845814977, + "grad_norm": 1.4028028409959845, + "learning_rate": 1.946492260062027e-05, + "loss": 0.6388760805130005, + "step": 849 + }, + { + "epoch": 0.29955947136563876, + "grad_norm": 1.2376129930975477, + "learning_rate": 1.9463040661785516e-05, + "loss": 0.6443628072738647, + "step": 850 + }, + { + "epoch": 0.29991189427312775, + 
"grad_norm": 1.4811436209967876, + "learning_rate": 1.9461155510542962e-05, + "loss": 0.7763667702674866, + "step": 851 + }, + { + "epoch": 0.30026431718061675, + "grad_norm": 1.3770267697185403, + "learning_rate": 1.9459267147532555e-05, + "loss": 0.8040921688079834, + "step": 852 + }, + { + "epoch": 0.30061674008810574, + "grad_norm": 1.5848801035694915, + "learning_rate": 1.9457375573395334e-05, + "loss": 0.6271079778671265, + "step": 853 + }, + { + "epoch": 0.30096916299559473, + "grad_norm": 1.4490523944014555, + "learning_rate": 1.945548078877343e-05, + "loss": 0.6970022916793823, + "step": 854 + }, + { + "epoch": 0.3013215859030837, + "grad_norm": 1.5989299247861681, + "learning_rate": 1.9453582794310063e-05, + "loss": 0.8283002972602844, + "step": 855 + }, + { + "epoch": 0.3016740088105727, + "grad_norm": 1.3183590658260465, + "learning_rate": 1.9451681590649545e-05, + "loss": 0.7989551424980164, + "step": 856 + }, + { + "epoch": 0.30202643171806165, + "grad_norm": 1.6791390781024222, + "learning_rate": 1.9449777178437274e-05, + "loss": 0.7000687122344971, + "step": 857 + }, + { + "epoch": 0.30237885462555064, + "grad_norm": 1.8022925697135672, + "learning_rate": 1.944786955831974e-05, + "loss": 0.8005126714706421, + "step": 858 + }, + { + "epoch": 0.30273127753303963, + "grad_norm": 1.4999207234472591, + "learning_rate": 1.9445958730944515e-05, + "loss": 0.7060712575912476, + "step": 859 + }, + { + "epoch": 0.3030837004405286, + "grad_norm": 1.4072429406012825, + "learning_rate": 1.9444044696960277e-05, + "loss": 0.6979726552963257, + "step": 860 + }, + { + "epoch": 0.3034361233480176, + "grad_norm": 1.4515999764557612, + "learning_rate": 1.9442127457016768e-05, + "loss": 0.7916465401649475, + "step": 861 + }, + { + "epoch": 0.3037885462555066, + "grad_norm": 1.1660322947657744, + "learning_rate": 1.944020701176484e-05, + "loss": 0.6980502009391785, + "step": 862 + }, + { + "epoch": 0.3041409691629956, + "grad_norm": 1.7481448087127538, + "learning_rate": 
1.943828336185642e-05, + "loss": 0.8479218482971191, + "step": 863 + }, + { + "epoch": 0.3044933920704846, + "grad_norm": 1.6916771358958562, + "learning_rate": 1.9436356507944532e-05, + "loss": 0.8374297022819519, + "step": 864 + }, + { + "epoch": 0.3048458149779736, + "grad_norm": 1.3059238044039985, + "learning_rate": 1.943442645068328e-05, + "loss": 0.6871248483657837, + "step": 865 + }, + { + "epoch": 0.3051982378854626, + "grad_norm": 1.4668202087885096, + "learning_rate": 1.9432493190727854e-05, + "loss": 0.92267906665802, + "step": 866 + }, + { + "epoch": 0.30555066079295157, + "grad_norm": 1.7147503992363287, + "learning_rate": 1.9430556728734543e-05, + "loss": 0.7068654298782349, + "step": 867 + }, + { + "epoch": 0.3059030837004405, + "grad_norm": 1.354783765213683, + "learning_rate": 1.942861706536071e-05, + "loss": 0.830272912979126, + "step": 868 + }, + { + "epoch": 0.3062555066079295, + "grad_norm": 1.5223972366721212, + "learning_rate": 1.9426674201264814e-05, + "loss": 0.7996113300323486, + "step": 869 + }, + { + "epoch": 0.3066079295154185, + "grad_norm": 1.7576483396811688, + "learning_rate": 1.9424728137106398e-05, + "loss": 0.7519441843032837, + "step": 870 + }, + { + "epoch": 0.3069603524229075, + "grad_norm": 1.92300313533063, + "learning_rate": 1.9422778873546084e-05, + "loss": 0.5812790393829346, + "step": 871 + }, + { + "epoch": 0.30731277533039647, + "grad_norm": 1.058437605318741, + "learning_rate": 1.9420826411245595e-05, + "loss": 0.5953323841094971, + "step": 872 + }, + { + "epoch": 0.30766519823788546, + "grad_norm": 1.3954802825469448, + "learning_rate": 1.941887075086772e-05, + "loss": 0.8307937979698181, + "step": 873 + }, + { + "epoch": 0.30801762114537445, + "grad_norm": 1.5122839417773277, + "learning_rate": 1.9416911893076358e-05, + "loss": 0.7753443121910095, + "step": 874 + }, + { + "epoch": 0.30837004405286345, + "grad_norm": 1.3848386830658772, + "learning_rate": 1.9414949838536468e-05, + "loss": 0.8803520798683167, + 
"step": 875 + }, + { + "epoch": 0.30872246696035244, + "grad_norm": 1.3111930018969615, + "learning_rate": 1.9412984587914115e-05, + "loss": 0.6811587810516357, + "step": 876 + }, + { + "epoch": 0.30907488986784143, + "grad_norm": 1.3880932208512609, + "learning_rate": 1.9411016141876438e-05, + "loss": 0.802099347114563, + "step": 877 + }, + { + "epoch": 0.3094273127753304, + "grad_norm": 1.560285458084049, + "learning_rate": 1.940904450109166e-05, + "loss": 0.7325229644775391, + "step": 878 + }, + { + "epoch": 0.30977973568281936, + "grad_norm": 1.5126812875374416, + "learning_rate": 1.9407069666229097e-05, + "loss": 0.6515973210334778, + "step": 879 + }, + { + "epoch": 0.31013215859030835, + "grad_norm": 1.2990709527675965, + "learning_rate": 1.9405091637959138e-05, + "loss": 0.7314589619636536, + "step": 880 + }, + { + "epoch": 0.31048458149779734, + "grad_norm": 1.2146229290292494, + "learning_rate": 1.9403110416953267e-05, + "loss": 0.6668078303337097, + "step": 881 + }, + { + "epoch": 0.31083700440528633, + "grad_norm": 1.4214853985415763, + "learning_rate": 1.9401126003884047e-05, + "loss": 0.693236231803894, + "step": 882 + }, + { + "epoch": 0.3111894273127753, + "grad_norm": 2.210010730425174, + "learning_rate": 1.939913839942512e-05, + "loss": 0.8242754936218262, + "step": 883 + }, + { + "epoch": 0.3115418502202643, + "grad_norm": 1.4121001226290237, + "learning_rate": 1.939714760425122e-05, + "loss": 0.7776592373847961, + "step": 884 + }, + { + "epoch": 0.3118942731277533, + "grad_norm": 1.6297557283357365, + "learning_rate": 1.9395153619038158e-05, + "loss": 0.7023555636405945, + "step": 885 + }, + { + "epoch": 0.3122466960352423, + "grad_norm": 1.333512905730993, + "learning_rate": 1.939315644446283e-05, + "loss": 0.690382182598114, + "step": 886 + }, + { + "epoch": 0.3125991189427313, + "grad_norm": 1.4632969046362096, + "learning_rate": 1.9391156081203214e-05, + "loss": 0.7590082287788391, + "step": 887 + }, + { + "epoch": 0.3129515418502203, + 
"grad_norm": 1.3672878296080273, + "learning_rate": 1.9389152529938377e-05, + "loss": 0.7378168702125549, + "step": 888 + }, + { + "epoch": 0.3133039647577093, + "grad_norm": 1.3616414763479574, + "learning_rate": 1.938714579134845e-05, + "loss": 0.7036890983581543, + "step": 889 + }, + { + "epoch": 0.3136563876651982, + "grad_norm": 1.4808362954559244, + "learning_rate": 1.938513586611467e-05, + "loss": 0.8881829977035522, + "step": 890 + }, + { + "epoch": 0.3140088105726872, + "grad_norm": 1.5370313355999317, + "learning_rate": 1.9383122754919342e-05, + "loss": 0.7467600107192993, + "step": 891 + }, + { + "epoch": 0.3143612334801762, + "grad_norm": 1.6168044285318155, + "learning_rate": 1.938110645844585e-05, + "loss": 0.9358077049255371, + "step": 892 + }, + { + "epoch": 0.3147136563876652, + "grad_norm": 1.3982581442164577, + "learning_rate": 1.9379086977378664e-05, + "loss": 0.7751256227493286, + "step": 893 + }, + { + "epoch": 0.3150660792951542, + "grad_norm": 1.3071717433837386, + "learning_rate": 1.9377064312403338e-05, + "loss": 0.8020666837692261, + "step": 894 + }, + { + "epoch": 0.31541850220264317, + "grad_norm": 1.2076526617304193, + "learning_rate": 1.9375038464206507e-05, + "loss": 0.7251513004302979, + "step": 895 + }, + { + "epoch": 0.31577092511013216, + "grad_norm": 1.3323484110232422, + "learning_rate": 1.9373009433475874e-05, + "loss": 0.7163990139961243, + "step": 896 + }, + { + "epoch": 0.31612334801762115, + "grad_norm": 1.7368098259899396, + "learning_rate": 1.937097722090024e-05, + "loss": 0.7208842039108276, + "step": 897 + }, + { + "epoch": 0.31647577092511014, + "grad_norm": 1.3890083085574685, + "learning_rate": 1.9368941827169475e-05, + "loss": 0.7660849690437317, + "step": 898 + }, + { + "epoch": 0.31682819383259914, + "grad_norm": 1.4598849060474621, + "learning_rate": 1.9366903252974532e-05, + "loss": 0.7017598152160645, + "step": 899 + }, + { + "epoch": 0.31718061674008813, + "grad_norm": 1.1578269588811556, + "learning_rate": 
1.9364861499007443e-05, + "loss": 0.6831692457199097, + "step": 900 + }, + { + "epoch": 0.3175330396475771, + "grad_norm": 1.5110843884258551, + "learning_rate": 1.936281656596132e-05, + "loss": 0.6555520296096802, + "step": 901 + }, + { + "epoch": 0.31788546255506606, + "grad_norm": 1.5455350998398028, + "learning_rate": 1.9360768454530356e-05, + "loss": 0.7401334047317505, + "step": 902 + }, + { + "epoch": 0.31823788546255505, + "grad_norm": 1.445337217541868, + "learning_rate": 1.935871716540982e-05, + "loss": 0.7415893077850342, + "step": 903 + }, + { + "epoch": 0.31859030837004404, + "grad_norm": 1.280838808592201, + "learning_rate": 1.935666269929606e-05, + "loss": 0.8254752159118652, + "step": 904 + }, + { + "epoch": 0.31894273127753303, + "grad_norm": 1.4164353369528349, + "learning_rate": 1.9354605056886505e-05, + "loss": 0.708149254322052, + "step": 905 + }, + { + "epoch": 0.319295154185022, + "grad_norm": 5.868993531178127, + "learning_rate": 1.9352544238879654e-05, + "loss": 0.8084006905555725, + "step": 906 + }, + { + "epoch": 0.319647577092511, + "grad_norm": 1.264327413823813, + "learning_rate": 1.93504802459751e-05, + "loss": 0.8039542436599731, + "step": 907 + }, + { + "epoch": 0.32, + "grad_norm": 1.3552380315038073, + "learning_rate": 1.93484130788735e-05, + "loss": 0.7563241720199585, + "step": 908 + }, + { + "epoch": 0.320352422907489, + "grad_norm": 1.4802258000623036, + "learning_rate": 1.9346342738276593e-05, + "loss": 0.7972971200942993, + "step": 909 + }, + { + "epoch": 0.320704845814978, + "grad_norm": 1.2978401429696003, + "learning_rate": 1.93442692248872e-05, + "loss": 0.6693121790885925, + "step": 910 + }, + { + "epoch": 0.321057268722467, + "grad_norm": 1.567978048588056, + "learning_rate": 1.9342192539409203e-05, + "loss": 0.6597858667373657, + "step": 911 + }, + { + "epoch": 0.321409691629956, + "grad_norm": 1.368700143265877, + "learning_rate": 1.934011268254758e-05, + "loss": 0.6771499514579773, + "step": 912 + }, + { + "epoch": 
0.3217621145374449, + "grad_norm": 1.2365987861589656, + "learning_rate": 1.9338029655008375e-05, + "loss": 0.6903397440910339, + "step": 913 + }, + { + "epoch": 0.3221145374449339, + "grad_norm": 1.1408319382533163, + "learning_rate": 1.9335943457498717e-05, + "loss": 0.6287999153137207, + "step": 914 + }, + { + "epoch": 0.3224669603524229, + "grad_norm": 1.6382789883498257, + "learning_rate": 1.93338540907268e-05, + "loss": 0.7199264764785767, + "step": 915 + }, + { + "epoch": 0.3228193832599119, + "grad_norm": 1.3951711845041654, + "learning_rate": 1.9331761555401896e-05, + "loss": 0.6960160732269287, + "step": 916 + }, + { + "epoch": 0.3231718061674009, + "grad_norm": 1.4692108732272398, + "learning_rate": 1.932966585223436e-05, + "loss": 0.8981958627700806, + "step": 917 + }, + { + "epoch": 0.32352422907488987, + "grad_norm": 1.5685980092664367, + "learning_rate": 1.932756698193562e-05, + "loss": 0.786432147026062, + "step": 918 + }, + { + "epoch": 0.32387665198237886, + "grad_norm": 1.5208274085752962, + "learning_rate": 1.9325464945218172e-05, + "loss": 0.7260904312133789, + "step": 919 + }, + { + "epoch": 0.32422907488986785, + "grad_norm": 1.5076861367086136, + "learning_rate": 1.9323359742795595e-05, + "loss": 0.715835452079773, + "step": 920 + }, + { + "epoch": 0.32458149779735684, + "grad_norm": 1.5022880591009429, + "learning_rate": 1.932125137538254e-05, + "loss": 0.6312157511711121, + "step": 921 + }, + { + "epoch": 0.32493392070484584, + "grad_norm": 1.3825485581433186, + "learning_rate": 1.931913984369473e-05, + "loss": 0.7565821409225464, + "step": 922 + }, + { + "epoch": 0.3252863436123348, + "grad_norm": 1.3787375139479208, + "learning_rate": 1.931702514844896e-05, + "loss": 0.6866531372070312, + "step": 923 + }, + { + "epoch": 0.32563876651982376, + "grad_norm": 2.06933203374066, + "learning_rate": 1.9314907290363117e-05, + "loss": 0.879021167755127, + "step": 924 + }, + { + "epoch": 0.32599118942731276, + "grad_norm": 1.4876230584538193, + 
"learning_rate": 1.9312786270156135e-05, + "loss": 0.6972150802612305, + "step": 925 + }, + { + "epoch": 0.32634361233480175, + "grad_norm": 1.5939077112190465, + "learning_rate": 1.9310662088548042e-05, + "loss": 0.8735189437866211, + "step": 926 + }, + { + "epoch": 0.32669603524229074, + "grad_norm": 1.4550040646675775, + "learning_rate": 1.930853474625993e-05, + "loss": 0.6114254593849182, + "step": 927 + }, + { + "epoch": 0.32704845814977973, + "grad_norm": 1.5850836788259668, + "learning_rate": 1.930640424401396e-05, + "loss": 0.8032322525978088, + "step": 928 + }, + { + "epoch": 0.3274008810572687, + "grad_norm": 1.3666090686487828, + "learning_rate": 1.9304270582533376e-05, + "loss": 0.7391160726547241, + "step": 929 + }, + { + "epoch": 0.3277533039647577, + "grad_norm": 1.3744182830455962, + "learning_rate": 1.930213376254249e-05, + "loss": 0.7055366039276123, + "step": 930 + }, + { + "epoch": 0.3281057268722467, + "grad_norm": 1.3717314993069374, + "learning_rate": 1.9299993784766684e-05, + "loss": 0.671670138835907, + "step": 931 + }, + { + "epoch": 0.3284581497797357, + "grad_norm": 1.4961694507376992, + "learning_rate": 1.9297850649932416e-05, + "loss": 0.7486976385116577, + "step": 932 + }, + { + "epoch": 0.3288105726872247, + "grad_norm": 1.3777653583239398, + "learning_rate": 1.929570435876721e-05, + "loss": 0.8767625093460083, + "step": 933 + }, + { + "epoch": 0.3291629955947137, + "grad_norm": 1.5767252427705674, + "learning_rate": 1.929355491199967e-05, + "loss": 0.6841862797737122, + "step": 934 + }, + { + "epoch": 0.3295154185022026, + "grad_norm": 1.4985001262879563, + "learning_rate": 1.929140231035946e-05, + "loss": 0.7745054960250854, + "step": 935 + }, + { + "epoch": 0.3298678414096916, + "grad_norm": 1.4538548583561628, + "learning_rate": 1.928924655457733e-05, + "loss": 0.5879434943199158, + "step": 936 + }, + { + "epoch": 0.3302202643171806, + "grad_norm": 1.4292680321712006, + "learning_rate": 1.9287087645385084e-05, + "loss": 
0.8484170436859131, + "step": 937 + }, + { + "epoch": 0.3305726872246696, + "grad_norm": 1.3383126778675687, + "learning_rate": 1.9284925583515604e-05, + "loss": 0.6518877148628235, + "step": 938 + }, + { + "epoch": 0.3309251101321586, + "grad_norm": 1.3496744406534642, + "learning_rate": 1.928276036970285e-05, + "loss": 0.7694787383079529, + "step": 939 + }, + { + "epoch": 0.3312775330396476, + "grad_norm": 1.475669634065235, + "learning_rate": 1.928059200468184e-05, + "loss": 0.6893239617347717, + "step": 940 + }, + { + "epoch": 0.33162995594713657, + "grad_norm": 1.9386710613485005, + "learning_rate": 1.927842048918867e-05, + "loss": 0.7731181383132935, + "step": 941 + }, + { + "epoch": 0.33198237885462556, + "grad_norm": 1.2730945433300995, + "learning_rate": 1.9276245823960495e-05, + "loss": 0.652579665184021, + "step": 942 + }, + { + "epoch": 0.33233480176211455, + "grad_norm": 1.4590802585162193, + "learning_rate": 1.927406800973555e-05, + "loss": 0.7504575252532959, + "step": 943 + }, + { + "epoch": 0.33268722466960354, + "grad_norm": 1.2636242756085148, + "learning_rate": 1.927188704725314e-05, + "loss": 0.6199444532394409, + "step": 944 + }, + { + "epoch": 0.33303964757709253, + "grad_norm": 1.3381297141173314, + "learning_rate": 1.9269702937253623e-05, + "loss": 0.7452073693275452, + "step": 945 + }, + { + "epoch": 0.33339207048458147, + "grad_norm": 1.6220831494484687, + "learning_rate": 1.926751568047845e-05, + "loss": 0.7538012266159058, + "step": 946 + }, + { + "epoch": 0.33374449339207046, + "grad_norm": 1.298282312930767, + "learning_rate": 1.9265325277670114e-05, + "loss": 0.6670408248901367, + "step": 947 + }, + { + "epoch": 0.33409691629955945, + "grad_norm": 1.3861711594873305, + "learning_rate": 1.926313172957219e-05, + "loss": 0.8060495853424072, + "step": 948 + }, + { + "epoch": 0.33444933920704845, + "grad_norm": 1.497135036962013, + "learning_rate": 1.926093503692933e-05, + "loss": 0.7494044303894043, + "step": 949 + }, + { + "epoch": 
0.33480176211453744, + "grad_norm": 1.4954420855155135, + "learning_rate": 1.9258735200487235e-05, + "loss": 0.5751914978027344, + "step": 950 + }, + { + "epoch": 0.33515418502202643, + "grad_norm": 1.3135496972020755, + "learning_rate": 1.9256532220992683e-05, + "loss": 0.7234281301498413, + "step": 951 + }, + { + "epoch": 0.3355066079295154, + "grad_norm": 1.648299384166419, + "learning_rate": 1.9254326099193515e-05, + "loss": 0.7721251249313354, + "step": 952 + }, + { + "epoch": 0.3358590308370044, + "grad_norm": 1.5273494870998061, + "learning_rate": 1.925211683583864e-05, + "loss": 0.7240835428237915, + "step": 953 + }, + { + "epoch": 0.3362114537444934, + "grad_norm": 1.5101195617398009, + "learning_rate": 1.9249904431678037e-05, + "loss": 0.6622776985168457, + "step": 954 + }, + { + "epoch": 0.3365638766519824, + "grad_norm": 1.7484785330432984, + "learning_rate": 1.9247688887462747e-05, + "loss": 0.9682766199111938, + "step": 955 + }, + { + "epoch": 0.3369162995594714, + "grad_norm": 1.5743447413941896, + "learning_rate": 1.9245470203944878e-05, + "loss": 0.8363134860992432, + "step": 956 + }, + { + "epoch": 0.3372687224669604, + "grad_norm": 1.4500608043156524, + "learning_rate": 1.9243248381877605e-05, + "loss": 0.6530857086181641, + "step": 957 + }, + { + "epoch": 0.3376211453744493, + "grad_norm": 1.2035108561422267, + "learning_rate": 1.924102342201517e-05, + "loss": 0.5186585187911987, + "step": 958 + }, + { + "epoch": 0.3379735682819383, + "grad_norm": 1.3827408215949344, + "learning_rate": 1.9238795325112867e-05, + "loss": 0.6729516983032227, + "step": 959 + }, + { + "epoch": 0.3383259911894273, + "grad_norm": 4.587971824519282, + "learning_rate": 1.9236564091927083e-05, + "loss": 0.6991842985153198, + "step": 960 + }, + { + "epoch": 0.3386784140969163, + "grad_norm": 1.516889979226708, + "learning_rate": 1.9234329723215235e-05, + "loss": 0.7738245725631714, + "step": 961 + }, + { + "epoch": 0.3390308370044053, + "grad_norm": 1.4574207335379696, + 
"learning_rate": 1.923209221973583e-05, + "loss": 0.7027466893196106, + "step": 962 + }, + { + "epoch": 0.3393832599118943, + "grad_norm": 1.401098486802875, + "learning_rate": 1.922985158224843e-05, + "loss": 0.7868508696556091, + "step": 963 + }, + { + "epoch": 0.33973568281938327, + "grad_norm": 1.3325223534105368, + "learning_rate": 1.9227607811513662e-05, + "loss": 0.7499512434005737, + "step": 964 + }, + { + "epoch": 0.34008810572687226, + "grad_norm": 1.3198116129339372, + "learning_rate": 1.9225360908293217e-05, + "loss": 0.6662228107452393, + "step": 965 + }, + { + "epoch": 0.34044052863436125, + "grad_norm": 1.4854956624988247, + "learning_rate": 1.9223110873349847e-05, + "loss": 0.8570939302444458, + "step": 966 + }, + { + "epoch": 0.34079295154185024, + "grad_norm": 1.3385040645698225, + "learning_rate": 1.9220857707447372e-05, + "loss": 0.7497669458389282, + "step": 967 + }, + { + "epoch": 0.34114537444933923, + "grad_norm": 1.2753268813313299, + "learning_rate": 1.9218601411350663e-05, + "loss": 0.7356737852096558, + "step": 968 + }, + { + "epoch": 0.34149779735682817, + "grad_norm": 2.3286924006274896, + "learning_rate": 1.9216341985825672e-05, + "loss": 0.7880491018295288, + "step": 969 + }, + { + "epoch": 0.34185022026431716, + "grad_norm": 1.4677269303314853, + "learning_rate": 1.92140794316394e-05, + "loss": 0.734922468662262, + "step": 970 + }, + { + "epoch": 0.34220264317180615, + "grad_norm": 1.5501144518696521, + "learning_rate": 1.9211813749559916e-05, + "loss": 0.6710363626480103, + "step": 971 + }, + { + "epoch": 0.34255506607929515, + "grad_norm": 1.256856073477316, + "learning_rate": 1.920954494035634e-05, + "loss": 0.7300584316253662, + "step": 972 + }, + { + "epoch": 0.34290748898678414, + "grad_norm": 1.5351698758546528, + "learning_rate": 1.9207273004798873e-05, + "loss": 0.8584152460098267, + "step": 973 + }, + { + "epoch": 0.34325991189427313, + "grad_norm": 1.4115351274616093, + "learning_rate": 1.9204997943658764e-05, + "loss": 
0.7307419776916504, + "step": 974 + }, + { + "epoch": 0.3436123348017621, + "grad_norm": 1.434441373942747, + "learning_rate": 1.920271975770832e-05, + "loss": 0.6004960536956787, + "step": 975 + }, + { + "epoch": 0.3439647577092511, + "grad_norm": 1.4015679334218965, + "learning_rate": 1.920043844772092e-05, + "loss": 0.7951763868331909, + "step": 976 + }, + { + "epoch": 0.3443171806167401, + "grad_norm": 1.6575061104845086, + "learning_rate": 1.919815401447099e-05, + "loss": 0.6835082769393921, + "step": 977 + }, + { + "epoch": 0.3446696035242291, + "grad_norm": 1.3790503468449504, + "learning_rate": 1.9195866458734034e-05, + "loss": 0.7556526064872742, + "step": 978 + }, + { + "epoch": 0.3450220264317181, + "grad_norm": 1.2642410643718298, + "learning_rate": 1.91935757812866e-05, + "loss": 0.6918114423751831, + "step": 979 + }, + { + "epoch": 0.345374449339207, + "grad_norm": 1.6331863015088222, + "learning_rate": 1.9191281982906304e-05, + "loss": 0.8197037577629089, + "step": 980 + }, + { + "epoch": 0.345726872246696, + "grad_norm": 1.9820857497043596, + "learning_rate": 1.9188985064371818e-05, + "loss": 0.833138644695282, + "step": 981 + }, + { + "epoch": 0.346079295154185, + "grad_norm": 1.3877421520016489, + "learning_rate": 1.9186685026462874e-05, + "loss": 0.6593397855758667, + "step": 982 + }, + { + "epoch": 0.346431718061674, + "grad_norm": 1.750501089720619, + "learning_rate": 1.918438186996026e-05, + "loss": 0.7535643577575684, + "step": 983 + }, + { + "epoch": 0.346784140969163, + "grad_norm": 1.5295833510904033, + "learning_rate": 1.9182075595645836e-05, + "loss": 0.6959745287895203, + "step": 984 + }, + { + "epoch": 0.347136563876652, + "grad_norm": 1.4045200992789866, + "learning_rate": 1.91797662043025e-05, + "loss": 0.7349518537521362, + "step": 985 + }, + { + "epoch": 0.347488986784141, + "grad_norm": 1.3769518272852244, + "learning_rate": 1.9177453696714224e-05, + "loss": 0.7677974700927734, + "step": 986 + }, + { + "epoch": 
0.34784140969162997, + "grad_norm": 1.4486626509256493, + "learning_rate": 1.917513807366603e-05, + "loss": 0.7302255630493164, + "step": 987 + }, + { + "epoch": 0.34819383259911896, + "grad_norm": 1.477891236612788, + "learning_rate": 1.9172819335944003e-05, + "loss": 0.838138222694397, + "step": 988 + }, + { + "epoch": 0.34854625550660795, + "grad_norm": 1.555345260078333, + "learning_rate": 1.9170497484335276e-05, + "loss": 0.8018180131912231, + "step": 989 + }, + { + "epoch": 0.34889867841409694, + "grad_norm": 1.4299439839627417, + "learning_rate": 1.9168172519628056e-05, + "loss": 0.8085787296295166, + "step": 990 + }, + { + "epoch": 0.3492511013215859, + "grad_norm": 1.407734167007011, + "learning_rate": 1.9165844442611584e-05, + "loss": 0.8419004082679749, + "step": 991 + }, + { + "epoch": 0.34960352422907487, + "grad_norm": 1.485093259368171, + "learning_rate": 1.916351325407618e-05, + "loss": 0.8255139589309692, + "step": 992 + }, + { + "epoch": 0.34995594713656386, + "grad_norm": 1.3581016847128187, + "learning_rate": 1.9161178954813203e-05, + "loss": 0.7588528990745544, + "step": 993 + }, + { + "epoch": 0.35030837004405285, + "grad_norm": 1.3722258517458088, + "learning_rate": 1.9158841545615076e-05, + "loss": 0.7057096362113953, + "step": 994 + }, + { + "epoch": 0.35066079295154184, + "grad_norm": 1.3264479954648483, + "learning_rate": 1.915650102727528e-05, + "loss": 0.6913125514984131, + "step": 995 + }, + { + "epoch": 0.35101321585903084, + "grad_norm": 1.4277288783882767, + "learning_rate": 1.9154157400588348e-05, + "loss": 0.7622898817062378, + "step": 996 + }, + { + "epoch": 0.3513656387665198, + "grad_norm": 1.3345359637809249, + "learning_rate": 1.915181066634986e-05, + "loss": 0.6918702125549316, + "step": 997 + }, + { + "epoch": 0.3517180616740088, + "grad_norm": 1.4330955991310976, + "learning_rate": 1.914946082535647e-05, + "loss": 0.8801462650299072, + "step": 998 + }, + { + "epoch": 0.3520704845814978, + "grad_norm": 1.6364104196010791, + 
"learning_rate": 1.9147107878405873e-05, + "loss": 0.7901172637939453, + "step": 999 + }, + { + "epoch": 0.3524229074889868, + "grad_norm": 1.3202428944557627, + "learning_rate": 1.9144751826296818e-05, + "loss": 0.7308447360992432, + "step": 1000 + }, + { + "epoch": 0.3527753303964758, + "grad_norm": 1.3152547105893029, + "learning_rate": 1.9142392669829114e-05, + "loss": 0.5733275413513184, + "step": 1001 + }, + { + "epoch": 0.35312775330396473, + "grad_norm": 1.4327185784306546, + "learning_rate": 1.9140030409803622e-05, + "loss": 0.7251306772232056, + "step": 1002 + }, + { + "epoch": 0.3534801762114537, + "grad_norm": 1.3492122584167072, + "learning_rate": 1.913766504702225e-05, + "loss": 0.7983027696609497, + "step": 1003 + }, + { + "epoch": 0.3538325991189427, + "grad_norm": 1.5284478719025472, + "learning_rate": 1.9135296582287973e-05, + "loss": 0.7464017868041992, + "step": 1004 + }, + { + "epoch": 0.3541850220264317, + "grad_norm": 1.3377291300677683, + "learning_rate": 1.9132925016404805e-05, + "loss": 0.7333002686500549, + "step": 1005 + }, + { + "epoch": 0.3545374449339207, + "grad_norm": 1.4170618275882645, + "learning_rate": 1.9130550350177823e-05, + "loss": 0.729085385799408, + "step": 1006 + }, + { + "epoch": 0.3548898678414097, + "grad_norm": 1.1531700234964573, + "learning_rate": 1.9128172584413148e-05, + "loss": 0.7599227428436279, + "step": 1007 + }, + { + "epoch": 0.3552422907488987, + "grad_norm": 1.3499603875621307, + "learning_rate": 1.9125791719917962e-05, + "loss": 0.8110464811325073, + "step": 1008 + }, + { + "epoch": 0.3555947136563877, + "grad_norm": 1.443391069493257, + "learning_rate": 1.912340775750049e-05, + "loss": 0.7431697845458984, + "step": 1009 + }, + { + "epoch": 0.35594713656387666, + "grad_norm": 1.3353700802371913, + "learning_rate": 1.9121020697970016e-05, + "loss": 0.7833640575408936, + "step": 1010 + }, + { + "epoch": 0.35629955947136566, + "grad_norm": 1.2927496434698726, + "learning_rate": 1.9118630542136874e-05, + 
"loss": 0.7693058252334595, + "step": 1011 + }, + { + "epoch": 0.35665198237885465, + "grad_norm": 1.3593779388270224, + "learning_rate": 1.9116237290812445e-05, + "loss": 0.7724676132202148, + "step": 1012 + }, + { + "epoch": 0.3570044052863436, + "grad_norm": 1.3849928303091037, + "learning_rate": 1.911384094480916e-05, + "loss": 0.6024055480957031, + "step": 1013 + }, + { + "epoch": 0.3573568281938326, + "grad_norm": 1.254237630036734, + "learning_rate": 1.9111441504940514e-05, + "loss": 0.7710703611373901, + "step": 1014 + }, + { + "epoch": 0.35770925110132157, + "grad_norm": 1.3917926832468532, + "learning_rate": 1.910903897202103e-05, + "loss": 0.7591651678085327, + "step": 1015 + }, + { + "epoch": 0.35806167400881056, + "grad_norm": 1.3250641662724636, + "learning_rate": 1.9106633346866302e-05, + "loss": 0.7721874713897705, + "step": 1016 + }, + { + "epoch": 0.35841409691629955, + "grad_norm": 1.3837097156983347, + "learning_rate": 1.910422463029296e-05, + "loss": 0.6767420172691345, + "step": 1017 + }, + { + "epoch": 0.35876651982378854, + "grad_norm": 1.5808312779065312, + "learning_rate": 1.910181282311869e-05, + "loss": 0.6704902648925781, + "step": 1018 + }, + { + "epoch": 0.35911894273127754, + "grad_norm": 1.3288966146848866, + "learning_rate": 1.9099397926162227e-05, + "loss": 0.8871079683303833, + "step": 1019 + }, + { + "epoch": 0.3594713656387665, + "grad_norm": 1.5716465127646195, + "learning_rate": 1.909697994024335e-05, + "loss": 0.7222549319267273, + "step": 1020 + }, + { + "epoch": 0.3598237885462555, + "grad_norm": 1.4050103839828958, + "learning_rate": 1.9094558866182892e-05, + "loss": 0.7443021535873413, + "step": 1021 + }, + { + "epoch": 0.3601762114537445, + "grad_norm": 1.3877313570980134, + "learning_rate": 1.9092134704802735e-05, + "loss": 0.7698349952697754, + "step": 1022 + }, + { + "epoch": 0.3605286343612335, + "grad_norm": 1.9010750041325926, + "learning_rate": 1.9089707456925798e-05, + "loss": 0.863248348236084, + "step": 1023 + 
}, + { + "epoch": 0.3608810572687225, + "grad_norm": 1.1572981545597187, + "learning_rate": 1.9087277123376068e-05, + "loss": 0.7036338448524475, + "step": 1024 + }, + { + "epoch": 0.36123348017621143, + "grad_norm": 1.5140044810060398, + "learning_rate": 1.9084843704978558e-05, + "loss": 0.7427274584770203, + "step": 1025 + }, + { + "epoch": 0.3615859030837004, + "grad_norm": 1.5903685422277276, + "learning_rate": 1.908240720255934e-05, + "loss": 0.6548313498497009, + "step": 1026 + }, + { + "epoch": 0.3619383259911894, + "grad_norm": 1.3326463394362358, + "learning_rate": 1.9079967616945534e-05, + "loss": 0.7586454749107361, + "step": 1027 + }, + { + "epoch": 0.3622907488986784, + "grad_norm": 1.45389698507953, + "learning_rate": 1.90775249489653e-05, + "loss": 0.6954889297485352, + "step": 1028 + }, + { + "epoch": 0.3626431718061674, + "grad_norm": 1.6543950271160617, + "learning_rate": 1.907507919944785e-05, + "loss": 0.8798770904541016, + "step": 1029 + }, + { + "epoch": 0.3629955947136564, + "grad_norm": 1.3815054682339305, + "learning_rate": 1.9072630369223433e-05, + "loss": 0.6600694060325623, + "step": 1030 + }, + { + "epoch": 0.3633480176211454, + "grad_norm": 1.5776995405913148, + "learning_rate": 1.9070178459123366e-05, + "loss": 0.6830897927284241, + "step": 1031 + }, + { + "epoch": 0.36370044052863437, + "grad_norm": 1.1973844620945089, + "learning_rate": 1.906772346997998e-05, + "loss": 0.6283613443374634, + "step": 1032 + }, + { + "epoch": 0.36405286343612336, + "grad_norm": 1.2892968799675324, + "learning_rate": 1.9065265402626676e-05, + "loss": 0.6451754570007324, + "step": 1033 + }, + { + "epoch": 0.36440528634361236, + "grad_norm": 1.4387559441313162, + "learning_rate": 1.9062804257897887e-05, + "loss": 0.7949883937835693, + "step": 1034 + }, + { + "epoch": 0.36475770925110135, + "grad_norm": 1.4366893391590683, + "learning_rate": 1.90603400366291e-05, + "loss": 0.625343918800354, + "step": 1035 + }, + { + "epoch": 0.3651101321585903, + 
"grad_norm": 1.5716897663583798, + "learning_rate": 1.9057872739656843e-05, + "loss": 0.8398839235305786, + "step": 1036 + }, + { + "epoch": 0.3654625550660793, + "grad_norm": 1.6515297053174456, + "learning_rate": 1.9055402367818673e-05, + "loss": 0.8628166913986206, + "step": 1037 + }, + { + "epoch": 0.36581497797356827, + "grad_norm": 1.6000244306696312, + "learning_rate": 1.905292892195322e-05, + "loss": 0.7494110465049744, + "step": 1038 + }, + { + "epoch": 0.36616740088105726, + "grad_norm": 1.6358981860019415, + "learning_rate": 1.9050452402900134e-05, + "loss": 0.7695099115371704, + "step": 1039 + }, + { + "epoch": 0.36651982378854625, + "grad_norm": 1.3948395289772064, + "learning_rate": 1.904797281150012e-05, + "loss": 0.8067067861557007, + "step": 1040 + }, + { + "epoch": 0.36687224669603524, + "grad_norm": 1.5430196098026252, + "learning_rate": 1.9045490148594917e-05, + "loss": 0.7542074918746948, + "step": 1041 + }, + { + "epoch": 0.36722466960352423, + "grad_norm": 1.4232871422135234, + "learning_rate": 1.9043004415027314e-05, + "loss": 0.7027335166931152, + "step": 1042 + }, + { + "epoch": 0.3675770925110132, + "grad_norm": 1.2842638834648272, + "learning_rate": 1.9040515611641142e-05, + "loss": 0.7779253721237183, + "step": 1043 + }, + { + "epoch": 0.3679295154185022, + "grad_norm": 1.4713589430159515, + "learning_rate": 1.9038023739281275e-05, + "loss": 0.6840049028396606, + "step": 1044 + }, + { + "epoch": 0.3682819383259912, + "grad_norm": 1.2252786450532585, + "learning_rate": 1.903552879879362e-05, + "loss": 0.6183794736862183, + "step": 1045 + }, + { + "epoch": 0.3686343612334802, + "grad_norm": 1.3239395642180716, + "learning_rate": 1.9033030791025127e-05, + "loss": 0.7770168781280518, + "step": 1046 + }, + { + "epoch": 0.36898678414096914, + "grad_norm": 1.5646813675557831, + "learning_rate": 1.9030529716823806e-05, + "loss": 0.7192036509513855, + "step": 1047 + }, + { + "epoch": 0.36933920704845813, + "grad_norm": 1.3179369082607764, + 
"learning_rate": 1.9028025577038688e-05, + "loss": 0.6604419946670532, + "step": 1048 + }, + { + "epoch": 0.3696916299559471, + "grad_norm": 1.7088212085954357, + "learning_rate": 1.9025518372519847e-05, + "loss": 0.7999060153961182, + "step": 1049 + }, + { + "epoch": 0.3700440528634361, + "grad_norm": 1.6369356635778263, + "learning_rate": 1.9023008104118404e-05, + "loss": 0.7487536668777466, + "step": 1050 + }, + { + "epoch": 0.3703964757709251, + "grad_norm": 1.4534592079598474, + "learning_rate": 1.9020494772686513e-05, + "loss": 0.7786455154418945, + "step": 1051 + }, + { + "epoch": 0.3707488986784141, + "grad_norm": 1.5556124976221868, + "learning_rate": 1.9017978379077378e-05, + "loss": 0.7592626214027405, + "step": 1052 + }, + { + "epoch": 0.3711013215859031, + "grad_norm": 1.3193440168525459, + "learning_rate": 1.901545892414523e-05, + "loss": 0.774850606918335, + "step": 1053 + }, + { + "epoch": 0.3714537444933921, + "grad_norm": 1.4859587321900767, + "learning_rate": 1.901293640874535e-05, + "loss": 0.5430009365081787, + "step": 1054 + }, + { + "epoch": 0.37180616740088107, + "grad_norm": 1.4541817899150224, + "learning_rate": 1.9010410833734053e-05, + "loss": 0.7459923624992371, + "step": 1055 + }, + { + "epoch": 0.37215859030837006, + "grad_norm": 1.6269332982530442, + "learning_rate": 1.9007882199968692e-05, + "loss": 0.6372017860412598, + "step": 1056 + }, + { + "epoch": 0.37251101321585905, + "grad_norm": 1.6522112420188226, + "learning_rate": 1.900535050830766e-05, + "loss": 0.6773583292961121, + "step": 1057 + }, + { + "epoch": 0.372863436123348, + "grad_norm": 1.7342256392022233, + "learning_rate": 1.900281575961039e-05, + "loss": 0.8431004285812378, + "step": 1058 + }, + { + "epoch": 0.373215859030837, + "grad_norm": 1.4085085883480681, + "learning_rate": 1.9000277954737342e-05, + "loss": 0.6361340284347534, + "step": 1059 + }, + { + "epoch": 0.373568281938326, + "grad_norm": 1.3793359019510345, + "learning_rate": 1.8997737094550033e-05, + 
"loss": 0.7153787612915039, + "step": 1060 + }, + { + "epoch": 0.37392070484581497, + "grad_norm": 1.4220392348844544, + "learning_rate": 1.8995193179911e-05, + "loss": 0.7244935631752014, + "step": 1061 + }, + { + "epoch": 0.37427312775330396, + "grad_norm": 1.4061330426818142, + "learning_rate": 1.8992646211683817e-05, + "loss": 0.6648202538490295, + "step": 1062 + }, + { + "epoch": 0.37462555066079295, + "grad_norm": 1.4217807346058315, + "learning_rate": 1.8990096190733113e-05, + "loss": 0.6528836488723755, + "step": 1063 + }, + { + "epoch": 0.37497797356828194, + "grad_norm": 1.4695679092519263, + "learning_rate": 1.8987543117924532e-05, + "loss": 0.6749341487884521, + "step": 1064 + }, + { + "epoch": 0.37533039647577093, + "grad_norm": 1.3287092803608218, + "learning_rate": 1.8984986994124766e-05, + "loss": 0.7402256727218628, + "step": 1065 + }, + { + "epoch": 0.3756828193832599, + "grad_norm": 1.2181513754192281, + "learning_rate": 1.898242782020154e-05, + "loss": 0.5638695955276489, + "step": 1066 + }, + { + "epoch": 0.3760352422907489, + "grad_norm": 1.5457056768133204, + "learning_rate": 1.897986559702361e-05, + "loss": 0.829822838306427, + "step": 1067 + }, + { + "epoch": 0.3763876651982379, + "grad_norm": 1.3351440834834858, + "learning_rate": 1.8977300325460774e-05, + "loss": 0.6796025037765503, + "step": 1068 + }, + { + "epoch": 0.37674008810572684, + "grad_norm": 1.3611135527247238, + "learning_rate": 1.897473200638386e-05, + "loss": 0.8584038615226746, + "step": 1069 + }, + { + "epoch": 0.37709251101321584, + "grad_norm": 1.4622377307020165, + "learning_rate": 1.897216064066474e-05, + "loss": 0.8069149255752563, + "step": 1070 + }, + { + "epoch": 0.37744493392070483, + "grad_norm": 1.2194173424769332, + "learning_rate": 1.89695862291763e-05, + "loss": 0.5762223601341248, + "step": 1071 + }, + { + "epoch": 0.3777973568281938, + "grad_norm": 1.3827918624348656, + "learning_rate": 1.8967008772792483e-05, + "loss": 0.6626466512680054, + "step": 1072 + 
}, + { + "epoch": 0.3781497797356828, + "grad_norm": 1.15359758590964, + "learning_rate": 1.896442827238825e-05, + "loss": 0.6260244250297546, + "step": 1073 + }, + { + "epoch": 0.3785022026431718, + "grad_norm": 1.8994686915407593, + "learning_rate": 1.8961844728839602e-05, + "loss": 0.8090343475341797, + "step": 1074 + }, + { + "epoch": 0.3788546255506608, + "grad_norm": 1.4116056126096472, + "learning_rate": 1.8959258143023575e-05, + "loss": 0.66957026720047, + "step": 1075 + }, + { + "epoch": 0.3792070484581498, + "grad_norm": 1.308974606662818, + "learning_rate": 1.8956668515818223e-05, + "loss": 0.7103087306022644, + "step": 1076 + }, + { + "epoch": 0.3795594713656388, + "grad_norm": 1.468914156940793, + "learning_rate": 1.895407584810266e-05, + "loss": 0.7469112277030945, + "step": 1077 + }, + { + "epoch": 0.37991189427312777, + "grad_norm": 1.624950928787921, + "learning_rate": 1.8951480140757003e-05, + "loss": 0.8252213001251221, + "step": 1078 + }, + { + "epoch": 0.38026431718061676, + "grad_norm": 1.4238044077341658, + "learning_rate": 1.8948881394662417e-05, + "loss": 0.7204562425613403, + "step": 1079 + }, + { + "epoch": 0.38061674008810575, + "grad_norm": 1.5659608304591812, + "learning_rate": 1.89462796107011e-05, + "loss": 0.7325669527053833, + "step": 1080 + }, + { + "epoch": 0.3809691629955947, + "grad_norm": 1.2964480504204927, + "learning_rate": 1.8943674789756276e-05, + "loss": 0.738972008228302, + "step": 1081 + }, + { + "epoch": 0.3813215859030837, + "grad_norm": 1.5892566433984823, + "learning_rate": 1.8941066932712194e-05, + "loss": 0.7468631267547607, + "step": 1082 + }, + { + "epoch": 0.3816740088105727, + "grad_norm": 1.6145182365902104, + "learning_rate": 1.893845604045415e-05, + "loss": 0.6479831337928772, + "step": 1083 + }, + { + "epoch": 0.38202643171806167, + "grad_norm": 1.3615750017210906, + "learning_rate": 1.893584211386845e-05, + "loss": 0.7615871429443359, + "step": 1084 + }, + { + "epoch": 0.38237885462555066, + "grad_norm": 
1.8901071385329251, + "learning_rate": 1.8933225153842446e-05, + "loss": 0.6934449076652527, + "step": 1085 + }, + { + "epoch": 0.38273127753303965, + "grad_norm": 1.2384833194245852, + "learning_rate": 1.8930605161264517e-05, + "loss": 0.5267079472541809, + "step": 1086 + }, + { + "epoch": 0.38308370044052864, + "grad_norm": 1.524832028509735, + "learning_rate": 1.892798213702407e-05, + "loss": 0.7309125661849976, + "step": 1087 + }, + { + "epoch": 0.38343612334801763, + "grad_norm": 1.3743253361073855, + "learning_rate": 1.892535608201153e-05, + "loss": 0.8133678436279297, + "step": 1088 + }, + { + "epoch": 0.3837885462555066, + "grad_norm": 1.3915725940468886, + "learning_rate": 1.892272699711837e-05, + "loss": 0.6097027063369751, + "step": 1089 + }, + { + "epoch": 0.3841409691629956, + "grad_norm": 1.548287022579551, + "learning_rate": 1.8920094883237082e-05, + "loss": 0.70456862449646, + "step": 1090 + }, + { + "epoch": 0.3844933920704846, + "grad_norm": 1.2952569165029428, + "learning_rate": 1.8917459741261183e-05, + "loss": 0.7236523628234863, + "step": 1091 + }, + { + "epoch": 0.38484581497797354, + "grad_norm": 1.5039785189114319, + "learning_rate": 1.8914821572085224e-05, + "loss": 0.7251272201538086, + "step": 1092 + }, + { + "epoch": 0.38519823788546254, + "grad_norm": 1.271767676796452, + "learning_rate": 1.8912180376604777e-05, + "loss": 0.7381070852279663, + "step": 1093 + }, + { + "epoch": 0.3855506607929515, + "grad_norm": 1.6023999081974447, + "learning_rate": 1.8909536155716458e-05, + "loss": 0.6654129028320312, + "step": 1094 + }, + { + "epoch": 0.3859030837004405, + "grad_norm": 1.4351957388528893, + "learning_rate": 1.8906888910317883e-05, + "loss": 0.7823128700256348, + "step": 1095 + }, + { + "epoch": 0.3862555066079295, + "grad_norm": 1.2302320218391962, + "learning_rate": 1.8904238641307718e-05, + "loss": 0.5988126993179321, + "step": 1096 + }, + { + "epoch": 0.3866079295154185, + "grad_norm": 1.6745614533481283, + "learning_rate": 
1.8901585349585643e-05, + "loss": 0.7671465873718262, + "step": 1097 + }, + { + "epoch": 0.3869603524229075, + "grad_norm": 1.4027982600434907, + "learning_rate": 1.889892903605237e-05, + "loss": 0.7878838777542114, + "step": 1098 + }, + { + "epoch": 0.3873127753303965, + "grad_norm": 1.2802181437962392, + "learning_rate": 1.8896269701609634e-05, + "loss": 0.72254878282547, + "step": 1099 + }, + { + "epoch": 0.3876651982378855, + "grad_norm": 1.4183908379879375, + "learning_rate": 1.8893607347160198e-05, + "loss": 0.6796868443489075, + "step": 1100 + }, + { + "epoch": 0.38801762114537447, + "grad_norm": 1.510469064523606, + "learning_rate": 1.8890941973607843e-05, + "loss": 0.6378471851348877, + "step": 1101 + }, + { + "epoch": 0.38837004405286346, + "grad_norm": 1.327169163711753, + "learning_rate": 1.888827358185739e-05, + "loss": 0.8473032712936401, + "step": 1102 + }, + { + "epoch": 0.3887224669603524, + "grad_norm": 1.4704779902492213, + "learning_rate": 1.8885602172814667e-05, + "loss": 0.8272742033004761, + "step": 1103 + }, + { + "epoch": 0.3890748898678414, + "grad_norm": 1.45593169268278, + "learning_rate": 1.8882927747386533e-05, + "loss": 0.7244507670402527, + "step": 1104 + }, + { + "epoch": 0.3894273127753304, + "grad_norm": 1.3081271484466186, + "learning_rate": 1.888025030648088e-05, + "loss": 0.5764014720916748, + "step": 1105 + }, + { + "epoch": 0.3897797356828194, + "grad_norm": 1.230279760550168, + "learning_rate": 1.887756985100661e-05, + "loss": 0.6944009065628052, + "step": 1106 + }, + { + "epoch": 0.39013215859030836, + "grad_norm": 1.381963017332696, + "learning_rate": 1.8874886381873657e-05, + "loss": 0.7096902132034302, + "step": 1107 + }, + { + "epoch": 0.39048458149779736, + "grad_norm": 1.6526795986169043, + "learning_rate": 1.8872199899992973e-05, + "loss": 0.6304805278778076, + "step": 1108 + }, + { + "epoch": 0.39083700440528635, + "grad_norm": 1.3081643743142675, + "learning_rate": 1.8869510406276538e-05, + "loss": 
0.7091327905654907, + "step": 1109 + }, + { + "epoch": 0.39118942731277534, + "grad_norm": 1.4257979117717376, + "learning_rate": 1.886681790163735e-05, + "loss": 0.6575565338134766, + "step": 1110 + }, + { + "epoch": 0.39154185022026433, + "grad_norm": 1.6155582257297172, + "learning_rate": 1.8864122386989426e-05, + "loss": 0.837468147277832, + "step": 1111 + }, + { + "epoch": 0.3918942731277533, + "grad_norm": 1.4395330206284223, + "learning_rate": 1.8861423863247816e-05, + "loss": 0.6861380338668823, + "step": 1112 + }, + { + "epoch": 0.3922466960352423, + "grad_norm": 1.3206140573248442, + "learning_rate": 1.8858722331328582e-05, + "loss": 0.7421156167984009, + "step": 1113 + }, + { + "epoch": 0.39259911894273125, + "grad_norm": 1.4106532753820455, + "learning_rate": 1.8856017792148807e-05, + "loss": 0.8037575483322144, + "step": 1114 + }, + { + "epoch": 0.39295154185022024, + "grad_norm": 1.34412494732323, + "learning_rate": 1.8853310246626608e-05, + "loss": 0.6530179381370544, + "step": 1115 + }, + { + "epoch": 0.39330396475770923, + "grad_norm": 1.7480111733406445, + "learning_rate": 1.88505996956811e-05, + "loss": 0.9039478302001953, + "step": 1116 + }, + { + "epoch": 0.3936563876651982, + "grad_norm": 1.2556675250098766, + "learning_rate": 1.8847886140232438e-05, + "loss": 0.7734917998313904, + "step": 1117 + }, + { + "epoch": 0.3940088105726872, + "grad_norm": 1.4809117769611548, + "learning_rate": 1.8845169581201786e-05, + "loss": 0.7146204113960266, + "step": 1118 + }, + { + "epoch": 0.3943612334801762, + "grad_norm": 1.4108388267740644, + "learning_rate": 1.8842450019511337e-05, + "loss": 0.6427414417266846, + "step": 1119 + }, + { + "epoch": 0.3947136563876652, + "grad_norm": 1.462443026711516, + "learning_rate": 1.883972745608429e-05, + "loss": 0.7241504192352295, + "step": 1120 + }, + { + "epoch": 0.3950660792951542, + "grad_norm": 1.5796197427651677, + "learning_rate": 1.8837001891844875e-05, + "loss": 0.7085466384887695, + "step": 1121 + }, + { + 
"epoch": 0.3954185022026432, + "grad_norm": 1.220037664049328, + "learning_rate": 1.8834273327718334e-05, + "loss": 0.6099711656570435, + "step": 1122 + }, + { + "epoch": 0.3957709251101322, + "grad_norm": 1.7637467057266936, + "learning_rate": 1.8831541764630936e-05, + "loss": 0.9153809547424316, + "step": 1123 + }, + { + "epoch": 0.39612334801762117, + "grad_norm": 1.432058114739846, + "learning_rate": 1.8828807203509953e-05, + "loss": 0.7025514841079712, + "step": 1124 + }, + { + "epoch": 0.3964757709251101, + "grad_norm": 1.3170228531933665, + "learning_rate": 1.882606964528369e-05, + "loss": 0.8254855275154114, + "step": 1125 + }, + { + "epoch": 0.3968281938325991, + "grad_norm": 1.3015643549096694, + "learning_rate": 1.8823329090881457e-05, + "loss": 0.6812278032302856, + "step": 1126 + }, + { + "epoch": 0.3971806167400881, + "grad_norm": 1.4379402990614556, + "learning_rate": 1.8820585541233592e-05, + "loss": 0.6570114493370056, + "step": 1127 + }, + { + "epoch": 0.3975330396475771, + "grad_norm": 1.4245448514304093, + "learning_rate": 1.881783899727144e-05, + "loss": 0.636163592338562, + "step": 1128 + }, + { + "epoch": 0.39788546255506607, + "grad_norm": 1.4535684365173425, + "learning_rate": 1.8815089459927373e-05, + "loss": 0.6744807958602905, + "step": 1129 + }, + { + "epoch": 0.39823788546255506, + "grad_norm": 1.2654983836452696, + "learning_rate": 1.8812336930134768e-05, + "loss": 0.6739502549171448, + "step": 1130 + }, + { + "epoch": 0.39859030837004406, + "grad_norm": 1.5274150360278067, + "learning_rate": 1.8809581408828026e-05, + "loss": 0.800058126449585, + "step": 1131 + }, + { + "epoch": 0.39894273127753305, + "grad_norm": 1.293199138820765, + "learning_rate": 1.880682289694256e-05, + "loss": 0.7158734798431396, + "step": 1132 + }, + { + "epoch": 0.39929515418502204, + "grad_norm": 1.426620948967722, + "learning_rate": 1.8804061395414795e-05, + "loss": 0.7142150402069092, + "step": 1133 + }, + { + "epoch": 0.39964757709251103, + "grad_norm": 
1.5712220679274596, + "learning_rate": 1.8801296905182184e-05, + "loss": 0.7830438613891602, + "step": 1134 + }, + { + "epoch": 0.4, + "grad_norm": 1.3789411964854812, + "learning_rate": 1.879852942718318e-05, + "loss": 0.7037091255187988, + "step": 1135 + }, + { + "epoch": 0.400352422907489, + "grad_norm": 1.5410576826642701, + "learning_rate": 1.8795758962357254e-05, + "loss": 0.7634316682815552, + "step": 1136 + }, + { + "epoch": 0.40070484581497795, + "grad_norm": 1.3380525485574057, + "learning_rate": 1.8792985511644895e-05, + "loss": 0.8569636344909668, + "step": 1137 + }, + { + "epoch": 0.40105726872246694, + "grad_norm": 1.4697640342217926, + "learning_rate": 1.8790209075987603e-05, + "loss": 0.8589881062507629, + "step": 1138 + }, + { + "epoch": 0.40140969162995593, + "grad_norm": 1.4119711578026037, + "learning_rate": 1.8787429656327892e-05, + "loss": 0.6667177677154541, + "step": 1139 + }, + { + "epoch": 0.4017621145374449, + "grad_norm": 1.5302691962759787, + "learning_rate": 1.8784647253609286e-05, + "loss": 0.8272922039031982, + "step": 1140 + }, + { + "epoch": 0.4021145374449339, + "grad_norm": 1.4934073596410382, + "learning_rate": 1.8781861868776328e-05, + "loss": 0.735906720161438, + "step": 1141 + }, + { + "epoch": 0.4024669603524229, + "grad_norm": 1.6214826290901958, + "learning_rate": 1.8779073502774567e-05, + "loss": 0.7496200799942017, + "step": 1142 + }, + { + "epoch": 0.4028193832599119, + "grad_norm": 1.5379634398249482, + "learning_rate": 1.8776282156550563e-05, + "loss": 0.741244912147522, + "step": 1143 + }, + { + "epoch": 0.4031718061674009, + "grad_norm": 1.6175484470841388, + "learning_rate": 1.87734878310519e-05, + "loss": 0.6074572205543518, + "step": 1144 + }, + { + "epoch": 0.4035242290748899, + "grad_norm": 1.5403137415943102, + "learning_rate": 1.8770690527227156e-05, + "loss": 0.7852963805198669, + "step": 1145 + }, + { + "epoch": 0.4038766519823789, + "grad_norm": 1.3167947695811832, + "learning_rate": 
1.8767890246025934e-05, + "loss": 0.8041664361953735, + "step": 1146 + }, + { + "epoch": 0.40422907488986787, + "grad_norm": 1.2847896666293108, + "learning_rate": 1.876508698839884e-05, + "loss": 0.6014564037322998, + "step": 1147 + }, + { + "epoch": 0.4045814977973568, + "grad_norm": 1.6737775020761936, + "learning_rate": 1.876228075529749e-05, + "loss": 0.7389206886291504, + "step": 1148 + }, + { + "epoch": 0.4049339207048458, + "grad_norm": 1.5291026740622409, + "learning_rate": 1.875947154767452e-05, + "loss": 0.7540062665939331, + "step": 1149 + }, + { + "epoch": 0.4052863436123348, + "grad_norm": 1.5780731113626183, + "learning_rate": 1.8756659366483564e-05, + "loss": 0.6953487396240234, + "step": 1150 + }, + { + "epoch": 0.4056387665198238, + "grad_norm": 1.8069469411894516, + "learning_rate": 1.875384421267927e-05, + "loss": 0.6715666055679321, + "step": 1151 + }, + { + "epoch": 0.40599118942731277, + "grad_norm": 1.6113428960633331, + "learning_rate": 1.8751026087217294e-05, + "loss": 0.7763206362724304, + "step": 1152 + }, + { + "epoch": 0.40634361233480176, + "grad_norm": 1.7227531605547286, + "learning_rate": 1.8748204991054304e-05, + "loss": 0.8445626497268677, + "step": 1153 + }, + { + "epoch": 0.40669603524229075, + "grad_norm": 1.4170830085508515, + "learning_rate": 1.8745380925147976e-05, + "loss": 0.6789584159851074, + "step": 1154 + }, + { + "epoch": 0.40704845814977975, + "grad_norm": 1.403092590323935, + "learning_rate": 1.8742553890456986e-05, + "loss": 0.6301349401473999, + "step": 1155 + }, + { + "epoch": 0.40740088105726874, + "grad_norm": 1.243923442253091, + "learning_rate": 1.873972388794103e-05, + "loss": 0.5968909859657288, + "step": 1156 + }, + { + "epoch": 0.40775330396475773, + "grad_norm": 1.489269903668207, + "learning_rate": 1.873689091856081e-05, + "loss": 0.759127676486969, + "step": 1157 + }, + { + "epoch": 0.4081057268722467, + "grad_norm": 1.7062525426103168, + "learning_rate": 1.873405498327802e-05, + "loss": 
0.8113895654678345, + "step": 1158 + }, + { + "epoch": 0.40845814977973566, + "grad_norm": 2.2841166697739266, + "learning_rate": 1.8731216083055373e-05, + "loss": 0.6294944286346436, + "step": 1159 + }, + { + "epoch": 0.40881057268722465, + "grad_norm": 1.7643300465666825, + "learning_rate": 1.87283742188566e-05, + "loss": 0.7024469375610352, + "step": 1160 + }, + { + "epoch": 0.40916299559471364, + "grad_norm": 1.6192702903054457, + "learning_rate": 1.8725529391646413e-05, + "loss": 0.6593793034553528, + "step": 1161 + }, + { + "epoch": 0.40951541850220263, + "grad_norm": 1.491465083071803, + "learning_rate": 1.8722681602390548e-05, + "loss": 0.72177654504776, + "step": 1162 + }, + { + "epoch": 0.4098678414096916, + "grad_norm": 1.5089448151062697, + "learning_rate": 1.8719830852055736e-05, + "loss": 0.7099393606185913, + "step": 1163 + }, + { + "epoch": 0.4102202643171806, + "grad_norm": 1.3870038981594819, + "learning_rate": 1.871697714160972e-05, + "loss": 0.6221687197685242, + "step": 1164 + }, + { + "epoch": 0.4105726872246696, + "grad_norm": 1.6034975452453926, + "learning_rate": 1.8714120472021252e-05, + "loss": 0.7236911058425903, + "step": 1165 + }, + { + "epoch": 0.4109251101321586, + "grad_norm": 1.6733335742616042, + "learning_rate": 1.8711260844260072e-05, + "loss": 0.6777583360671997, + "step": 1166 + }, + { + "epoch": 0.4112775330396476, + "grad_norm": 1.2685396486773262, + "learning_rate": 1.870839825929694e-05, + "loss": 0.6408713459968567, + "step": 1167 + }, + { + "epoch": 0.4116299559471366, + "grad_norm": 1.5501797457897155, + "learning_rate": 1.870553271810362e-05, + "loss": 0.6081968545913696, + "step": 1168 + }, + { + "epoch": 0.4119823788546256, + "grad_norm": 1.324315376857478, + "learning_rate": 1.8702664221652864e-05, + "loss": 0.7269757986068726, + "step": 1169 + }, + { + "epoch": 0.4123348017621145, + "grad_norm": 1.359571395974998, + "learning_rate": 1.8699792770918443e-05, + "loss": 0.6563149094581604, + "step": 1170 + }, + { + 
"epoch": 0.4126872246696035, + "grad_norm": 1.412304869808958, + "learning_rate": 1.8696918366875123e-05, + "loss": 0.6900039911270142, + "step": 1171 + }, + { + "epoch": 0.4130396475770925, + "grad_norm": 1.6666238046463622, + "learning_rate": 1.869404101049868e-05, + "loss": 0.6575014591217041, + "step": 1172 + }, + { + "epoch": 0.4133920704845815, + "grad_norm": 1.7453316480937289, + "learning_rate": 1.8691160702765878e-05, + "loss": 0.8178410530090332, + "step": 1173 + }, + { + "epoch": 0.4137444933920705, + "grad_norm": 1.2369225358107252, + "learning_rate": 1.8688277444654495e-05, + "loss": 0.6247331500053406, + "step": 1174 + }, + { + "epoch": 0.41409691629955947, + "grad_norm": 1.4809443864869283, + "learning_rate": 1.868539123714331e-05, + "loss": 0.7220792770385742, + "step": 1175 + }, + { + "epoch": 0.41444933920704846, + "grad_norm": 1.3133478143499064, + "learning_rate": 1.8682502081212104e-05, + "loss": 0.6279594302177429, + "step": 1176 + }, + { + "epoch": 0.41480176211453745, + "grad_norm": 1.9965951061666904, + "learning_rate": 1.8679609977841646e-05, + "loss": 0.8814467787742615, + "step": 1177 + }, + { + "epoch": 0.41515418502202645, + "grad_norm": 1.337413771448709, + "learning_rate": 1.867671492801372e-05, + "loss": 0.6601974368095398, + "step": 1178 + }, + { + "epoch": 0.41550660792951544, + "grad_norm": 1.5188708939818696, + "learning_rate": 1.8673816932711107e-05, + "loss": 0.7004785537719727, + "step": 1179 + }, + { + "epoch": 0.41585903083700443, + "grad_norm": 1.5057078901191085, + "learning_rate": 1.8670915992917586e-05, + "loss": 0.7409330606460571, + "step": 1180 + }, + { + "epoch": 0.41621145374449336, + "grad_norm": 1.4232223858260633, + "learning_rate": 1.8668012109617933e-05, + "loss": 0.6698065996170044, + "step": 1181 + }, + { + "epoch": 0.41656387665198236, + "grad_norm": 1.5925482634189316, + "learning_rate": 1.8665105283797927e-05, + "loss": 0.7420671582221985, + "step": 1182 + }, + { + "epoch": 0.41691629955947135, + 
"grad_norm": 1.5560634478711484, + "learning_rate": 1.8662195516444345e-05, + "loss": 0.7719774842262268, + "step": 1183 + }, + { + "epoch": 0.41726872246696034, + "grad_norm": 1.4792437797078573, + "learning_rate": 1.8659282808544966e-05, + "loss": 0.6206108331680298, + "step": 1184 + }, + { + "epoch": 0.41762114537444933, + "grad_norm": 1.3470893025550628, + "learning_rate": 1.865636716108856e-05, + "loss": 0.799741268157959, + "step": 1185 + }, + { + "epoch": 0.4179735682819383, + "grad_norm": 1.419455186886867, + "learning_rate": 1.8653448575064893e-05, + "loss": 0.6839771866798401, + "step": 1186 + }, + { + "epoch": 0.4183259911894273, + "grad_norm": 1.4763673797370565, + "learning_rate": 1.8650527051464744e-05, + "loss": 0.7937930822372437, + "step": 1187 + }, + { + "epoch": 0.4186784140969163, + "grad_norm": 2.8190993538517524, + "learning_rate": 1.8647602591279873e-05, + "loss": 0.6819020509719849, + "step": 1188 + }, + { + "epoch": 0.4190308370044053, + "grad_norm": 1.3567646132379503, + "learning_rate": 1.864467519550305e-05, + "loss": 0.75614994764328, + "step": 1189 + }, + { + "epoch": 0.4193832599118943, + "grad_norm": 1.567742841021855, + "learning_rate": 1.864174486512803e-05, + "loss": 0.6966177225112915, + "step": 1190 + }, + { + "epoch": 0.4197356828193833, + "grad_norm": 1.7710714107881367, + "learning_rate": 1.8638811601149568e-05, + "loss": 0.821509838104248, + "step": 1191 + }, + { + "epoch": 0.4200881057268723, + "grad_norm": 1.2328562386437087, + "learning_rate": 1.8635875404563414e-05, + "loss": 0.5905138254165649, + "step": 1192 + }, + { + "epoch": 0.4204405286343612, + "grad_norm": 1.4647056442197128, + "learning_rate": 1.8632936276366323e-05, + "loss": 0.6856247186660767, + "step": 1193 + }, + { + "epoch": 0.4207929515418502, + "grad_norm": 1.4886760353067057, + "learning_rate": 1.862999421755603e-05, + "loss": 0.745036244392395, + "step": 1194 + }, + { + "epoch": 0.4211453744493392, + "grad_norm": 1.1750279689329006, + "learning_rate": 
1.8627049229131276e-05, + "loss": 0.6503005027770996, + "step": 1195 + }, + { + "epoch": 0.4214977973568282, + "grad_norm": 1.5431880343600168, + "learning_rate": 1.86241013120918e-05, + "loss": 0.7498307228088379, + "step": 1196 + }, + { + "epoch": 0.4218502202643172, + "grad_norm": 1.3468463845976426, + "learning_rate": 1.862115046743831e-05, + "loss": 0.7928652763366699, + "step": 1197 + }, + { + "epoch": 0.42220264317180617, + "grad_norm": 1.2342083264732957, + "learning_rate": 1.861819669617254e-05, + "loss": 0.6854137182235718, + "step": 1198 + }, + { + "epoch": 0.42255506607929516, + "grad_norm": 1.2078818370142543, + "learning_rate": 1.86152399992972e-05, + "loss": 0.6196715831756592, + "step": 1199 + }, + { + "epoch": 0.42290748898678415, + "grad_norm": 1.3970249114344502, + "learning_rate": 1.8612280377816e-05, + "loss": 0.6937464475631714, + "step": 1200 + }, + { + "epoch": 0.42325991189427314, + "grad_norm": 1.68603514212184, + "learning_rate": 1.860931783273363e-05, + "loss": 0.7681070566177368, + "step": 1201 + }, + { + "epoch": 0.42361233480176214, + "grad_norm": 1.1472443629032707, + "learning_rate": 1.860635236505579e-05, + "loss": 0.676302969455719, + "step": 1202 + }, + { + "epoch": 0.4239647577092511, + "grad_norm": 1.3856112594345633, + "learning_rate": 1.8603383975789168e-05, + "loss": 0.6533253192901611, + "step": 1203 + }, + { + "epoch": 0.42431718061674006, + "grad_norm": 1.3469284337535972, + "learning_rate": 1.860041266594143e-05, + "loss": 0.689995288848877, + "step": 1204 + }, + { + "epoch": 0.42466960352422906, + "grad_norm": 1.5007772835228577, + "learning_rate": 1.859743843652124e-05, + "loss": 0.8129922747612, + "step": 1205 + }, + { + "epoch": 0.42502202643171805, + "grad_norm": 1.5410683437680426, + "learning_rate": 1.859446128853827e-05, + "loss": 0.8388077616691589, + "step": 1206 + }, + { + "epoch": 0.42537444933920704, + "grad_norm": 1.5558529097869003, + "learning_rate": 1.859148122300316e-05, + "loss": 0.8795225024223328, + 
"step": 1207 + }, + { + "epoch": 0.42572687224669603, + "grad_norm": 1.1213374735945745, + "learning_rate": 1.858849824092755e-05, + "loss": 0.7340251803398132, + "step": 1208 + }, + { + "epoch": 0.426079295154185, + "grad_norm": 1.4951423694810024, + "learning_rate": 1.8585512343324073e-05, + "loss": 0.8028355240821838, + "step": 1209 + }, + { + "epoch": 0.426431718061674, + "grad_norm": 1.4585659256901293, + "learning_rate": 1.8582523531206345e-05, + "loss": 0.8469998836517334, + "step": 1210 + }, + { + "epoch": 0.426784140969163, + "grad_norm": 1.5383443322846213, + "learning_rate": 1.857953180558898e-05, + "loss": 0.7562716007232666, + "step": 1211 + }, + { + "epoch": 0.427136563876652, + "grad_norm": 1.4113837543209433, + "learning_rate": 1.857653716748757e-05, + "loss": 0.7166177034378052, + "step": 1212 + }, + { + "epoch": 0.427488986784141, + "grad_norm": 1.5418199345701933, + "learning_rate": 1.85735396179187e-05, + "loss": 0.6946159601211548, + "step": 1213 + }, + { + "epoch": 0.42784140969163, + "grad_norm": 1.317478160039542, + "learning_rate": 1.8570539157899953e-05, + "loss": 0.5341482758522034, + "step": 1214 + }, + { + "epoch": 0.4281938325991189, + "grad_norm": 1.4287482623115888, + "learning_rate": 1.8567535788449886e-05, + "loss": 0.8128249645233154, + "step": 1215 + }, + { + "epoch": 0.4285462555066079, + "grad_norm": 1.34325298688053, + "learning_rate": 1.8564529510588046e-05, + "loss": 0.7136335372924805, + "step": 1216 + }, + { + "epoch": 0.4288986784140969, + "grad_norm": 1.358163949395023, + "learning_rate": 1.856152032533498e-05, + "loss": 0.6737562417984009, + "step": 1217 + }, + { + "epoch": 0.4292511013215859, + "grad_norm": 1.306172251281951, + "learning_rate": 1.855850823371221e-05, + "loss": 0.8102772235870361, + "step": 1218 + }, + { + "epoch": 0.4296035242290749, + "grad_norm": 1.4109010281873726, + "learning_rate": 1.855549323674224e-05, + "loss": 0.7389130592346191, + "step": 1219 + }, + { + "epoch": 0.4299559471365639, + 
"grad_norm": 1.6519920374913426, + "learning_rate": 1.8552475335448575e-05, + "loss": 0.9127305746078491, + "step": 1220 + }, + { + "epoch": 0.43030837004405287, + "grad_norm": 1.4401162301668198, + "learning_rate": 1.8549454530855697e-05, + "loss": 0.7599691152572632, + "step": 1221 + }, + { + "epoch": 0.43066079295154186, + "grad_norm": 1.59834239528244, + "learning_rate": 1.8546430823989075e-05, + "loss": 0.8343819379806519, + "step": 1222 + }, + { + "epoch": 0.43101321585903085, + "grad_norm": 1.7081796080725813, + "learning_rate": 1.8543404215875163e-05, + "loss": 0.7759256362915039, + "step": 1223 + }, + { + "epoch": 0.43136563876651984, + "grad_norm": 1.3364188660639875, + "learning_rate": 1.8540374707541398e-05, + "loss": 0.7803373336791992, + "step": 1224 + }, + { + "epoch": 0.43171806167400884, + "grad_norm": 1.4538494145578122, + "learning_rate": 1.8537342300016208e-05, + "loss": 0.6292921304702759, + "step": 1225 + }, + { + "epoch": 0.43207048458149777, + "grad_norm": 1.4521641959343445, + "learning_rate": 1.8534306994329e-05, + "loss": 0.8495175838470459, + "step": 1226 + }, + { + "epoch": 0.43242290748898676, + "grad_norm": 1.3062742481146943, + "learning_rate": 1.8531268791510167e-05, + "loss": 0.6141406297683716, + "step": 1227 + }, + { + "epoch": 0.43277533039647575, + "grad_norm": 1.576341879030456, + "learning_rate": 1.8528227692591076e-05, + "loss": 0.7087793350219727, + "step": 1228 + }, + { + "epoch": 0.43312775330396475, + "grad_norm": 1.5442094308389636, + "learning_rate": 1.8525183698604098e-05, + "loss": 0.7919498682022095, + "step": 1229 + }, + { + "epoch": 0.43348017621145374, + "grad_norm": 1.317139155945084, + "learning_rate": 1.8522136810582563e-05, + "loss": 0.7408226728439331, + "step": 1230 + }, + { + "epoch": 0.43383259911894273, + "grad_norm": 1.407715848952146, + "learning_rate": 1.85190870295608e-05, + "loss": 0.7140083312988281, + "step": 1231 + }, + { + "epoch": 0.4341850220264317, + "grad_norm": 1.4117801977693214, + 
"learning_rate": 1.8516034356574118e-05, + "loss": 0.7211521863937378, + "step": 1232 + }, + { + "epoch": 0.4345374449339207, + "grad_norm": 1.1753876244240768, + "learning_rate": 1.85129787926588e-05, + "loss": 0.7103208303451538, + "step": 1233 + }, + { + "epoch": 0.4348898678414097, + "grad_norm": 1.4479636604064312, + "learning_rate": 1.850992033885211e-05, + "loss": 0.816985011100769, + "step": 1234 + }, + { + "epoch": 0.4352422907488987, + "grad_norm": 1.4368000528699751, + "learning_rate": 1.850685899619231e-05, + "loss": 0.6678498983383179, + "step": 1235 + }, + { + "epoch": 0.4355947136563877, + "grad_norm": 1.4259303259837681, + "learning_rate": 1.8503794765718622e-05, + "loss": 0.7895394563674927, + "step": 1236 + }, + { + "epoch": 0.4359471365638766, + "grad_norm": 1.4256180200365283, + "learning_rate": 1.8500727648471258e-05, + "loss": 0.7295971512794495, + "step": 1237 + }, + { + "epoch": 0.4362995594713656, + "grad_norm": 1.552299015894991, + "learning_rate": 1.849765764549141e-05, + "loss": 0.7216300964355469, + "step": 1238 + }, + { + "epoch": 0.4366519823788546, + "grad_norm": 2.585430848560662, + "learning_rate": 1.8494584757821252e-05, + "loss": 0.8088986873626709, + "step": 1239 + }, + { + "epoch": 0.4370044052863436, + "grad_norm": 1.3100612400703413, + "learning_rate": 1.8491508986503928e-05, + "loss": 0.7380663156509399, + "step": 1240 + }, + { + "epoch": 0.4373568281938326, + "grad_norm": 1.6225248085666293, + "learning_rate": 1.8488430332583566e-05, + "loss": 0.8671622276306152, + "step": 1241 + }, + { + "epoch": 0.4377092511013216, + "grad_norm": 1.2548349586148027, + "learning_rate": 1.8485348797105277e-05, + "loss": 0.6649274826049805, + "step": 1242 + }, + { + "epoch": 0.4380616740088106, + "grad_norm": 1.3492988450242405, + "learning_rate": 1.848226438111515e-05, + "loss": 0.740972638130188, + "step": 1243 + }, + { + "epoch": 0.43841409691629957, + "grad_norm": 1.4062352938849376, + "learning_rate": 1.8479177085660237e-05, + "loss": 
0.6593915820121765, + "step": 1244 + }, + { + "epoch": 0.43876651982378856, + "grad_norm": 1.567811244473075, + "learning_rate": 1.8476086911788588e-05, + "loss": 0.792604923248291, + "step": 1245 + }, + { + "epoch": 0.43911894273127755, + "grad_norm": 1.583820790059346, + "learning_rate": 1.8472993860549216e-05, + "loss": 0.7521885633468628, + "step": 1246 + }, + { + "epoch": 0.43947136563876654, + "grad_norm": 1.4520072830804587, + "learning_rate": 1.846989793299212e-05, + "loss": 0.7242246270179749, + "step": 1247 + }, + { + "epoch": 0.43982378854625553, + "grad_norm": 1.2892821056189339, + "learning_rate": 1.846679913016827e-05, + "loss": 0.7343394160270691, + "step": 1248 + }, + { + "epoch": 0.44017621145374447, + "grad_norm": 1.2525729631593605, + "learning_rate": 1.846369745312961e-05, + "loss": 0.747876763343811, + "step": 1249 + }, + { + "epoch": 0.44052863436123346, + "grad_norm": 1.428983542355963, + "learning_rate": 1.8460592902929064e-05, + "loss": 0.7280946969985962, + "step": 1250 + }, + { + "epoch": 0.44088105726872245, + "grad_norm": 1.4254243168735732, + "learning_rate": 1.845748548062053e-05, + "loss": 0.7288519144058228, + "step": 1251 + }, + { + "epoch": 0.44123348017621145, + "grad_norm": 1.4847519735948493, + "learning_rate": 1.8454375187258885e-05, + "loss": 0.6269914507865906, + "step": 1252 + }, + { + "epoch": 0.44158590308370044, + "grad_norm": 1.5355271633317282, + "learning_rate": 1.8451262023899973e-05, + "loss": 0.7848949432373047, + "step": 1253 + }, + { + "epoch": 0.44193832599118943, + "grad_norm": 1.580356922946946, + "learning_rate": 1.8448145991600618e-05, + "loss": 0.7306517958641052, + "step": 1254 + }, + { + "epoch": 0.4422907488986784, + "grad_norm": 1.3971874565683924, + "learning_rate": 1.8445027091418614e-05, + "loss": 0.6933906078338623, + "step": 1255 + }, + { + "epoch": 0.4426431718061674, + "grad_norm": 1.2942221540854206, + "learning_rate": 1.8441905324412732e-05, + "loss": 0.8260579109191895, + "step": 1256 + }, + { 
+ "epoch": 0.4429955947136564, + "grad_norm": 1.4276139754434451, + "learning_rate": 1.8438780691642712e-05, + "loss": 0.6818344593048096, + "step": 1257 + }, + { + "epoch": 0.4433480176211454, + "grad_norm": 1.5571344695334373, + "learning_rate": 1.8435653194169274e-05, + "loss": 0.5980014801025391, + "step": 1258 + }, + { + "epoch": 0.4437004405286344, + "grad_norm": 1.6363647319534165, + "learning_rate": 1.8432522833054102e-05, + "loss": 0.7694655656814575, + "step": 1259 + }, + { + "epoch": 0.4440528634361233, + "grad_norm": 1.4888452953161495, + "learning_rate": 1.842938960935986e-05, + "loss": 0.6861646771430969, + "step": 1260 + }, + { + "epoch": 0.4444052863436123, + "grad_norm": 1.5245731543783476, + "learning_rate": 1.8426253524150176e-05, + "loss": 0.7346323728561401, + "step": 1261 + }, + { + "epoch": 0.4447577092511013, + "grad_norm": 1.5555183873270297, + "learning_rate": 1.8423114578489657e-05, + "loss": 0.7116265296936035, + "step": 1262 + }, + { + "epoch": 0.4451101321585903, + "grad_norm": 1.3587295641859045, + "learning_rate": 1.8419972773443877e-05, + "loss": 0.7148594856262207, + "step": 1263 + }, + { + "epoch": 0.4454625550660793, + "grad_norm": 1.4208610042885819, + "learning_rate": 1.8416828110079378e-05, + "loss": 0.6629737615585327, + "step": 1264 + }, + { + "epoch": 0.4458149779735683, + "grad_norm": 1.2215430932959532, + "learning_rate": 1.8413680589463677e-05, + "loss": 0.5734454393386841, + "step": 1265 + }, + { + "epoch": 0.4461674008810573, + "grad_norm": 1.4728067026699625, + "learning_rate": 1.8410530212665258e-05, + "loss": 0.8129212260246277, + "step": 1266 + }, + { + "epoch": 0.44651982378854627, + "grad_norm": 1.5823039225136746, + "learning_rate": 1.8407376980753578e-05, + "loss": 0.7408754825592041, + "step": 1267 + }, + { + "epoch": 0.44687224669603526, + "grad_norm": 2.9520848026313633, + "learning_rate": 1.840422089479906e-05, + "loss": 0.7315034866333008, + "step": 1268 + }, + { + "epoch": 0.44722466960352425, + 
"grad_norm": 1.453693040198655, + "learning_rate": 1.8401061955873102e-05, + "loss": 0.6774684190750122, + "step": 1269 + }, + { + "epoch": 0.44757709251101324, + "grad_norm": 1.4189733125983666, + "learning_rate": 1.8397900165048055e-05, + "loss": 0.6615294814109802, + "step": 1270 + }, + { + "epoch": 0.4479295154185022, + "grad_norm": 1.465563156151872, + "learning_rate": 1.8394735523397258e-05, + "loss": 0.6757136583328247, + "step": 1271 + }, + { + "epoch": 0.44828193832599117, + "grad_norm": 1.3581337883847424, + "learning_rate": 1.8391568031995004e-05, + "loss": 0.6395466327667236, + "step": 1272 + }, + { + "epoch": 0.44863436123348016, + "grad_norm": 1.3957918327614203, + "learning_rate": 1.8388397691916556e-05, + "loss": 0.6436404585838318, + "step": 1273 + }, + { + "epoch": 0.44898678414096915, + "grad_norm": 1.2217258095016672, + "learning_rate": 1.838522450423815e-05, + "loss": 0.6280484199523926, + "step": 1274 + }, + { + "epoch": 0.44933920704845814, + "grad_norm": 1.3831470857016404, + "learning_rate": 1.8382048470036983e-05, + "loss": 0.7485225200653076, + "step": 1275 + }, + { + "epoch": 0.44969162995594714, + "grad_norm": 1.5437699808102354, + "learning_rate": 1.8378869590391217e-05, + "loss": 0.745079517364502, + "step": 1276 + }, + { + "epoch": 0.45004405286343613, + "grad_norm": 1.5902187054867891, + "learning_rate": 1.8375687866379988e-05, + "loss": 0.656510591506958, + "step": 1277 + }, + { + "epoch": 0.4503964757709251, + "grad_norm": 1.542738255105748, + "learning_rate": 1.8372503299083392e-05, + "loss": 0.7122445106506348, + "step": 1278 + }, + { + "epoch": 0.4507488986784141, + "grad_norm": 1.5368544285826038, + "learning_rate": 1.8369315889582483e-05, + "loss": 0.7402621507644653, + "step": 1279 + }, + { + "epoch": 0.4511013215859031, + "grad_norm": 1.539047411882563, + "learning_rate": 1.8366125638959292e-05, + "loss": 0.79311203956604, + "step": 1280 + }, + { + "epoch": 0.4514537444933921, + "grad_norm": 1.5615114889746888, + 
"learning_rate": 1.8362932548296815e-05, + "loss": 0.7748456001281738, + "step": 1281 + }, + { + "epoch": 0.45180616740088103, + "grad_norm": 1.4203050333533118, + "learning_rate": 1.8359736618679e-05, + "loss": 0.8285728096961975, + "step": 1282 + }, + { + "epoch": 0.45215859030837, + "grad_norm": 1.5541412727714081, + "learning_rate": 1.835653785119076e-05, + "loss": 0.7874733209609985, + "step": 1283 + }, + { + "epoch": 0.452511013215859, + "grad_norm": 1.990742110424804, + "learning_rate": 1.8353336246917996e-05, + "loss": 0.8984566926956177, + "step": 1284 + }, + { + "epoch": 0.452863436123348, + "grad_norm": 1.5779572276747513, + "learning_rate": 1.8350131806947537e-05, + "loss": 0.7730413675308228, + "step": 1285 + }, + { + "epoch": 0.453215859030837, + "grad_norm": 1.7109096071986905, + "learning_rate": 1.8346924532367195e-05, + "loss": 0.6064612865447998, + "step": 1286 + }, + { + "epoch": 0.453568281938326, + "grad_norm": 1.2417304411100711, + "learning_rate": 1.8343714424265742e-05, + "loss": 0.6946402192115784, + "step": 1287 + }, + { + "epoch": 0.453920704845815, + "grad_norm": 1.4035686433407615, + "learning_rate": 1.8340501483732908e-05, + "loss": 0.6131751537322998, + "step": 1288 + }, + { + "epoch": 0.454273127753304, + "grad_norm": 1.5800587203565855, + "learning_rate": 1.833728571185938e-05, + "loss": 0.7251182794570923, + "step": 1289 + }, + { + "epoch": 0.45462555066079297, + "grad_norm": 1.4036983560957499, + "learning_rate": 1.8334067109736826e-05, + "loss": 0.6548069715499878, + "step": 1290 + }, + { + "epoch": 0.45497797356828196, + "grad_norm": 1.3998869795024185, + "learning_rate": 1.833084567845785e-05, + "loss": 0.7416098117828369, + "step": 1291 + }, + { + "epoch": 0.45533039647577095, + "grad_norm": 2.508404695128388, + "learning_rate": 1.8327621419116034e-05, + "loss": 0.7320964932441711, + "step": 1292 + }, + { + "epoch": 0.4556828193832599, + "grad_norm": 1.3052290617356537, + "learning_rate": 1.8324394332805913e-05, + "loss": 
0.5926196575164795, + "step": 1293 + }, + { + "epoch": 0.4560352422907489, + "grad_norm": 1.5674410721277312, + "learning_rate": 1.8321164420622977e-05, + "loss": 0.5294085741043091, + "step": 1294 + }, + { + "epoch": 0.45638766519823787, + "grad_norm": 1.2785938430138426, + "learning_rate": 1.8317931683663688e-05, + "loss": 0.6332723498344421, + "step": 1295 + }, + { + "epoch": 0.45674008810572686, + "grad_norm": 1.5962686180302166, + "learning_rate": 1.8314696123025456e-05, + "loss": 0.8361148834228516, + "step": 1296 + }, + { + "epoch": 0.45709251101321585, + "grad_norm": 1.4587382180744954, + "learning_rate": 1.8311457739806648e-05, + "loss": 0.8097354173660278, + "step": 1297 + }, + { + "epoch": 0.45744493392070484, + "grad_norm": 1.5247898400944095, + "learning_rate": 1.8308216535106606e-05, + "loss": 0.8619102239608765, + "step": 1298 + }, + { + "epoch": 0.45779735682819384, + "grad_norm": 1.7222438621078806, + "learning_rate": 1.8304972510025607e-05, + "loss": 0.8149014711380005, + "step": 1299 + }, + { + "epoch": 0.4581497797356828, + "grad_norm": 1.4821216839710079, + "learning_rate": 1.8301725665664904e-05, + "loss": 0.6217210292816162, + "step": 1300 + }, + { + "epoch": 0.4585022026431718, + "grad_norm": 1.3606031472973286, + "learning_rate": 1.8298476003126695e-05, + "loss": 0.7496612071990967, + "step": 1301 + }, + { + "epoch": 0.4588546255506608, + "grad_norm": 1.3221676149271377, + "learning_rate": 1.8295223523514144e-05, + "loss": 0.743242084980011, + "step": 1302 + }, + { + "epoch": 0.4592070484581498, + "grad_norm": 1.3745674408132749, + "learning_rate": 1.829196822793136e-05, + "loss": 0.6425061821937561, + "step": 1303 + }, + { + "epoch": 0.4595594713656388, + "grad_norm": 1.6216951689157317, + "learning_rate": 1.828871011748342e-05, + "loss": 0.8274835348129272, + "step": 1304 + }, + { + "epoch": 0.45991189427312773, + "grad_norm": 1.2722833909738493, + "learning_rate": 1.828544919327635e-05, + "loss": 0.6403865814208984, + "step": 1305 + }, + 
{ + "epoch": 0.4602643171806167, + "grad_norm": 1.234115960449283, + "learning_rate": 1.828218545641713e-05, + "loss": 0.6585257053375244, + "step": 1306 + }, + { + "epoch": 0.4606167400881057, + "grad_norm": 1.2325421263478973, + "learning_rate": 1.82789189080137e-05, + "loss": 0.6467862129211426, + "step": 1307 + }, + { + "epoch": 0.4609691629955947, + "grad_norm": 1.403654297681647, + "learning_rate": 1.827564954917495e-05, + "loss": 0.8656524419784546, + "step": 1308 + }, + { + "epoch": 0.4613215859030837, + "grad_norm": 1.449712147167455, + "learning_rate": 1.8272377381010726e-05, + "loss": 0.7298469543457031, + "step": 1309 + }, + { + "epoch": 0.4616740088105727, + "grad_norm": 1.575558340533703, + "learning_rate": 1.8269102404631826e-05, + "loss": 0.7342871427536011, + "step": 1310 + }, + { + "epoch": 0.4620264317180617, + "grad_norm": 1.4177026442874099, + "learning_rate": 1.8265824621150005e-05, + "loss": 0.7437269687652588, + "step": 1311 + }, + { + "epoch": 0.4623788546255507, + "grad_norm": 1.370008690924395, + "learning_rate": 1.8262544031677965e-05, + "loss": 0.6761496067047119, + "step": 1312 + }, + { + "epoch": 0.46273127753303966, + "grad_norm": 1.3488719018465838, + "learning_rate": 1.825926063732937e-05, + "loss": 0.6504565477371216, + "step": 1313 + }, + { + "epoch": 0.46308370044052866, + "grad_norm": 1.5002490307110308, + "learning_rate": 1.8255974439218826e-05, + "loss": 0.7058892250061035, + "step": 1314 + }, + { + "epoch": 0.46343612334801765, + "grad_norm": 1.37061056314256, + "learning_rate": 1.8252685438461893e-05, + "loss": 0.704500675201416, + "step": 1315 + }, + { + "epoch": 0.4637885462555066, + "grad_norm": 1.3921050444029468, + "learning_rate": 1.824939363617509e-05, + "loss": 0.7438445091247559, + "step": 1316 + }, + { + "epoch": 0.4641409691629956, + "grad_norm": 1.4372002500080507, + "learning_rate": 1.8246099033475872e-05, + "loss": 0.6610915660858154, + "step": 1317 + }, + { + "epoch": 0.46449339207048457, + "grad_norm": 
1.0745723869419856, + "learning_rate": 1.8242801631482666e-05, + "loss": 0.5868711471557617, + "step": 1318 + }, + { + "epoch": 0.46484581497797356, + "grad_norm": 1.192238188456442, + "learning_rate": 1.8239501431314828e-05, + "loss": 0.7403215765953064, + "step": 1319 + }, + { + "epoch": 0.46519823788546255, + "grad_norm": 1.2444894883495399, + "learning_rate": 1.823619843409268e-05, + "loss": 0.6836927533149719, + "step": 1320 + }, + { + "epoch": 0.46555066079295154, + "grad_norm": 1.4619703465719247, + "learning_rate": 1.8232892640937482e-05, + "loss": 0.744488537311554, + "step": 1321 + }, + { + "epoch": 0.46590308370044053, + "grad_norm": 1.6337099192848834, + "learning_rate": 1.822958405297145e-05, + "loss": 0.8203051090240479, + "step": 1322 + }, + { + "epoch": 0.4662555066079295, + "grad_norm": 1.184261838198034, + "learning_rate": 1.8226272671317747e-05, + "loss": 0.6452913284301758, + "step": 1323 + }, + { + "epoch": 0.4666079295154185, + "grad_norm": 1.6458345614686154, + "learning_rate": 1.8222958497100482e-05, + "loss": 0.7362639307975769, + "step": 1324 + }, + { + "epoch": 0.4669603524229075, + "grad_norm": 6.608293048647877, + "learning_rate": 1.8219641531444713e-05, + "loss": 0.8192600011825562, + "step": 1325 + }, + { + "epoch": 0.4673127753303965, + "grad_norm": 1.4257376230679313, + "learning_rate": 1.8216321775476452e-05, + "loss": 0.8391410112380981, + "step": 1326 + }, + { + "epoch": 0.46766519823788544, + "grad_norm": 1.3133795307817668, + "learning_rate": 1.8212999230322648e-05, + "loss": 0.8723593354225159, + "step": 1327 + }, + { + "epoch": 0.46801762114537443, + "grad_norm": 1.4218119484201381, + "learning_rate": 1.8209673897111208e-05, + "loss": 0.6891233921051025, + "step": 1328 + }, + { + "epoch": 0.4683700440528634, + "grad_norm": 1.414801660380672, + "learning_rate": 1.820634577697097e-05, + "loss": 0.6585180759429932, + "step": 1329 + }, + { + "epoch": 0.4687224669603524, + "grad_norm": 1.503205293925671, + "learning_rate": 
1.8203014871031732e-05, + "loss": 0.9556418657302856, + "step": 1330 + }, + { + "epoch": 0.4690748898678414, + "grad_norm": 1.491345239113851, + "learning_rate": 1.8199681180424234e-05, + "loss": 0.803380012512207, + "step": 1331 + }, + { + "epoch": 0.4694273127753304, + "grad_norm": 1.6217603270172032, + "learning_rate": 1.819634470628016e-05, + "loss": 0.7090115547180176, + "step": 1332 + }, + { + "epoch": 0.4697797356828194, + "grad_norm": 1.6705712009535991, + "learning_rate": 1.8193005449732134e-05, + "loss": 0.6314720511436462, + "step": 1333 + }, + { + "epoch": 0.4701321585903084, + "grad_norm": 1.4756439095691731, + "learning_rate": 1.8189663411913737e-05, + "loss": 0.72248375415802, + "step": 1334 + }, + { + "epoch": 0.47048458149779737, + "grad_norm": 1.2477075880097683, + "learning_rate": 1.818631859395948e-05, + "loss": 0.6192474961280823, + "step": 1335 + }, + { + "epoch": 0.47083700440528636, + "grad_norm": 1.4944381119847567, + "learning_rate": 1.818297099700483e-05, + "loss": 0.6354564428329468, + "step": 1336 + }, + { + "epoch": 0.47118942731277536, + "grad_norm": 1.3129251382794922, + "learning_rate": 1.817962062218619e-05, + "loss": 0.7577195167541504, + "step": 1337 + }, + { + "epoch": 0.4715418502202643, + "grad_norm": 1.5504293722974503, + "learning_rate": 1.8176267470640908e-05, + "loss": 0.8064994812011719, + "step": 1338 + }, + { + "epoch": 0.4718942731277533, + "grad_norm": 1.211182925950848, + "learning_rate": 1.8172911543507276e-05, + "loss": 0.5994154214859009, + "step": 1339 + }, + { + "epoch": 0.4722466960352423, + "grad_norm": 1.701641381957404, + "learning_rate": 1.8169552841924524e-05, + "loss": 0.7483634948730469, + "step": 1340 + }, + { + "epoch": 0.47259911894273127, + "grad_norm": 1.5346948984560977, + "learning_rate": 1.8166191367032828e-05, + "loss": 0.817699134349823, + "step": 1341 + }, + { + "epoch": 0.47295154185022026, + "grad_norm": 1.4634504483386954, + "learning_rate": 1.8162827119973305e-05, + "loss": 
0.7262923717498779, + "step": 1342 + }, + { + "epoch": 0.47330396475770925, + "grad_norm": 1.6796646988667925, + "learning_rate": 1.8159460101888013e-05, + "loss": 0.6097851991653442, + "step": 1343 + }, + { + "epoch": 0.47365638766519824, + "grad_norm": 1.3148094915971675, + "learning_rate": 1.8156090313919944e-05, + "loss": 0.7258971929550171, + "step": 1344 + }, + { + "epoch": 0.47400881057268723, + "grad_norm": 1.198607169385478, + "learning_rate": 1.8152717757213045e-05, + "loss": 0.6300361156463623, + "step": 1345 + }, + { + "epoch": 0.4743612334801762, + "grad_norm": 1.397827708634256, + "learning_rate": 1.8149342432912184e-05, + "loss": 0.7339942455291748, + "step": 1346 + }, + { + "epoch": 0.4747136563876652, + "grad_norm": 1.4524082687419129, + "learning_rate": 1.8145964342163188e-05, + "loss": 0.7520095109939575, + "step": 1347 + }, + { + "epoch": 0.4750660792951542, + "grad_norm": 1.6587168399408485, + "learning_rate": 1.814258348611281e-05, + "loss": 0.7276853322982788, + "step": 1348 + }, + { + "epoch": 0.47541850220264315, + "grad_norm": 1.4463166573664321, + "learning_rate": 1.8139199865908742e-05, + "loss": 0.8004029989242554, + "step": 1349 + }, + { + "epoch": 0.47577092511013214, + "grad_norm": 1.4508723815154267, + "learning_rate": 1.8135813482699623e-05, + "loss": 0.6932536363601685, + "step": 1350 + }, + { + "epoch": 0.47612334801762113, + "grad_norm": 1.8868515127553653, + "learning_rate": 1.8132424337635026e-05, + "loss": 0.7697082161903381, + "step": 1351 + }, + { + "epoch": 0.4764757709251101, + "grad_norm": 1.246718000700102, + "learning_rate": 1.8129032431865453e-05, + "loss": 0.6472513675689697, + "step": 1352 + }, + { + "epoch": 0.4768281938325991, + "grad_norm": 1.413046013449196, + "learning_rate": 1.8125637766542353e-05, + "loss": 0.6483110785484314, + "step": 1353 + }, + { + "epoch": 0.4771806167400881, + "grad_norm": 1.4854860856809686, + "learning_rate": 1.8122240342818113e-05, + "loss": 0.5495485067367554, + "step": 1354 + }, + 
{ + "epoch": 0.4775330396475771, + "grad_norm": 1.2801602602197804, + "learning_rate": 1.811884016184605e-05, + "loss": 0.5235577821731567, + "step": 1355 + }, + { + "epoch": 0.4778854625550661, + "grad_norm": 1.734412256759482, + "learning_rate": 1.811543722478042e-05, + "loss": 0.7852121591567993, + "step": 1356 + }, + { + "epoch": 0.4782378854625551, + "grad_norm": 1.3650060645350073, + "learning_rate": 1.811203153277641e-05, + "loss": 0.6704862713813782, + "step": 1357 + }, + { + "epoch": 0.47859030837004407, + "grad_norm": 1.6553040991032588, + "learning_rate": 1.8108623086990156e-05, + "loss": 0.5964453220367432, + "step": 1358 + }, + { + "epoch": 0.47894273127753306, + "grad_norm": 1.3936312619950861, + "learning_rate": 1.8105211888578708e-05, + "loss": 0.6697995066642761, + "step": 1359 + }, + { + "epoch": 0.479295154185022, + "grad_norm": 1.5031130965144783, + "learning_rate": 1.810179793870007e-05, + "loss": 0.6335821151733398, + "step": 1360 + }, + { + "epoch": 0.479647577092511, + "grad_norm": 1.5635708705560234, + "learning_rate": 1.8098381238513173e-05, + "loss": 0.7925145626068115, + "step": 1361 + }, + { + "epoch": 0.48, + "grad_norm": 1.3011545804458011, + "learning_rate": 1.809496178917787e-05, + "loss": 0.6567563414573669, + "step": 1362 + }, + { + "epoch": 0.480352422907489, + "grad_norm": 1.6816341182204335, + "learning_rate": 1.809153959185497e-05, + "loss": 0.6318811178207397, + "step": 1363 + }, + { + "epoch": 0.48070484581497797, + "grad_norm": 1.6781349693525882, + "learning_rate": 1.8088114647706195e-05, + "loss": 0.7309727668762207, + "step": 1364 + }, + { + "epoch": 0.48105726872246696, + "grad_norm": 1.689289351270497, + "learning_rate": 1.8084686957894207e-05, + "loss": 0.7109836339950562, + "step": 1365 + }, + { + "epoch": 0.48140969162995595, + "grad_norm": 1.5638040238741844, + "learning_rate": 1.8081256523582604e-05, + "loss": 0.7475707530975342, + "step": 1366 + }, + { + "epoch": 0.48176211453744494, + "grad_norm": 
1.492251829838995, + "learning_rate": 1.8077823345935904e-05, + "loss": 0.7149914503097534, + "step": 1367 + }, + { + "epoch": 0.48211453744493393, + "grad_norm": 1.5575297411632822, + "learning_rate": 1.8074387426119574e-05, + "loss": 0.7294478416442871, + "step": 1368 + }, + { + "epoch": 0.4824669603524229, + "grad_norm": 1.4689289799329066, + "learning_rate": 1.8070948765299995e-05, + "loss": 0.7115635871887207, + "step": 1369 + }, + { + "epoch": 0.4828193832599119, + "grad_norm": 1.5506146763507274, + "learning_rate": 1.806750736464449e-05, + "loss": 0.7046270966529846, + "step": 1370 + }, + { + "epoch": 0.4831718061674009, + "grad_norm": 1.42427078791196, + "learning_rate": 1.8064063225321305e-05, + "loss": 0.6206589937210083, + "step": 1371 + }, + { + "epoch": 0.48352422907488984, + "grad_norm": 1.2993997688945442, + "learning_rate": 1.8060616348499612e-05, + "loss": 0.7135940194129944, + "step": 1372 + }, + { + "epoch": 0.48387665198237884, + "grad_norm": 1.4643276104475023, + "learning_rate": 1.8057166735349533e-05, + "loss": 0.8360849618911743, + "step": 1373 + }, + { + "epoch": 0.4842290748898678, + "grad_norm": 1.541707883618089, + "learning_rate": 1.805371438704209e-05, + "loss": 0.6842360496520996, + "step": 1374 + }, + { + "epoch": 0.4845814977973568, + "grad_norm": 1.5840103891509227, + "learning_rate": 1.8050259304749254e-05, + "loss": 0.7615031003952026, + "step": 1375 + }, + { + "epoch": 0.4849339207048458, + "grad_norm": 1.655426139564667, + "learning_rate": 1.804680148964392e-05, + "loss": 0.8019323348999023, + "step": 1376 + }, + { + "epoch": 0.4852863436123348, + "grad_norm": 1.4674909380859245, + "learning_rate": 1.8043340942899906e-05, + "loss": 0.7882958054542542, + "step": 1377 + }, + { + "epoch": 0.4856387665198238, + "grad_norm": 1.2708485815687132, + "learning_rate": 1.8039877665691955e-05, + "loss": 0.7504314184188843, + "step": 1378 + }, + { + "epoch": 0.4859911894273128, + "grad_norm": 1.4258268452315883, + "learning_rate": 
1.803641165919575e-05, + "loss": 0.6634547710418701, + "step": 1379 + }, + { + "epoch": 0.4863436123348018, + "grad_norm": 1.5654330696713128, + "learning_rate": 1.803294292458789e-05, + "loss": 0.7744965553283691, + "step": 1380 + }, + { + "epoch": 0.48669603524229077, + "grad_norm": 1.4537322254817193, + "learning_rate": 1.8029471463045904e-05, + "loss": 0.6322098970413208, + "step": 1381 + }, + { + "epoch": 0.48704845814977976, + "grad_norm": 1.4410033770501562, + "learning_rate": 1.8025997275748237e-05, + "loss": 0.7675940990447998, + "step": 1382 + }, + { + "epoch": 0.4874008810572687, + "grad_norm": 1.4138967124963124, + "learning_rate": 1.8022520363874275e-05, + "loss": 0.805001974105835, + "step": 1383 + }, + { + "epoch": 0.4877533039647577, + "grad_norm": 1.4447058519334661, + "learning_rate": 1.8019040728604322e-05, + "loss": 0.7647902369499207, + "step": 1384 + }, + { + "epoch": 0.4881057268722467, + "grad_norm": 1.4676048165311881, + "learning_rate": 1.8015558371119604e-05, + "loss": 0.7267208099365234, + "step": 1385 + }, + { + "epoch": 0.4884581497797357, + "grad_norm": 1.698344162431053, + "learning_rate": 1.801207329260227e-05, + "loss": 0.9259899854660034, + "step": 1386 + }, + { + "epoch": 0.48881057268722466, + "grad_norm": 1.6007666753359713, + "learning_rate": 1.8008585494235398e-05, + "loss": 0.7127895951271057, + "step": 1387 + }, + { + "epoch": 0.48916299559471366, + "grad_norm": 1.3612199688450533, + "learning_rate": 1.8005094977202987e-05, + "loss": 0.5890867710113525, + "step": 1388 + }, + { + "epoch": 0.48951541850220265, + "grad_norm": 1.355680060820382, + "learning_rate": 1.800160174268996e-05, + "loss": 0.9388052225112915, + "step": 1389 + }, + { + "epoch": 0.48986784140969164, + "grad_norm": 1.3938222391852138, + "learning_rate": 1.799810579188216e-05, + "loss": 0.7282747626304626, + "step": 1390 + }, + { + "epoch": 0.49022026431718063, + "grad_norm": 1.3481077360000804, + "learning_rate": 1.7994607125966354e-05, + "loss": 
0.743558943271637, + "step": 1391 + }, + { + "epoch": 0.4905726872246696, + "grad_norm": 1.5830453320245632, + "learning_rate": 1.7991105746130234e-05, + "loss": 0.794719934463501, + "step": 1392 + }, + { + "epoch": 0.4909251101321586, + "grad_norm": 1.2758935421604947, + "learning_rate": 1.7987601653562402e-05, + "loss": 0.7320685982704163, + "step": 1393 + }, + { + "epoch": 0.49127753303964755, + "grad_norm": 1.7642547814838838, + "learning_rate": 1.798409484945239e-05, + "loss": 0.7376105785369873, + "step": 1394 + }, + { + "epoch": 0.49162995594713654, + "grad_norm": 1.2029848235346605, + "learning_rate": 1.7980585334990652e-05, + "loss": 0.7474706172943115, + "step": 1395 + }, + { + "epoch": 0.49198237885462553, + "grad_norm": 1.2018884579546327, + "learning_rate": 1.797707311136856e-05, + "loss": 0.5799805521965027, + "step": 1396 + }, + { + "epoch": 0.4923348017621145, + "grad_norm": 1.4260726798049534, + "learning_rate": 1.79735581797784e-05, + "loss": 0.7515959739685059, + "step": 1397 + }, + { + "epoch": 0.4926872246696035, + "grad_norm": 1.4843732287701579, + "learning_rate": 1.797004054141339e-05, + "loss": 0.6035799980163574, + "step": 1398 + }, + { + "epoch": 0.4930396475770925, + "grad_norm": 1.4699634461145672, + "learning_rate": 1.796652019746765e-05, + "loss": 0.7613668441772461, + "step": 1399 + }, + { + "epoch": 0.4933920704845815, + "grad_norm": 1.5395256627563776, + "learning_rate": 1.7962997149136226e-05, + "loss": 0.8780882954597473, + "step": 1400 + }, + { + "epoch": 0.4937444933920705, + "grad_norm": 1.4849311758521768, + "learning_rate": 1.795947139761509e-05, + "loss": 0.8661091327667236, + "step": 1401 + }, + { + "epoch": 0.4940969162995595, + "grad_norm": 1.2531714361223334, + "learning_rate": 1.7955942944101124e-05, + "loss": 0.6893571019172668, + "step": 1402 + }, + { + "epoch": 0.4944493392070485, + "grad_norm": 1.4079915487364913, + "learning_rate": 1.7952411789792125e-05, + "loss": 0.787032961845398, + "step": 1403 + }, + { + 
"epoch": 0.49480176211453747, + "grad_norm": 1.3474472991478739, + "learning_rate": 1.7948877935886812e-05, + "loss": 0.5346347689628601, + "step": 1404 + }, + { + "epoch": 0.4951541850220264, + "grad_norm": 1.5512557601329955, + "learning_rate": 1.7945341383584818e-05, + "loss": 0.8090060949325562, + "step": 1405 + }, + { + "epoch": 0.4955066079295154, + "grad_norm": 1.4268796756971738, + "learning_rate": 1.7941802134086695e-05, + "loss": 0.6321496963500977, + "step": 1406 + }, + { + "epoch": 0.4958590308370044, + "grad_norm": 1.4602718850691796, + "learning_rate": 1.7938260188593903e-05, + "loss": 0.6405632495880127, + "step": 1407 + }, + { + "epoch": 0.4962114537444934, + "grad_norm": 1.3838752085896924, + "learning_rate": 1.7934715548308825e-05, + "loss": 0.7665356397628784, + "step": 1408 + }, + { + "epoch": 0.49656387665198237, + "grad_norm": 1.6983169415711221, + "learning_rate": 1.7931168214434757e-05, + "loss": 0.7960416078567505, + "step": 1409 + }, + { + "epoch": 0.49691629955947136, + "grad_norm": 1.3842449461014021, + "learning_rate": 1.7927618188175908e-05, + "loss": 0.8080639839172363, + "step": 1410 + }, + { + "epoch": 0.49726872246696036, + "grad_norm": 1.3034648934851016, + "learning_rate": 1.79240654707374e-05, + "loss": 0.6503266096115112, + "step": 1411 + }, + { + "epoch": 0.49762114537444935, + "grad_norm": 1.3378534420648176, + "learning_rate": 1.792051006332527e-05, + "loss": 0.6063007116317749, + "step": 1412 + }, + { + "epoch": 0.49797356828193834, + "grad_norm": 1.849150255820523, + "learning_rate": 1.791695196714647e-05, + "loss": 0.6861660480499268, + "step": 1413 + }, + { + "epoch": 0.49832599118942733, + "grad_norm": 1.2217791382902905, + "learning_rate": 1.791339118340886e-05, + "loss": 0.7064980268478394, + "step": 1414 + }, + { + "epoch": 0.4986784140969163, + "grad_norm": 1.4370359000865323, + "learning_rate": 1.7909827713321214e-05, + "loss": 0.6102496981620789, + "step": 1415 + }, + { + "epoch": 0.49903083700440526, + 
"grad_norm": 1.446734818664789, + "learning_rate": 1.790626155809323e-05, + "loss": 0.7460618019104004, + "step": 1416 + }, + { + "epoch": 0.49938325991189425, + "grad_norm": 1.2988677548719765, + "learning_rate": 1.7902692718935496e-05, + "loss": 0.7124448418617249, + "step": 1417 + }, + { + "epoch": 0.49973568281938324, + "grad_norm": 1.322744101240627, + "learning_rate": 1.7899121197059525e-05, + "loss": 0.7194923162460327, + "step": 1418 + }, + { + "epoch": 0.5000881057268722, + "grad_norm": 1.4429377947794157, + "learning_rate": 1.7895546993677736e-05, + "loss": 0.6633901596069336, + "step": 1419 + }, + { + "epoch": 0.5004405286343613, + "grad_norm": 1.5531583469807302, + "learning_rate": 1.7891970110003463e-05, + "loss": 0.8554216623306274, + "step": 1420 + }, + { + "epoch": 0.5007929515418502, + "grad_norm": 1.4541421669927512, + "learning_rate": 1.7888390547250944e-05, + "loss": 0.7259502410888672, + "step": 1421 + }, + { + "epoch": 0.5011453744493392, + "grad_norm": 1.4299229413313208, + "learning_rate": 1.788480830663533e-05, + "loss": 0.7330816984176636, + "step": 1422 + }, + { + "epoch": 0.5014977973568282, + "grad_norm": 1.5727227347094554, + "learning_rate": 1.7881223389372678e-05, + "loss": 0.7793391346931458, + "step": 1423 + }, + { + "epoch": 0.5018502202643171, + "grad_norm": 1.5101282054621992, + "learning_rate": 1.787763579667996e-05, + "loss": 0.7387483716011047, + "step": 1424 + }, + { + "epoch": 0.5022026431718062, + "grad_norm": 1.2875272836020812, + "learning_rate": 1.787404552977505e-05, + "loss": 0.6665850877761841, + "step": 1425 + }, + { + "epoch": 0.5025550660792951, + "grad_norm": 1.6443234538305773, + "learning_rate": 1.7870452589876733e-05, + "loss": 0.7487791180610657, + "step": 1426 + }, + { + "epoch": 0.5029074889867842, + "grad_norm": 1.5494170755115177, + "learning_rate": 1.78668569782047e-05, + "loss": 0.6048247814178467, + "step": 1427 + }, + { + "epoch": 0.5032599118942731, + "grad_norm": 1.2664597501734751, + 
"learning_rate": 1.786325869597955e-05, + "loss": 0.7196261882781982, + "step": 1428 + }, + { + "epoch": 0.5036123348017622, + "grad_norm": 1.8296774166979555, + "learning_rate": 1.785965774442278e-05, + "loss": 0.6845135688781738, + "step": 1429 + }, + { + "epoch": 0.5039647577092511, + "grad_norm": 1.4157663102240734, + "learning_rate": 1.785605412475681e-05, + "loss": 0.7314398288726807, + "step": 1430 + }, + { + "epoch": 0.5043171806167401, + "grad_norm": 1.4666969447710358, + "learning_rate": 1.7852447838204957e-05, + "loss": 0.7171268463134766, + "step": 1431 + }, + { + "epoch": 0.5046696035242291, + "grad_norm": 1.33657009662446, + "learning_rate": 1.784883888599144e-05, + "loss": 0.8349916338920593, + "step": 1432 + }, + { + "epoch": 0.505022026431718, + "grad_norm": 1.3501942895276628, + "learning_rate": 1.7845227269341387e-05, + "loss": 0.6375530958175659, + "step": 1433 + }, + { + "epoch": 0.505374449339207, + "grad_norm": 1.42707048545369, + "learning_rate": 1.7841612989480824e-05, + "loss": 0.8156824707984924, + "step": 1434 + }, + { + "epoch": 0.505726872246696, + "grad_norm": 1.4408580248696123, + "learning_rate": 1.7837996047636696e-05, + "loss": 0.7186283469200134, + "step": 1435 + }, + { + "epoch": 0.506079295154185, + "grad_norm": 1.3439268630529597, + "learning_rate": 1.7834376445036834e-05, + "loss": 0.6130756139755249, + "step": 1436 + }, + { + "epoch": 0.506431718061674, + "grad_norm": 1.5285659623162418, + "learning_rate": 1.7830754182909985e-05, + "loss": 0.6948508024215698, + "step": 1437 + }, + { + "epoch": 0.506784140969163, + "grad_norm": 1.3759453423428971, + "learning_rate": 1.7827129262485793e-05, + "loss": 0.7049688100814819, + "step": 1438 + }, + { + "epoch": 0.507136563876652, + "grad_norm": 1.457151343686531, + "learning_rate": 1.7823501684994805e-05, + "loss": 0.7491527795791626, + "step": 1439 + }, + { + "epoch": 0.507488986784141, + "grad_norm": 1.6101324796455516, + "learning_rate": 1.781987145166847e-05, + "loss": 
0.8718780279159546, + "step": 1440 + }, + { + "epoch": 0.5078414096916299, + "grad_norm": 1.2572878912363772, + "learning_rate": 1.7816238563739144e-05, + "loss": 0.5675592422485352, + "step": 1441 + }, + { + "epoch": 0.508193832599119, + "grad_norm": 1.4044509323540495, + "learning_rate": 1.7812603022440076e-05, + "loss": 0.7472085952758789, + "step": 1442 + }, + { + "epoch": 0.5085462555066079, + "grad_norm": 1.3371129648202849, + "learning_rate": 1.7808964829005416e-05, + "loss": 0.7673810720443726, + "step": 1443 + }, + { + "epoch": 0.5088986784140969, + "grad_norm": 1.5506550607349072, + "learning_rate": 1.7805323984670224e-05, + "loss": 0.8245630264282227, + "step": 1444 + }, + { + "epoch": 0.5092511013215859, + "grad_norm": 1.488734758513416, + "learning_rate": 1.780168049067045e-05, + "loss": 0.8578429222106934, + "step": 1445 + }, + { + "epoch": 0.5096035242290748, + "grad_norm": 1.3892444083620181, + "learning_rate": 1.7798034348242944e-05, + "loss": 0.6631708145141602, + "step": 1446 + }, + { + "epoch": 0.5099559471365639, + "grad_norm": 1.3121030116229568, + "learning_rate": 1.779438555862546e-05, + "loss": 0.8106615543365479, + "step": 1447 + }, + { + "epoch": 0.5103083700440528, + "grad_norm": 1.1486822439059632, + "learning_rate": 1.7790734123056654e-05, + "loss": 0.7033256888389587, + "step": 1448 + }, + { + "epoch": 0.5106607929515419, + "grad_norm": 1.2259259255559172, + "learning_rate": 1.7787080042776065e-05, + "loss": 0.7124278545379639, + "step": 1449 + }, + { + "epoch": 0.5110132158590308, + "grad_norm": 1.4546377837760451, + "learning_rate": 1.7783423319024144e-05, + "loss": 0.7834827899932861, + "step": 1450 + }, + { + "epoch": 0.5113656387665199, + "grad_norm": 1.4580618513432573, + "learning_rate": 1.777976395304224e-05, + "loss": 0.6762892603874207, + "step": 1451 + }, + { + "epoch": 0.5117180616740088, + "grad_norm": 1.4220157860300873, + "learning_rate": 1.7776101946072586e-05, + "loss": 0.7317261695861816, + "step": 1452 + }, + { + 
"epoch": 0.5120704845814978, + "grad_norm": 1.3265767127223091, + "learning_rate": 1.7772437299358324e-05, + "loss": 0.6278417110443115, + "step": 1453 + }, + { + "epoch": 0.5124229074889868, + "grad_norm": 1.319373459720871, + "learning_rate": 1.7768770014143485e-05, + "loss": 0.6638025045394897, + "step": 1454 + }, + { + "epoch": 0.5127753303964758, + "grad_norm": 1.400198364176684, + "learning_rate": 1.7765100091673e-05, + "loss": 0.786564826965332, + "step": 1455 + }, + { + "epoch": 0.5131277533039648, + "grad_norm": 1.4579007616104753, + "learning_rate": 1.776142753319269e-05, + "loss": 0.7483570575714111, + "step": 1456 + }, + { + "epoch": 0.5134801762114537, + "grad_norm": 1.5755517235246568, + "learning_rate": 1.7757752339949284e-05, + "loss": 0.7036221027374268, + "step": 1457 + }, + { + "epoch": 0.5138325991189427, + "grad_norm": 1.4840022330643747, + "learning_rate": 1.7754074513190384e-05, + "loss": 0.6903718709945679, + "step": 1458 + }, + { + "epoch": 0.5141850220264317, + "grad_norm": 1.2882226376562813, + "learning_rate": 1.77503940541645e-05, + "loss": 0.7728221416473389, + "step": 1459 + }, + { + "epoch": 0.5145374449339207, + "grad_norm": 1.327669814898394, + "learning_rate": 1.774671096412104e-05, + "loss": 0.7127183675765991, + "step": 1460 + }, + { + "epoch": 0.5148898678414097, + "grad_norm": 1.6330052955229915, + "learning_rate": 1.7743025244310293e-05, + "loss": 0.7801295518875122, + "step": 1461 + }, + { + "epoch": 0.5152422907488987, + "grad_norm": 1.1623220195345323, + "learning_rate": 1.773933689598345e-05, + "loss": 0.632892906665802, + "step": 1462 + }, + { + "epoch": 0.5155947136563876, + "grad_norm": 1.2497961025206838, + "learning_rate": 1.7735645920392587e-05, + "loss": 0.7347458600997925, + "step": 1463 + }, + { + "epoch": 0.5159471365638767, + "grad_norm": 1.5115996209276181, + "learning_rate": 1.7731952318790673e-05, + "loss": 0.6705365777015686, + "step": 1464 + }, + { + "epoch": 0.5162995594713656, + "grad_norm": 
1.4475904564128834, + "learning_rate": 1.7728256092431577e-05, + "loss": 0.696006715297699, + "step": 1465 + }, + { + "epoch": 0.5166519823788547, + "grad_norm": 1.3978951424570836, + "learning_rate": 1.7724557242570045e-05, + "loss": 0.5922254323959351, + "step": 1466 + }, + { + "epoch": 0.5170044052863436, + "grad_norm": 1.2709448074189098, + "learning_rate": 1.7720855770461733e-05, + "loss": 0.6162985563278198, + "step": 1467 + }, + { + "epoch": 0.5173568281938326, + "grad_norm": 1.432801158502027, + "learning_rate": 1.7717151677363164e-05, + "loss": 0.7319275140762329, + "step": 1468 + }, + { + "epoch": 0.5177092511013216, + "grad_norm": 1.3993642551309304, + "learning_rate": 1.771344496453177e-05, + "loss": 0.7349969148635864, + "step": 1469 + }, + { + "epoch": 0.5180616740088105, + "grad_norm": 1.287285806622758, + "learning_rate": 1.7709735633225863e-05, + "loss": 0.8153162598609924, + "step": 1470 + }, + { + "epoch": 0.5184140969162996, + "grad_norm": 1.3919169087311665, + "learning_rate": 1.7706023684704642e-05, + "loss": 0.6582974195480347, + "step": 1471 + }, + { + "epoch": 0.5187665198237885, + "grad_norm": 1.704010679983685, + "learning_rate": 1.77023091202282e-05, + "loss": 0.696917712688446, + "step": 1472 + }, + { + "epoch": 0.5191189427312776, + "grad_norm": 1.4310702415713368, + "learning_rate": 1.769859194105752e-05, + "loss": 0.6281285285949707, + "step": 1473 + }, + { + "epoch": 0.5194713656387665, + "grad_norm": 1.6327610148964462, + "learning_rate": 1.7694872148454463e-05, + "loss": 0.7687089443206787, + "step": 1474 + }, + { + "epoch": 0.5198237885462555, + "grad_norm": 1.386403275153257, + "learning_rate": 1.7691149743681783e-05, + "loss": 0.6928491592407227, + "step": 1475 + }, + { + "epoch": 0.5201762114537445, + "grad_norm": 1.3972840676283895, + "learning_rate": 1.7687424728003126e-05, + "loss": 0.63843834400177, + "step": 1476 + }, + { + "epoch": 0.5205286343612335, + "grad_norm": 1.7893361899671325, + "learning_rate": 
1.7683697102683012e-05, + "loss": 0.8987904787063599, + "step": 1477 + }, + { + "epoch": 0.5208810572687225, + "grad_norm": 1.3513150269139367, + "learning_rate": 1.767996686898686e-05, + "loss": 0.7027539014816284, + "step": 1478 + }, + { + "epoch": 0.5212334801762114, + "grad_norm": 1.5924485741299983, + "learning_rate": 1.7676234028180964e-05, + "loss": 0.8490183353424072, + "step": 1479 + }, + { + "epoch": 0.5215859030837005, + "grad_norm": 1.251712414046886, + "learning_rate": 1.7672498581532508e-05, + "loss": 0.5885729789733887, + "step": 1480 + }, + { + "epoch": 0.5219383259911894, + "grad_norm": 1.289415742432068, + "learning_rate": 1.766876053030956e-05, + "loss": 0.627627968788147, + "step": 1481 + }, + { + "epoch": 0.5222907488986784, + "grad_norm": 1.2948972408498374, + "learning_rate": 1.766501987578108e-05, + "loss": 0.6441413164138794, + "step": 1482 + }, + { + "epoch": 0.5226431718061674, + "grad_norm": 1.3508329997529829, + "learning_rate": 1.7661276619216888e-05, + "loss": 0.6199722290039062, + "step": 1483 + }, + { + "epoch": 0.5229955947136564, + "grad_norm": 1.2931208995237342, + "learning_rate": 1.7657530761887715e-05, + "loss": 0.6364887952804565, + "step": 1484 + }, + { + "epoch": 0.5233480176211454, + "grad_norm": 1.281527242811407, + "learning_rate": 1.7653782305065158e-05, + "loss": 0.7279890775680542, + "step": 1485 + }, + { + "epoch": 0.5237004405286344, + "grad_norm": 1.5228486275670003, + "learning_rate": 1.7650031250021704e-05, + "loss": 0.6552719473838806, + "step": 1486 + }, + { + "epoch": 0.5240528634361233, + "grad_norm": 1.4461703633182712, + "learning_rate": 1.7646277598030717e-05, + "loss": 0.6778907775878906, + "step": 1487 + }, + { + "epoch": 0.5244052863436124, + "grad_norm": 1.3941119820637071, + "learning_rate": 1.7642521350366447e-05, + "loss": 0.6581870317459106, + "step": 1488 + }, + { + "epoch": 0.5247577092511013, + "grad_norm": 1.6198722329881745, + "learning_rate": 1.7638762508304025e-05, + "loss": 
0.8529388904571533, + "step": 1489 + }, + { + "epoch": 0.5251101321585903, + "grad_norm": 1.659639253256808, + "learning_rate": 1.7635001073119458e-05, + "loss": 0.6601512432098389, + "step": 1490 + }, + { + "epoch": 0.5254625550660793, + "grad_norm": 1.5073764890239725, + "learning_rate": 1.7631237046089637e-05, + "loss": 0.6347510814666748, + "step": 1491 + }, + { + "epoch": 0.5258149779735682, + "grad_norm": 1.3256786239827452, + "learning_rate": 1.762747042849233e-05, + "loss": 0.700560986995697, + "step": 1492 + }, + { + "epoch": 0.5261674008810573, + "grad_norm": 1.4060287838972836, + "learning_rate": 1.762370122160619e-05, + "loss": 0.5641219019889832, + "step": 1493 + }, + { + "epoch": 0.5265198237885462, + "grad_norm": 1.3124518756577959, + "learning_rate": 1.761992942671074e-05, + "loss": 0.8017370700836182, + "step": 1494 + }, + { + "epoch": 0.5268722466960353, + "grad_norm": 1.334442798992846, + "learning_rate": 1.7616155045086394e-05, + "loss": 0.6345353126525879, + "step": 1495 + }, + { + "epoch": 0.5272246696035242, + "grad_norm": 1.6841165394853315, + "learning_rate": 1.7612378078014432e-05, + "loss": 0.7118426561355591, + "step": 1496 + }, + { + "epoch": 0.5275770925110133, + "grad_norm": 1.6748084277774182, + "learning_rate": 1.7608598526777017e-05, + "loss": 0.6186550855636597, + "step": 1497 + }, + { + "epoch": 0.5279295154185022, + "grad_norm": 1.4676540893387768, + "learning_rate": 1.7604816392657195e-05, + "loss": 0.8351110219955444, + "step": 1498 + }, + { + "epoch": 0.5282819383259912, + "grad_norm": 1.3183866002309903, + "learning_rate": 1.7601031676938877e-05, + "loss": 0.638684868812561, + "step": 1499 + }, + { + "epoch": 0.5286343612334802, + "grad_norm": 1.291067085285626, + "learning_rate": 1.7597244380906856e-05, + "loss": 0.5118356943130493, + "step": 1500 + }, + { + "epoch": 0.5289867841409691, + "grad_norm": 1.2880504132355877, + "learning_rate": 1.7593454505846807e-05, + "loss": 0.637636125087738, + "step": 1501 + }, + { + 
"epoch": 0.5293392070484582, + "grad_norm": 1.3905967147162603, + "learning_rate": 1.7589662053045264e-05, + "loss": 0.8412563800811768, + "step": 1502 + }, + { + "epoch": 0.5296916299559471, + "grad_norm": 1.6432072453017084, + "learning_rate": 1.758586702378966e-05, + "loss": 0.7940464019775391, + "step": 1503 + }, + { + "epoch": 0.5300440528634361, + "grad_norm": 1.4898667206132308, + "learning_rate": 1.7582069419368276e-05, + "loss": 0.7136783599853516, + "step": 1504 + }, + { + "epoch": 0.5303964757709251, + "grad_norm": 1.5677232979916986, + "learning_rate": 1.757826924107029e-05, + "loss": 0.7212727069854736, + "step": 1505 + }, + { + "epoch": 0.5307488986784141, + "grad_norm": 2.968905268892082, + "learning_rate": 1.757446649018574e-05, + "loss": 0.7026032209396362, + "step": 1506 + }, + { + "epoch": 0.5311013215859031, + "grad_norm": 1.3050484980835664, + "learning_rate": 1.7570661168005544e-05, + "loss": 0.541954755783081, + "step": 1507 + }, + { + "epoch": 0.5314537444933921, + "grad_norm": 1.3422366313712581, + "learning_rate": 1.7566853275821488e-05, + "loss": 0.6927075386047363, + "step": 1508 + }, + { + "epoch": 0.531806167400881, + "grad_norm": 1.4069640098530838, + "learning_rate": 1.7563042814926237e-05, + "loss": 0.6556441783905029, + "step": 1509 + }, + { + "epoch": 0.5321585903083701, + "grad_norm": 1.710258111864569, + "learning_rate": 1.7559229786613317e-05, + "loss": 0.6895149946212769, + "step": 1510 + }, + { + "epoch": 0.532511013215859, + "grad_norm": 1.3737730722509855, + "learning_rate": 1.755541419217714e-05, + "loss": 0.6178139448165894, + "step": 1511 + }, + { + "epoch": 0.532863436123348, + "grad_norm": 1.5162737493672722, + "learning_rate": 1.7551596032912975e-05, + "loss": 0.7645368576049805, + "step": 1512 + }, + { + "epoch": 0.533215859030837, + "grad_norm": 1.3652252290806937, + "learning_rate": 1.7547775310116973e-05, + "loss": 0.8247367143630981, + "step": 1513 + }, + { + "epoch": 0.533568281938326, + "grad_norm": 
1.2941657542151124, + "learning_rate": 1.7543952025086147e-05, + "loss": 0.535837709903717, + "step": 1514 + }, + { + "epoch": 0.533920704845815, + "grad_norm": 1.3232982615818571, + "learning_rate": 1.7540126179118387e-05, + "loss": 0.51450514793396, + "step": 1515 + }, + { + "epoch": 0.5342731277533039, + "grad_norm": 1.3863880461090508, + "learning_rate": 1.7536297773512444e-05, + "loss": 0.7962276935577393, + "step": 1516 + }, + { + "epoch": 0.534625550660793, + "grad_norm": 1.4799750544282257, + "learning_rate": 1.753246680956795e-05, + "loss": 0.7586444616317749, + "step": 1517 + }, + { + "epoch": 0.5349779735682819, + "grad_norm": 1.4967875396536634, + "learning_rate": 1.752863328858539e-05, + "loss": 0.7074990272521973, + "step": 1518 + }, + { + "epoch": 0.535330396475771, + "grad_norm": 1.3158842776684478, + "learning_rate": 1.7524797211866126e-05, + "loss": 0.7409921884536743, + "step": 1519 + }, + { + "epoch": 0.5356828193832599, + "grad_norm": 1.3752676962962187, + "learning_rate": 1.7520958580712394e-05, + "loss": 0.7889251708984375, + "step": 1520 + }, + { + "epoch": 0.536035242290749, + "grad_norm": 2.0871001645404776, + "learning_rate": 1.751711739642728e-05, + "loss": 0.8244975209236145, + "step": 1521 + }, + { + "epoch": 0.5363876651982379, + "grad_norm": 1.441127888748836, + "learning_rate": 1.7513273660314753e-05, + "loss": 0.6573888659477234, + "step": 1522 + }, + { + "epoch": 0.5367400881057268, + "grad_norm": 1.3793459185222714, + "learning_rate": 1.7509427373679643e-05, + "loss": 0.71816086769104, + "step": 1523 + }, + { + "epoch": 0.5370925110132159, + "grad_norm": 1.5200593368820163, + "learning_rate": 1.750557853782764e-05, + "loss": 0.7681000232696533, + "step": 1524 + }, + { + "epoch": 0.5374449339207048, + "grad_norm": 1.443442982592023, + "learning_rate": 1.7501727154065304e-05, + "loss": 0.6777461767196655, + "step": 1525 + }, + { + "epoch": 0.5377973568281939, + "grad_norm": 1.5538840121485165, + "learning_rate": 
1.7497873223700063e-05, + "loss": 0.714499831199646, + "step": 1526 + }, + { + "epoch": 0.5381497797356828, + "grad_norm": 1.6085901588908644, + "learning_rate": 1.7494016748040206e-05, + "loss": 0.6587036848068237, + "step": 1527 + }, + { + "epoch": 0.5385022026431718, + "grad_norm": 1.5748960305246453, + "learning_rate": 1.7490157728394887e-05, + "loss": 0.7256105542182922, + "step": 1528 + }, + { + "epoch": 0.5388546255506608, + "grad_norm": 1.7818844853131433, + "learning_rate": 1.7486296166074116e-05, + "loss": 0.6551185846328735, + "step": 1529 + }, + { + "epoch": 0.5392070484581498, + "grad_norm": 1.5961201900224617, + "learning_rate": 1.7482432062388782e-05, + "loss": 0.710479736328125, + "step": 1530 + }, + { + "epoch": 0.5395594713656388, + "grad_norm": 1.3063302832130508, + "learning_rate": 1.7478565418650625e-05, + "loss": 0.7882527709007263, + "step": 1531 + }, + { + "epoch": 0.5399118942731278, + "grad_norm": 1.4227949027781848, + "learning_rate": 1.7474696236172247e-05, + "loss": 0.6163671612739563, + "step": 1532 + }, + { + "epoch": 0.5402643171806167, + "grad_norm": 1.3516530648193832, + "learning_rate": 1.7470824516267125e-05, + "loss": 0.6923140287399292, + "step": 1533 + }, + { + "epoch": 0.5406167400881057, + "grad_norm": 1.259724627030556, + "learning_rate": 1.7466950260249573e-05, + "loss": 0.6473938226699829, + "step": 1534 + }, + { + "epoch": 0.5409691629955947, + "grad_norm": 1.7187178399062975, + "learning_rate": 1.7463073469434792e-05, + "loss": 0.631247878074646, + "step": 1535 + }, + { + "epoch": 0.5413215859030837, + "grad_norm": 1.8932273669088504, + "learning_rate": 1.745919414513883e-05, + "loss": 0.8113377690315247, + "step": 1536 + }, + { + "epoch": 0.5416740088105727, + "grad_norm": 1.4958838672098407, + "learning_rate": 1.7455312288678588e-05, + "loss": 0.7950010299682617, + "step": 1537 + }, + { + "epoch": 0.5420264317180616, + "grad_norm": 1.5066443226404773, + "learning_rate": 1.7451427901371843e-05, + "loss": 
0.7279125452041626, + "step": 1538 + }, + { + "epoch": 0.5423788546255507, + "grad_norm": 1.304013044677209, + "learning_rate": 1.7447540984537225e-05, + "loss": 0.6746084690093994, + "step": 1539 + }, + { + "epoch": 0.5427312775330396, + "grad_norm": 1.2714028280363416, + "learning_rate": 1.744365153949422e-05, + "loss": 0.5818569660186768, + "step": 1540 + }, + { + "epoch": 0.5430837004405287, + "grad_norm": 1.3192138998270364, + "learning_rate": 1.743975956756317e-05, + "loss": 0.7408111095428467, + "step": 1541 + }, + { + "epoch": 0.5434361233480176, + "grad_norm": 1.4913068245941434, + "learning_rate": 1.7435865070065282e-05, + "loss": 0.6842402815818787, + "step": 1542 + }, + { + "epoch": 0.5437885462555067, + "grad_norm": 1.3316441616353596, + "learning_rate": 1.7431968048322617e-05, + "loss": 0.6179615259170532, + "step": 1543 + }, + { + "epoch": 0.5441409691629956, + "grad_norm": 1.3347896582759051, + "learning_rate": 1.7428068503658085e-05, + "loss": 0.5943140983581543, + "step": 1544 + }, + { + "epoch": 0.5444933920704845, + "grad_norm": 1.797026236227536, + "learning_rate": 1.742416643739547e-05, + "loss": 0.7901419401168823, + "step": 1545 + }, + { + "epoch": 0.5448458149779736, + "grad_norm": 1.4636864955433957, + "learning_rate": 1.74202618508594e-05, + "loss": 0.7630521655082703, + "step": 1546 + }, + { + "epoch": 0.5451982378854625, + "grad_norm": 1.5322711938826543, + "learning_rate": 1.7416354745375355e-05, + "loss": 0.7662566900253296, + "step": 1547 + }, + { + "epoch": 0.5455506607929516, + "grad_norm": 1.52556111103991, + "learning_rate": 1.7412445122269683e-05, + "loss": 0.5758601427078247, + "step": 1548 + }, + { + "epoch": 0.5459030837004405, + "grad_norm": 1.3681670353760105, + "learning_rate": 1.7408532982869573e-05, + "loss": 0.753425121307373, + "step": 1549 + }, + { + "epoch": 0.5462555066079295, + "grad_norm": 1.7147625296386437, + "learning_rate": 1.7404618328503082e-05, + "loss": 0.6954981088638306, + "step": 1550 + }, + { + 
"epoch": 0.5466079295154185, + "grad_norm": 1.7209819459128521, + "learning_rate": 1.7400701160499107e-05, + "loss": 0.7608321905136108, + "step": 1551 + }, + { + "epoch": 0.5469603524229075, + "grad_norm": 1.3289181862839086, + "learning_rate": 1.7396781480187403e-05, + "loss": 0.679731011390686, + "step": 1552 + }, + { + "epoch": 0.5473127753303965, + "grad_norm": 1.547015119464835, + "learning_rate": 1.7392859288898586e-05, + "loss": 0.7101309299468994, + "step": 1553 + }, + { + "epoch": 0.5476651982378855, + "grad_norm": 1.3483315531721025, + "learning_rate": 1.7388934587964114e-05, + "loss": 0.7975757122039795, + "step": 1554 + }, + { + "epoch": 0.5480176211453744, + "grad_norm": 1.612241763199232, + "learning_rate": 1.73850073787163e-05, + "loss": 0.9132372140884399, + "step": 1555 + }, + { + "epoch": 0.5483700440528634, + "grad_norm": 1.2910456692590726, + "learning_rate": 1.7381077662488313e-05, + "loss": 0.7375202178955078, + "step": 1556 + }, + { + "epoch": 0.5487224669603524, + "grad_norm": 1.387618503105513, + "learning_rate": 1.7377145440614165e-05, + "loss": 0.7066434025764465, + "step": 1557 + }, + { + "epoch": 0.5490748898678414, + "grad_norm": 1.3715154650071018, + "learning_rate": 1.737321071442873e-05, + "loss": 0.8217945098876953, + "step": 1558 + }, + { + "epoch": 0.5494273127753304, + "grad_norm": 1.629646959297134, + "learning_rate": 1.7369273485267716e-05, + "loss": 0.6946529150009155, + "step": 1559 + }, + { + "epoch": 0.5497797356828193, + "grad_norm": 1.525535566210846, + "learning_rate": 1.7365333754467694e-05, + "loss": 0.7671442627906799, + "step": 1560 + }, + { + "epoch": 0.5501321585903084, + "grad_norm": 1.3988607690634036, + "learning_rate": 1.736139152336608e-05, + "loss": 0.7044692039489746, + "step": 1561 + }, + { + "epoch": 0.5504845814977973, + "grad_norm": 1.368099127753493, + "learning_rate": 1.735744679330114e-05, + "loss": 0.6654937267303467, + "step": 1562 + }, + { + "epoch": 0.5508370044052864, + "grad_norm": 
1.2951614076743367, + "learning_rate": 1.7353499565611986e-05, + "loss": 0.6683400869369507, + "step": 1563 + }, + { + "epoch": 0.5511894273127753, + "grad_norm": 1.236687311626723, + "learning_rate": 1.734954984163858e-05, + "loss": 0.6360758543014526, + "step": 1564 + }, + { + "epoch": 0.5515418502202644, + "grad_norm": 1.3363599776517268, + "learning_rate": 1.7345597622721727e-05, + "loss": 0.6982365846633911, + "step": 1565 + }, + { + "epoch": 0.5518942731277533, + "grad_norm": 1.559537817461735, + "learning_rate": 1.7341642910203087e-05, + "loss": 0.8011882305145264, + "step": 1566 + }, + { + "epoch": 0.5522466960352423, + "grad_norm": 2.285492930360211, + "learning_rate": 1.7337685705425156e-05, + "loss": 0.8203347325325012, + "step": 1567 + }, + { + "epoch": 0.5525991189427313, + "grad_norm": 1.2535380811715755, + "learning_rate": 1.7333726009731288e-05, + "loss": 0.653145432472229, + "step": 1568 + }, + { + "epoch": 0.5529515418502202, + "grad_norm": 1.4482870728586805, + "learning_rate": 1.7329763824465676e-05, + "loss": 0.6527417302131653, + "step": 1569 + }, + { + "epoch": 0.5533039647577093, + "grad_norm": 1.44377376134513, + "learning_rate": 1.7325799150973353e-05, + "loss": 0.6965492963790894, + "step": 1570 + }, + { + "epoch": 0.5536563876651982, + "grad_norm": 1.64534067222521, + "learning_rate": 1.7321831990600206e-05, + "loss": 0.6779811382293701, + "step": 1571 + }, + { + "epoch": 0.5540088105726872, + "grad_norm": 1.4062562776851213, + "learning_rate": 1.731786234469296e-05, + "loss": 0.733130931854248, + "step": 1572 + }, + { + "epoch": 0.5543612334801762, + "grad_norm": 1.3711228848627866, + "learning_rate": 1.7313890214599195e-05, + "loss": 0.6482118964195251, + "step": 1573 + }, + { + "epoch": 0.5547136563876652, + "grad_norm": 1.300564860417972, + "learning_rate": 1.7309915601667312e-05, + "loss": 0.5167185068130493, + "step": 1574 + }, + { + "epoch": 0.5550660792951542, + "grad_norm": 1.5636119075248611, + "learning_rate": 
1.730593850724658e-05, + "loss": 0.7172712087631226, + "step": 1575 + }, + { + "epoch": 0.5554185022026432, + "grad_norm": 1.3031139662778384, + "learning_rate": 1.730195893268709e-05, + "loss": 0.6786075830459595, + "step": 1576 + }, + { + "epoch": 0.5557709251101322, + "grad_norm": 1.272621756820605, + "learning_rate": 1.7297976879339787e-05, + "loss": 0.6823022365570068, + "step": 1577 + }, + { + "epoch": 0.5561233480176212, + "grad_norm": 1.5517603954080275, + "learning_rate": 1.7293992348556462e-05, + "loss": 0.6882521510124207, + "step": 1578 + }, + { + "epoch": 0.5564757709251101, + "grad_norm": 1.3633741314626187, + "learning_rate": 1.7290005341689726e-05, + "loss": 0.6028990745544434, + "step": 1579 + }, + { + "epoch": 0.5568281938325991, + "grad_norm": 1.6493192591020644, + "learning_rate": 1.728601586009305e-05, + "loss": 0.7759981155395508, + "step": 1580 + }, + { + "epoch": 0.5571806167400881, + "grad_norm": 1.4356044858306343, + "learning_rate": 1.7282023905120743e-05, + "loss": 0.7067322134971619, + "step": 1581 + }, + { + "epoch": 0.5575330396475771, + "grad_norm": 1.6158791701222606, + "learning_rate": 1.727802947812794e-05, + "loss": 0.7972309589385986, + "step": 1582 + }, + { + "epoch": 0.5578854625550661, + "grad_norm": 1.7662811513100274, + "learning_rate": 1.7274032580470634e-05, + "loss": 0.780463457107544, + "step": 1583 + }, + { + "epoch": 0.558237885462555, + "grad_norm": 1.4053617141185595, + "learning_rate": 1.7270033213505638e-05, + "loss": 0.647217869758606, + "step": 1584 + }, + { + "epoch": 0.5585903083700441, + "grad_norm": 1.3125952525291176, + "learning_rate": 1.7266031378590624e-05, + "loss": 0.6253752112388611, + "step": 1585 + }, + { + "epoch": 0.558942731277533, + "grad_norm": 11.7060219187992, + "learning_rate": 1.7262027077084083e-05, + "loss": 0.8427211046218872, + "step": 1586 + }, + { + "epoch": 0.5592951541850221, + "grad_norm": 1.344046568539196, + "learning_rate": 1.7258020310345348e-05, + "loss": 0.6763455867767334, + 
"step": 1587 + }, + { + "epoch": 0.559647577092511, + "grad_norm": 1.593422172771999, + "learning_rate": 1.72540110797346e-05, + "loss": 0.7333850264549255, + "step": 1588 + }, + { + "epoch": 0.56, + "grad_norm": 1.6040079500892586, + "learning_rate": 1.7249999386612844e-05, + "loss": 0.8572328090667725, + "step": 1589 + }, + { + "epoch": 0.560352422907489, + "grad_norm": 1.5035390542036942, + "learning_rate": 1.7245985232341923e-05, + "loss": 0.7960183620452881, + "step": 1590 + }, + { + "epoch": 0.5607048458149779, + "grad_norm": 1.484772075429922, + "learning_rate": 1.7241968618284518e-05, + "loss": 0.6750795841217041, + "step": 1591 + }, + { + "epoch": 0.561057268722467, + "grad_norm": 1.9239116239416003, + "learning_rate": 1.7237949545804145e-05, + "loss": 0.7828525304794312, + "step": 1592 + }, + { + "epoch": 0.5614096916299559, + "grad_norm": 1.4415449299886975, + "learning_rate": 1.7233928016265158e-05, + "loss": 0.7414604425430298, + "step": 1593 + }, + { + "epoch": 0.561762114537445, + "grad_norm": 1.4483242479736562, + "learning_rate": 1.7229904031032736e-05, + "loss": 0.6853663921356201, + "step": 1594 + }, + { + "epoch": 0.5621145374449339, + "grad_norm": 1.9067676423331832, + "learning_rate": 1.72258775914729e-05, + "loss": 0.7923493385314941, + "step": 1595 + }, + { + "epoch": 0.5624669603524229, + "grad_norm": 1.6239202976244251, + "learning_rate": 1.7221848698952496e-05, + "loss": 0.6776527166366577, + "step": 1596 + }, + { + "epoch": 0.5628193832599119, + "grad_norm": 1.4721879083766742, + "learning_rate": 1.721781735483921e-05, + "loss": 0.6036615371704102, + "step": 1597 + }, + { + "epoch": 0.5631718061674009, + "grad_norm": 1.271294238053108, + "learning_rate": 1.7213783560501564e-05, + "loss": 0.7175784111022949, + "step": 1598 + }, + { + "epoch": 0.5635242290748899, + "grad_norm": 1.609537856897954, + "learning_rate": 1.7209747317308897e-05, + "loss": 0.790808379650116, + "step": 1599 + }, + { + "epoch": 0.5638766519823789, + "grad_norm": 
1.211639696248482, + "learning_rate": 1.7205708626631392e-05, + "loss": 0.6230301856994629, + "step": 1600 + }, + { + "epoch": 0.5642290748898678, + "grad_norm": 1.120326299832536, + "learning_rate": 1.720166748984006e-05, + "loss": 0.712124228477478, + "step": 1601 + }, + { + "epoch": 0.5645814977973568, + "grad_norm": 1.1185092917911836, + "learning_rate": 1.719762390830674e-05, + "loss": 0.543883740901947, + "step": 1602 + }, + { + "epoch": 0.5649339207048458, + "grad_norm": 1.3866183721479424, + "learning_rate": 1.71935778834041e-05, + "loss": 0.7619644999504089, + "step": 1603 + }, + { + "epoch": 0.5652863436123348, + "grad_norm": 1.3869247346305908, + "learning_rate": 1.718952941650564e-05, + "loss": 0.6447019577026367, + "step": 1604 + }, + { + "epoch": 0.5656387665198238, + "grad_norm": 1.4175373147115695, + "learning_rate": 1.718547850898569e-05, + "loss": 0.7254266738891602, + "step": 1605 + }, + { + "epoch": 0.5659911894273127, + "grad_norm": 1.3621762521360266, + "learning_rate": 1.7181425162219406e-05, + "loss": 0.632878839969635, + "step": 1606 + }, + { + "epoch": 0.5663436123348018, + "grad_norm": 1.3921274088807207, + "learning_rate": 1.7177369377582776e-05, + "loss": 0.7711806893348694, + "step": 1607 + }, + { + "epoch": 0.5666960352422907, + "grad_norm": 1.1613347832568823, + "learning_rate": 1.7173311156452607e-05, + "loss": 0.6639282703399658, + "step": 1608 + }, + { + "epoch": 0.5670484581497798, + "grad_norm": 1.4423463303361395, + "learning_rate": 1.7169250500206544e-05, + "loss": 0.6918407082557678, + "step": 1609 + }, + { + "epoch": 0.5674008810572687, + "grad_norm": 2.283192950596924, + "learning_rate": 1.716518741022305e-05, + "loss": 0.6602861881256104, + "step": 1610 + }, + { + "epoch": 0.5677533039647578, + "grad_norm": 1.401616641880741, + "learning_rate": 1.7161121887881424e-05, + "loss": 0.5853942632675171, + "step": 1611 + }, + { + "epoch": 0.5681057268722467, + "grad_norm": 1.4206445071697613, + "learning_rate": 
1.7157053934561775e-05, + "loss": 0.6793895959854126, + "step": 1612 + }, + { + "epoch": 0.5684581497797356, + "grad_norm": 1.43055320760408, + "learning_rate": 1.7152983551645054e-05, + "loss": 0.7882634401321411, + "step": 1613 + }, + { + "epoch": 0.5688105726872247, + "grad_norm": 1.4602086959676452, + "learning_rate": 1.7148910740513023e-05, + "loss": 0.6530553698539734, + "step": 1614 + }, + { + "epoch": 0.5691629955947136, + "grad_norm": 1.2905537135464573, + "learning_rate": 1.714483550254828e-05, + "loss": 0.6405597925186157, + "step": 1615 + }, + { + "epoch": 0.5695154185022027, + "grad_norm": 1.4236330365126968, + "learning_rate": 1.714075783913424e-05, + "loss": 0.7356796860694885, + "step": 1616 + }, + { + "epoch": 0.5698678414096916, + "grad_norm": 1.3877607090316109, + "learning_rate": 1.7136677751655142e-05, + "loss": 0.7393465042114258, + "step": 1617 + }, + { + "epoch": 0.5702202643171806, + "grad_norm": 1.6092126006316967, + "learning_rate": 1.7132595241496045e-05, + "loss": 0.7205296158790588, + "step": 1618 + }, + { + "epoch": 0.5705726872246696, + "grad_norm": 1.291376266983401, + "learning_rate": 1.7128510310042842e-05, + "loss": 0.7359808683395386, + "step": 1619 + }, + { + "epoch": 0.5709251101321586, + "grad_norm": 1.3759135749970453, + "learning_rate": 1.712442295868224e-05, + "loss": 0.7097065448760986, + "step": 1620 + }, + { + "epoch": 0.5712775330396476, + "grad_norm": 1.3905917375530226, + "learning_rate": 1.7120333188801756e-05, + "loss": 0.66839599609375, + "step": 1621 + }, + { + "epoch": 0.5716299559471366, + "grad_norm": 1.7035593754714837, + "learning_rate": 1.7116241001789753e-05, + "loss": 0.8373857736587524, + "step": 1622 + }, + { + "epoch": 0.5719823788546256, + "grad_norm": 1.4514044348034505, + "learning_rate": 1.7112146399035393e-05, + "loss": 0.6405144333839417, + "step": 1623 + }, + { + "epoch": 0.5723348017621145, + "grad_norm": 1.3537498495813336, + "learning_rate": 1.710804938192867e-05, + "loss": 0.622218906879425, 
+ "step": 1624 + }, + { + "epoch": 0.5726872246696035, + "grad_norm": 1.3235233015291856, + "learning_rate": 1.710394995186039e-05, + "loss": 0.6728596687316895, + "step": 1625 + }, + { + "epoch": 0.5730396475770925, + "grad_norm": 1.457353775792826, + "learning_rate": 1.7099848110222188e-05, + "loss": 0.7749369144439697, + "step": 1626 + }, + { + "epoch": 0.5733920704845815, + "grad_norm": 1.5414707611626788, + "learning_rate": 1.7095743858406506e-05, + "loss": 0.7230759859085083, + "step": 1627 + }, + { + "epoch": 0.5737444933920705, + "grad_norm": 1.540981219180448, + "learning_rate": 1.7091637197806614e-05, + "loss": 0.8243547677993774, + "step": 1628 + }, + { + "epoch": 0.5740969162995595, + "grad_norm": 1.38043003521811, + "learning_rate": 1.708752812981659e-05, + "loss": 0.5860315561294556, + "step": 1629 + }, + { + "epoch": 0.5744493392070484, + "grad_norm": 1.6273061636094053, + "learning_rate": 1.708341665583134e-05, + "loss": 0.6623368859291077, + "step": 1630 + }, + { + "epoch": 0.5748017621145375, + "grad_norm": 1.8119651381751527, + "learning_rate": 1.7079302777246577e-05, + "loss": 0.6467370986938477, + "step": 1631 + }, + { + "epoch": 0.5751541850220264, + "grad_norm": 1.5119118761679917, + "learning_rate": 1.707518649545884e-05, + "loss": 0.6443271636962891, + "step": 1632 + }, + { + "epoch": 0.5755066079295155, + "grad_norm": 1.3128080413830525, + "learning_rate": 1.7071067811865477e-05, + "loss": 0.6995208263397217, + "step": 1633 + }, + { + "epoch": 0.5758590308370044, + "grad_norm": 1.4660315838841709, + "learning_rate": 1.706694672786465e-05, + "loss": 0.698627233505249, + "step": 1634 + }, + { + "epoch": 0.5762114537444933, + "grad_norm": 1.3788458614759633, + "learning_rate": 1.706282324485534e-05, + "loss": 0.713565468788147, + "step": 1635 + }, + { + "epoch": 0.5765638766519824, + "grad_norm": 1.4050651409728825, + "learning_rate": 1.7058697364237342e-05, + "loss": 0.7978894710540771, + "step": 1636 + }, + { + "epoch": 0.5769162995594713, 
+ "grad_norm": 1.374012134646938, + "learning_rate": 1.7054569087411262e-05, + "loss": 0.7361177206039429, + "step": 1637 + }, + { + "epoch": 0.5772687224669604, + "grad_norm": 1.3640656150089683, + "learning_rate": 1.705043841577853e-05, + "loss": 0.5904364585876465, + "step": 1638 + }, + { + "epoch": 0.5776211453744493, + "grad_norm": 1.4706525609098695, + "learning_rate": 1.7046305350741365e-05, + "loss": 0.7122133374214172, + "step": 1639 + }, + { + "epoch": 0.5779735682819384, + "grad_norm": 1.5208627357939872, + "learning_rate": 1.7042169893702826e-05, + "loss": 0.6350806951522827, + "step": 1640 + }, + { + "epoch": 0.5783259911894273, + "grad_norm": 1.4511692718944456, + "learning_rate": 1.7038032046066767e-05, + "loss": 0.6332669258117676, + "step": 1641 + }, + { + "epoch": 0.5786784140969163, + "grad_norm": 1.415207402865657, + "learning_rate": 1.7033891809237865e-05, + "loss": 0.6645903587341309, + "step": 1642 + }, + { + "epoch": 0.5790308370044053, + "grad_norm": 1.6697269215763402, + "learning_rate": 1.7029749184621593e-05, + "loss": 0.8156411051750183, + "step": 1643 + }, + { + "epoch": 0.5793832599118943, + "grad_norm": 1.3789808786486863, + "learning_rate": 1.7025604173624247e-05, + "loss": 0.6778720617294312, + "step": 1644 + }, + { + "epoch": 0.5797356828193833, + "grad_norm": 1.5882994058774447, + "learning_rate": 1.702145677765293e-05, + "loss": 0.6774875521659851, + "step": 1645 + }, + { + "epoch": 0.5800881057268722, + "grad_norm": 1.7790432286964633, + "learning_rate": 1.701730699811555e-05, + "loss": 0.9239652156829834, + "step": 1646 + }, + { + "epoch": 0.5804405286343612, + "grad_norm": 1.3647594896468807, + "learning_rate": 1.701315483642083e-05, + "loss": 0.6841437816619873, + "step": 1647 + }, + { + "epoch": 0.5807929515418502, + "grad_norm": 1.7199469103031315, + "learning_rate": 1.7009000293978308e-05, + "loss": 0.7540775537490845, + "step": 1648 + }, + { + "epoch": 0.5811453744493392, + "grad_norm": 1.0742597088843755, + 
"learning_rate": 1.7004843372198306e-05, + "loss": 0.5534735321998596, + "step": 1649 + }, + { + "epoch": 0.5814977973568282, + "grad_norm": 1.326312979627632, + "learning_rate": 1.7000684072491984e-05, + "loss": 0.5398745536804199, + "step": 1650 + }, + { + "epoch": 0.5818502202643172, + "grad_norm": 1.583833147288038, + "learning_rate": 1.6996522396271285e-05, + "loss": 0.7249305248260498, + "step": 1651 + }, + { + "epoch": 0.5822026431718061, + "grad_norm": 2.3893378173132973, + "learning_rate": 1.6992358344948976e-05, + "loss": 0.819263219833374, + "step": 1652 + }, + { + "epoch": 0.5825550660792952, + "grad_norm": 1.4489156713328724, + "learning_rate": 1.6988191919938618e-05, + "loss": 0.7421448826789856, + "step": 1653 + }, + { + "epoch": 0.5829074889867841, + "grad_norm": 1.832209725536692, + "learning_rate": 1.6984023122654584e-05, + "loss": 0.7665672302246094, + "step": 1654 + }, + { + "epoch": 0.5832599118942732, + "grad_norm": 1.390589552129084, + "learning_rate": 1.697985195451205e-05, + "loss": 0.7226558327674866, + "step": 1655 + }, + { + "epoch": 0.5836123348017621, + "grad_norm": 1.5091001050977364, + "learning_rate": 1.6975678416926995e-05, + "loss": 0.6702080965042114, + "step": 1656 + }, + { + "epoch": 0.583964757709251, + "grad_norm": 1.460442381139403, + "learning_rate": 1.697150251131621e-05, + "loss": 0.5843878984451294, + "step": 1657 + }, + { + "epoch": 0.5843171806167401, + "grad_norm": 1.37517469234843, + "learning_rate": 1.6967324239097287e-05, + "loss": 0.707448422908783, + "step": 1658 + }, + { + "epoch": 0.584669603524229, + "grad_norm": 1.8436282149841139, + "learning_rate": 1.6963143601688615e-05, + "loss": 0.7619093060493469, + "step": 1659 + }, + { + "epoch": 0.5850220264317181, + "grad_norm": 1.5399166464925174, + "learning_rate": 1.695896060050939e-05, + "loss": 0.6550310850143433, + "step": 1660 + }, + { + "epoch": 0.585374449339207, + "grad_norm": 1.6689625417691945, + "learning_rate": 1.6954775236979616e-05, + "loss": 
0.7202504277229309, + "step": 1661 + }, + { + "epoch": 0.5857268722466961, + "grad_norm": 1.4936106294591966, + "learning_rate": 1.6950587512520085e-05, + "loss": 0.7941907644271851, + "step": 1662 + }, + { + "epoch": 0.586079295154185, + "grad_norm": 1.3939181305394832, + "learning_rate": 1.6946397428552406e-05, + "loss": 0.6349755525588989, + "step": 1663 + }, + { + "epoch": 0.586431718061674, + "grad_norm": 1.4663377684980818, + "learning_rate": 1.6942204986498978e-05, + "loss": 0.6220123171806335, + "step": 1664 + }, + { + "epoch": 0.586784140969163, + "grad_norm": 1.3729457618271874, + "learning_rate": 1.693801018778301e-05, + "loss": 0.6617282629013062, + "step": 1665 + }, + { + "epoch": 0.587136563876652, + "grad_norm": 1.6745607368825612, + "learning_rate": 1.6933813033828496e-05, + "loss": 0.7424415349960327, + "step": 1666 + }, + { + "epoch": 0.587488986784141, + "grad_norm": 1.4332695932293307, + "learning_rate": 1.6929613526060254e-05, + "loss": 0.7245291471481323, + "step": 1667 + }, + { + "epoch": 0.5878414096916299, + "grad_norm": 1.7631957554533126, + "learning_rate": 1.692541166590387e-05, + "loss": 0.7037352323532104, + "step": 1668 + }, + { + "epoch": 0.588193832599119, + "grad_norm": 1.563153866597813, + "learning_rate": 1.6921207454785754e-05, + "loss": 0.7452583312988281, + "step": 1669 + }, + { + "epoch": 0.5885462555066079, + "grad_norm": 1.8223456889525438, + "learning_rate": 1.6917000894133106e-05, + "loss": 0.7773720026016235, + "step": 1670 + }, + { + "epoch": 0.5888986784140969, + "grad_norm": 1.6663522681826546, + "learning_rate": 1.6912791985373916e-05, + "loss": 0.5820617079734802, + "step": 1671 + }, + { + "epoch": 0.5892511013215859, + "grad_norm": 1.4638050818442514, + "learning_rate": 1.6908580729936983e-05, + "loss": 0.7513154745101929, + "step": 1672 + }, + { + "epoch": 0.5896035242290749, + "grad_norm": 1.4916906386520274, + "learning_rate": 1.6904367129251898e-05, + "loss": 0.6741763949394226, + "step": 1673 + }, + { + 
"epoch": 0.5899559471365639, + "grad_norm": 1.4430048165358413, + "learning_rate": 1.690015118474904e-05, + "loss": 0.7290149331092834, + "step": 1674 + }, + { + "epoch": 0.5903083700440529, + "grad_norm": 1.431209358109114, + "learning_rate": 1.6895932897859596e-05, + "loss": 0.651113748550415, + "step": 1675 + }, + { + "epoch": 0.5906607929515418, + "grad_norm": 1.5475090754915908, + "learning_rate": 1.6891712270015546e-05, + "loss": 0.8062121272087097, + "step": 1676 + }, + { + "epoch": 0.5910132158590309, + "grad_norm": 1.6532405105419041, + "learning_rate": 1.6887489302649657e-05, + "loss": 0.7168683409690857, + "step": 1677 + }, + { + "epoch": 0.5913656387665198, + "grad_norm": 1.5137609810465338, + "learning_rate": 1.6883263997195497e-05, + "loss": 0.6751970052719116, + "step": 1678 + }, + { + "epoch": 0.5917180616740088, + "grad_norm": 1.3244566227591112, + "learning_rate": 1.687903635508742e-05, + "loss": 0.5176222324371338, + "step": 1679 + }, + { + "epoch": 0.5920704845814978, + "grad_norm": 1.532290203616517, + "learning_rate": 1.6874806377760587e-05, + "loss": 0.605686366558075, + "step": 1680 + }, + { + "epoch": 0.5924229074889867, + "grad_norm": 1.55000273332987, + "learning_rate": 1.6870574066650945e-05, + "loss": 0.6927961111068726, + "step": 1681 + }, + { + "epoch": 0.5927753303964758, + "grad_norm": 1.7129667821490024, + "learning_rate": 1.6866339423195223e-05, + "loss": 0.7434122562408447, + "step": 1682 + }, + { + "epoch": 0.5931277533039647, + "grad_norm": 1.6508583062240207, + "learning_rate": 1.6862102448830956e-05, + "loss": 0.5646539926528931, + "step": 1683 + }, + { + "epoch": 0.5934801762114538, + "grad_norm": 1.6845514517525704, + "learning_rate": 1.6857863144996464e-05, + "loss": 0.6666921377182007, + "step": 1684 + }, + { + "epoch": 0.5938325991189427, + "grad_norm": 1.7487162446625693, + "learning_rate": 1.6853621513130857e-05, + "loss": 0.6630325317382812, + "step": 1685 + }, + { + "epoch": 0.5941850220264318, + "grad_norm": 
1.6744610818707069, + "learning_rate": 1.6849377554674042e-05, + "loss": 0.6519981622695923, + "step": 1686 + }, + { + "epoch": 0.5945374449339207, + "grad_norm": 1.523793082989738, + "learning_rate": 1.6845131271066705e-05, + "loss": 0.7958102822303772, + "step": 1687 + }, + { + "epoch": 0.5948898678414097, + "grad_norm": 1.6258620864429363, + "learning_rate": 1.6840882663750333e-05, + "loss": 0.6136632561683655, + "step": 1688 + }, + { + "epoch": 0.5952422907488987, + "grad_norm": 1.5696515726783535, + "learning_rate": 1.683663173416719e-05, + "loss": 0.6177657842636108, + "step": 1689 + }, + { + "epoch": 0.5955947136563877, + "grad_norm": 1.3990187120155009, + "learning_rate": 1.683237848376034e-05, + "loss": 0.7489751577377319, + "step": 1690 + }, + { + "epoch": 0.5959471365638767, + "grad_norm": 1.7037734397554838, + "learning_rate": 1.6828122913973625e-05, + "loss": 0.6749632954597473, + "step": 1691 + }, + { + "epoch": 0.5962995594713656, + "grad_norm": 1.7564038851615957, + "learning_rate": 1.682386502625168e-05, + "loss": 0.6340545415878296, + "step": 1692 + }, + { + "epoch": 0.5966519823788546, + "grad_norm": 1.2684465272191359, + "learning_rate": 1.6819604822039924e-05, + "loss": 0.6141117811203003, + "step": 1693 + }, + { + "epoch": 0.5970044052863436, + "grad_norm": 1.6665435860950566, + "learning_rate": 1.681534230278457e-05, + "loss": 0.7937319874763489, + "step": 1694 + }, + { + "epoch": 0.5973568281938326, + "grad_norm": 1.376760638279742, + "learning_rate": 1.68110774699326e-05, + "loss": 0.6196104288101196, + "step": 1695 + }, + { + "epoch": 0.5977092511013216, + "grad_norm": 1.755256295612453, + "learning_rate": 1.68068103249318e-05, + "loss": 0.6856463551521301, + "step": 1696 + }, + { + "epoch": 0.5980616740088106, + "grad_norm": 1.423055172614558, + "learning_rate": 1.680254086923073e-05, + "loss": 0.754359245300293, + "step": 1697 + }, + { + "epoch": 0.5984140969162995, + "grad_norm": 1.5540819723583295, + "learning_rate": 
1.6798269104278738e-05, + "loss": 0.6663862466812134, + "step": 1698 + }, + { + "epoch": 0.5987665198237886, + "grad_norm": 1.8192134096199304, + "learning_rate": 1.6793995031525955e-05, + "loss": 0.7072615027427673, + "step": 1699 + }, + { + "epoch": 0.5991189427312775, + "grad_norm": 1.3664015344189913, + "learning_rate": 1.678971865242329e-05, + "loss": 0.5722007751464844, + "step": 1700 + }, + { + "epoch": 0.5994713656387666, + "grad_norm": 1.5146739460913152, + "learning_rate": 1.6785439968422456e-05, + "loss": 0.8254455327987671, + "step": 1701 + }, + { + "epoch": 0.5998237885462555, + "grad_norm": 1.6221807995806083, + "learning_rate": 1.678115898097592e-05, + "loss": 0.5726041793823242, + "step": 1702 + }, + { + "epoch": 0.6001762114537444, + "grad_norm": 1.7173506198717712, + "learning_rate": 1.6776875691536946e-05, + "loss": 0.6480926275253296, + "step": 1703 + }, + { + "epoch": 0.6005286343612335, + "grad_norm": 1.801703791100917, + "learning_rate": 1.677259010155958e-05, + "loss": 0.6469742059707642, + "step": 1704 + }, + { + "epoch": 0.6008810572687224, + "grad_norm": 1.6534691770392222, + "learning_rate": 1.6768302212498647e-05, + "loss": 0.814565896987915, + "step": 1705 + }, + { + "epoch": 0.6012334801762115, + "grad_norm": 1.6212192399903926, + "learning_rate": 1.6764012025809745e-05, + "loss": 0.7063060402870178, + "step": 1706 + }, + { + "epoch": 0.6015859030837004, + "grad_norm": 1.2993416112883407, + "learning_rate": 1.6759719542949268e-05, + "loss": 0.6523685455322266, + "step": 1707 + }, + { + "epoch": 0.6019383259911895, + "grad_norm": 1.7291371377992661, + "learning_rate": 1.6755424765374378e-05, + "loss": 0.7361165285110474, + "step": 1708 + }, + { + "epoch": 0.6022907488986784, + "grad_norm": 1.1307969866596985, + "learning_rate": 1.6751127694543012e-05, + "loss": 0.45241934061050415, + "step": 1709 + }, + { + "epoch": 0.6026431718061674, + "grad_norm": 1.3734078208692269, + "learning_rate": 1.6746828331913903e-05, + "loss": 
0.6610431671142578, + "step": 1710 + }, + { + "epoch": 0.6029955947136564, + "grad_norm": 1.6659887779271019, + "learning_rate": 1.674252667894654e-05, + "loss": 0.7572601437568665, + "step": 1711 + }, + { + "epoch": 0.6033480176211454, + "grad_norm": 1.3828996049540105, + "learning_rate": 1.6738222737101205e-05, + "loss": 0.7021572589874268, + "step": 1712 + }, + { + "epoch": 0.6037004405286344, + "grad_norm": 1.5581462402658262, + "learning_rate": 1.6733916507838952e-05, + "loss": 0.7742347121238708, + "step": 1713 + }, + { + "epoch": 0.6040528634361233, + "grad_norm": 1.5666267075277038, + "learning_rate": 1.6729607992621613e-05, + "loss": 0.6453407406806946, + "step": 1714 + }, + { + "epoch": 0.6044052863436123, + "grad_norm": 1.279025328652212, + "learning_rate": 1.6725297192911793e-05, + "loss": 0.7004555463790894, + "step": 1715 + }, + { + "epoch": 0.6047577092511013, + "grad_norm": 1.3482721305547676, + "learning_rate": 1.6720984110172875e-05, + "loss": 0.6979051232337952, + "step": 1716 + }, + { + "epoch": 0.6051101321585903, + "grad_norm": 1.5059245296578512, + "learning_rate": 1.671666874586902e-05, + "loss": 0.6387851238250732, + "step": 1717 + }, + { + "epoch": 0.6054625550660793, + "grad_norm": 1.5397561778856637, + "learning_rate": 1.671235110146515e-05, + "loss": 0.9083811044692993, + "step": 1718 + }, + { + "epoch": 0.6058149779735683, + "grad_norm": 1.637790853716126, + "learning_rate": 1.6708031178426984e-05, + "loss": 0.747002363204956, + "step": 1719 + }, + { + "epoch": 0.6061674008810573, + "grad_norm": 1.6617583077406621, + "learning_rate": 1.6703708978220986e-05, + "loss": 0.7553372383117676, + "step": 1720 + }, + { + "epoch": 0.6065198237885463, + "grad_norm": 1.72002611544435, + "learning_rate": 1.669938450231442e-05, + "loss": 0.762795090675354, + "step": 1721 + }, + { + "epoch": 0.6068722466960352, + "grad_norm": 1.3894206198813077, + "learning_rate": 1.669505775217531e-05, + "loss": 0.739936113357544, + "step": 1722 + }, + { + "epoch": 
0.6072246696035243, + "grad_norm": 1.625344781935558, + "learning_rate": 1.6690728729272456e-05, + "loss": 0.8439112305641174, + "step": 1723 + }, + { + "epoch": 0.6075770925110132, + "grad_norm": 1.5345011506472854, + "learning_rate": 1.6686397435075416e-05, + "loss": 0.6144756078720093, + "step": 1724 + }, + { + "epoch": 0.6079295154185022, + "grad_norm": 1.3674442510472364, + "learning_rate": 1.6682063871054534e-05, + "loss": 0.569161057472229, + "step": 1725 + }, + { + "epoch": 0.6082819383259912, + "grad_norm": 1.6372827589624075, + "learning_rate": 1.6677728038680926e-05, + "loss": 0.7523979544639587, + "step": 1726 + }, + { + "epoch": 0.6086343612334801, + "grad_norm": 1.453986649514636, + "learning_rate": 1.6673389939426463e-05, + "loss": 0.6394520401954651, + "step": 1727 + }, + { + "epoch": 0.6089867841409692, + "grad_norm": 1.358198647287584, + "learning_rate": 1.66690495747638e-05, + "loss": 0.5975633859634399, + "step": 1728 + }, + { + "epoch": 0.6093392070484581, + "grad_norm": 1.6192297143942058, + "learning_rate": 1.666470694616636e-05, + "loss": 0.736790657043457, + "step": 1729 + }, + { + "epoch": 0.6096916299559472, + "grad_norm": 1.4234241508654442, + "learning_rate": 1.6660362055108316e-05, + "loss": 0.7693831920623779, + "step": 1730 + }, + { + "epoch": 0.6100440528634361, + "grad_norm": 1.8032471376275176, + "learning_rate": 1.665601490306464e-05, + "loss": 0.7322608232498169, + "step": 1731 + }, + { + "epoch": 0.6103964757709252, + "grad_norm": 1.3709677099617412, + "learning_rate": 1.6651665491511043e-05, + "loss": 0.6478679180145264, + "step": 1732 + }, + { + "epoch": 0.6107488986784141, + "grad_norm": 1.8838571148858527, + "learning_rate": 1.6647313821924022e-05, + "loss": 0.7125877141952515, + "step": 1733 + }, + { + "epoch": 0.6111013215859031, + "grad_norm": 1.5594770538222507, + "learning_rate": 1.664295989578083e-05, + "loss": 0.8999321460723877, + "step": 1734 + }, + { + "epoch": 0.6114537444933921, + "grad_norm": 1.618421596120734, 
+ "learning_rate": 1.663860371455949e-05, + "loss": 0.6908334493637085, + "step": 1735 + }, + { + "epoch": 0.611806167400881, + "grad_norm": 1.5552403174407248, + "learning_rate": 1.663424527973879e-05, + "loss": 0.6708767414093018, + "step": 1736 + }, + { + "epoch": 0.61215859030837, + "grad_norm": 1.4907630752773764, + "learning_rate": 1.6629884592798283e-05, + "loss": 0.6991565823554993, + "step": 1737 + }, + { + "epoch": 0.612511013215859, + "grad_norm": 1.430459100414143, + "learning_rate": 1.6625521655218287e-05, + "loss": 0.6224193572998047, + "step": 1738 + }, + { + "epoch": 0.612863436123348, + "grad_norm": 1.6355889531807317, + "learning_rate": 1.662115646847988e-05, + "loss": 0.701459527015686, + "step": 1739 + }, + { + "epoch": 0.613215859030837, + "grad_norm": 1.508424771304017, + "learning_rate": 1.6616789034064914e-05, + "loss": 0.784063458442688, + "step": 1740 + }, + { + "epoch": 0.613568281938326, + "grad_norm": 1.4868333492675876, + "learning_rate": 1.661241935345599e-05, + "loss": 0.7604146003723145, + "step": 1741 + }, + { + "epoch": 0.613920704845815, + "grad_norm": 1.7090188741959023, + "learning_rate": 1.6608047428136482e-05, + "loss": 0.6347941160202026, + "step": 1742 + }, + { + "epoch": 0.614273127753304, + "grad_norm": 1.6487656059998825, + "learning_rate": 1.6603673259590524e-05, + "loss": 0.7559434175491333, + "step": 1743 + }, + { + "epoch": 0.6146255506607929, + "grad_norm": 1.5969979245345363, + "learning_rate": 1.6599296849303007e-05, + "loss": 0.742524266242981, + "step": 1744 + }, + { + "epoch": 0.614977973568282, + "grad_norm": 1.2238633556789393, + "learning_rate": 1.6594918198759586e-05, + "loss": 0.697594165802002, + "step": 1745 + }, + { + "epoch": 0.6153303964757709, + "grad_norm": 1.4536023257551807, + "learning_rate": 1.659053730944668e-05, + "loss": 0.7876765131950378, + "step": 1746 + }, + { + "epoch": 0.6156828193832599, + "grad_norm": 1.489887595585156, + "learning_rate": 1.658615418285146e-05, + "loss": 
0.7514386177062988, + "step": 1747 + }, + { + "epoch": 0.6160352422907489, + "grad_norm": 1.6935500501856253, + "learning_rate": 1.658176882046187e-05, + "loss": 0.6220899820327759, + "step": 1748 + }, + { + "epoch": 0.6163876651982378, + "grad_norm": 1.9395284146525182, + "learning_rate": 1.6577381223766592e-05, + "loss": 0.7376539707183838, + "step": 1749 + }, + { + "epoch": 0.6167400881057269, + "grad_norm": 1.6373866531670291, + "learning_rate": 1.6572991394255084e-05, + "loss": 0.8296281099319458, + "step": 1750 + }, + { + "epoch": 0.6170925110132158, + "grad_norm": 1.545978766740828, + "learning_rate": 1.656859933341756e-05, + "loss": 0.7316757440567017, + "step": 1751 + }, + { + "epoch": 0.6174449339207049, + "grad_norm": 1.5280854263636194, + "learning_rate": 1.6564205042744986e-05, + "loss": 0.6933871507644653, + "step": 1752 + }, + { + "epoch": 0.6177973568281938, + "grad_norm": 1.890269396017501, + "learning_rate": 1.655980852372908e-05, + "loss": 0.6835601329803467, + "step": 1753 + }, + { + "epoch": 0.6181497797356829, + "grad_norm": 1.3967466693425752, + "learning_rate": 1.655540977786233e-05, + "loss": 0.6752027869224548, + "step": 1754 + }, + { + "epoch": 0.6185022026431718, + "grad_norm": 1.4944496246124994, + "learning_rate": 1.6551008806637976e-05, + "loss": 0.6092851758003235, + "step": 1755 + }, + { + "epoch": 0.6188546255506608, + "grad_norm": 1.3266652259646856, + "learning_rate": 1.6546605611550008e-05, + "loss": 0.682563066482544, + "step": 1756 + }, + { + "epoch": 0.6192070484581498, + "grad_norm": 1.5302981352911342, + "learning_rate": 1.654220019409317e-05, + "loss": 0.8674311637878418, + "step": 1757 + }, + { + "epoch": 0.6195594713656387, + "grad_norm": 1.4437314589210788, + "learning_rate": 1.6537792555762966e-05, + "loss": 0.7209165096282959, + "step": 1758 + }, + { + "epoch": 0.6199118942731278, + "grad_norm": 1.5958855115050472, + "learning_rate": 1.6533382698055655e-05, + "loss": 0.7795991897583008, + "step": 1759 + }, + { + 
"epoch": 0.6202643171806167, + "grad_norm": 1.6392261912532398, + "learning_rate": 1.6528970622468245e-05, + "loss": 0.6749448776245117, + "step": 1760 + }, + { + "epoch": 0.6206167400881057, + "grad_norm": 1.5291165267411688, + "learning_rate": 1.6524556330498494e-05, + "loss": 0.9127920866012573, + "step": 1761 + }, + { + "epoch": 0.6209691629955947, + "grad_norm": 1.5402491362904795, + "learning_rate": 1.6520139823644922e-05, + "loss": 0.6224071979522705, + "step": 1762 + }, + { + "epoch": 0.6213215859030837, + "grad_norm": 1.426673111398807, + "learning_rate": 1.6515721103406798e-05, + "loss": 0.6955251693725586, + "step": 1763 + }, + { + "epoch": 0.6216740088105727, + "grad_norm": 1.7187740007003602, + "learning_rate": 1.6511300171284132e-05, + "loss": 0.676613986492157, + "step": 1764 + }, + { + "epoch": 0.6220264317180617, + "grad_norm": 1.4024924612217573, + "learning_rate": 1.65068770287777e-05, + "loss": 0.7482033967971802, + "step": 1765 + }, + { + "epoch": 0.6223788546255506, + "grad_norm": 1.4659804586317469, + "learning_rate": 1.6502451677389015e-05, + "loss": 0.6019684076309204, + "step": 1766 + }, + { + "epoch": 0.6227312775330397, + "grad_norm": 1.419796458872072, + "learning_rate": 1.649802411862035e-05, + "loss": 0.6796068549156189, + "step": 1767 + }, + { + "epoch": 0.6230837004405286, + "grad_norm": 2.234008541241949, + "learning_rate": 1.6493594353974724e-05, + "loss": 0.6351302862167358, + "step": 1768 + }, + { + "epoch": 0.6234361233480176, + "grad_norm": 1.4257561009443, + "learning_rate": 1.6489162384955906e-05, + "loss": 0.6093732714653015, + "step": 1769 + }, + { + "epoch": 0.6237885462555066, + "grad_norm": 1.842168854503522, + "learning_rate": 1.6484728213068405e-05, + "loss": 0.8181271553039551, + "step": 1770 + }, + { + "epoch": 0.6241409691629956, + "grad_norm": 1.821206401126196, + "learning_rate": 1.6480291839817488e-05, + "loss": 0.7093993425369263, + "step": 1771 + }, + { + "epoch": 0.6244933920704846, + "grad_norm": 
1.416340976430299, + "learning_rate": 1.6475853266709165e-05, + "loss": 0.6895081996917725, + "step": 1772 + }, + { + "epoch": 0.6248458149779735, + "grad_norm": 1.5970315552720198, + "learning_rate": 1.6471412495250195e-05, + "loss": 0.6706013679504395, + "step": 1773 + }, + { + "epoch": 0.6251982378854626, + "grad_norm": 1.5170788749866242, + "learning_rate": 1.6466969526948082e-05, + "loss": 0.6700015664100647, + "step": 1774 + }, + { + "epoch": 0.6255506607929515, + "grad_norm": 1.5173815641058028, + "learning_rate": 1.6462524363311072e-05, + "loss": 0.6591087579727173, + "step": 1775 + }, + { + "epoch": 0.6259030837004406, + "grad_norm": 1.6219345446237772, + "learning_rate": 1.6458077005848164e-05, + "loss": 0.7775006294250488, + "step": 1776 + }, + { + "epoch": 0.6262555066079295, + "grad_norm": 1.6260525304572828, + "learning_rate": 1.6453627456069093e-05, + "loss": 0.8459682464599609, + "step": 1777 + }, + { + "epoch": 0.6266079295154185, + "grad_norm": 1.4031571304990242, + "learning_rate": 1.6449175715484346e-05, + "loss": 0.6536898612976074, + "step": 1778 + }, + { + "epoch": 0.6269603524229075, + "grad_norm": 1.5129603585000657, + "learning_rate": 1.6444721785605148e-05, + "loss": 0.7543610334396362, + "step": 1779 + }, + { + "epoch": 0.6273127753303964, + "grad_norm": 1.6228520645077271, + "learning_rate": 1.6440265667943474e-05, + "loss": 0.7416362762451172, + "step": 1780 + }, + { + "epoch": 0.6276651982378855, + "grad_norm": 1.4583654660578542, + "learning_rate": 1.6435807364012035e-05, + "loss": 0.5505499839782715, + "step": 1781 + }, + { + "epoch": 0.6280176211453744, + "grad_norm": 1.5252426453600672, + "learning_rate": 1.6431346875324284e-05, + "loss": 0.792723536491394, + "step": 1782 + }, + { + "epoch": 0.6283700440528635, + "grad_norm": 1.3655475423968058, + "learning_rate": 1.6426884203394416e-05, + "loss": 0.6313158273696899, + "step": 1783 + }, + { + "epoch": 0.6287224669603524, + "grad_norm": 1.6057168635576118, + "learning_rate": 
1.642241934973738e-05, + "loss": 0.6168874502182007, + "step": 1784 + }, + { + "epoch": 0.6290748898678414, + "grad_norm": 1.78997265433784, + "learning_rate": 1.6417952315868845e-05, + "loss": 0.6995766162872314, + "step": 1785 + }, + { + "epoch": 0.6294273127753304, + "grad_norm": 1.4835625331683349, + "learning_rate": 1.641348310330523e-05, + "loss": 0.8046826124191284, + "step": 1786 + }, + { + "epoch": 0.6297797356828194, + "grad_norm": 1.4892920408023869, + "learning_rate": 1.6409011713563697e-05, + "loss": 0.7227291464805603, + "step": 1787 + }, + { + "epoch": 0.6301321585903084, + "grad_norm": 1.4682105257113767, + "learning_rate": 1.6404538148162145e-05, + "loss": 0.6463631391525269, + "step": 1788 + }, + { + "epoch": 0.6304845814977974, + "grad_norm": 2.4977643907634, + "learning_rate": 1.640006240861921e-05, + "loss": 0.7473348379135132, + "step": 1789 + }, + { + "epoch": 0.6308370044052863, + "grad_norm": 1.4291329366827183, + "learning_rate": 1.6395584496454263e-05, + "loss": 0.7311505079269409, + "step": 1790 + }, + { + "epoch": 0.6311894273127753, + "grad_norm": 1.5618530036111458, + "learning_rate": 1.639110441318742e-05, + "loss": 0.7259535789489746, + "step": 1791 + }, + { + "epoch": 0.6315418502202643, + "grad_norm": 1.515515721890048, + "learning_rate": 1.6386622160339522e-05, + "loss": 0.5777252912521362, + "step": 1792 + }, + { + "epoch": 0.6318942731277533, + "grad_norm": 1.3190322559386176, + "learning_rate": 1.638213773943216e-05, + "loss": 0.5510598421096802, + "step": 1793 + }, + { + "epoch": 0.6322466960352423, + "grad_norm": 1.5085803548323364, + "learning_rate": 1.637765115198766e-05, + "loss": 0.6448229551315308, + "step": 1794 + }, + { + "epoch": 0.6325991189427312, + "grad_norm": 1.5827276696724286, + "learning_rate": 1.6373162399529067e-05, + "loss": 0.7359289526939392, + "step": 1795 + }, + { + "epoch": 0.6329515418502203, + "grad_norm": 1.5346140091491929, + "learning_rate": 1.6368671483580185e-05, + "loss": 0.616656482219696, + 
"step": 1796 + }, + { + "epoch": 0.6333039647577092, + "grad_norm": 1.4291822350961465, + "learning_rate": 1.6364178405665534e-05, + "loss": 0.5966289043426514, + "step": 1797 + }, + { + "epoch": 0.6336563876651983, + "grad_norm": 1.8727626569458464, + "learning_rate": 1.6359683167310375e-05, + "loss": 0.7475985288619995, + "step": 1798 + }, + { + "epoch": 0.6340088105726872, + "grad_norm": 1.4494645750595028, + "learning_rate": 1.63551857700407e-05, + "loss": 0.6030765771865845, + "step": 1799 + }, + { + "epoch": 0.6343612334801763, + "grad_norm": 1.6637248682130477, + "learning_rate": 1.6350686215383237e-05, + "loss": 0.6193016171455383, + "step": 1800 + }, + { + "epoch": 0.6347136563876652, + "grad_norm": 1.3604775956740969, + "learning_rate": 1.6346184504865442e-05, + "loss": 0.6404513120651245, + "step": 1801 + }, + { + "epoch": 0.6350660792951542, + "grad_norm": 1.5539318450371893, + "learning_rate": 1.6341680640015515e-05, + "loss": 0.8453506231307983, + "step": 1802 + }, + { + "epoch": 0.6354185022026432, + "grad_norm": 1.3642622033336096, + "learning_rate": 1.6337174622362366e-05, + "loss": 0.6094445586204529, + "step": 1803 + }, + { + "epoch": 0.6357709251101321, + "grad_norm": 1.5112522647253264, + "learning_rate": 1.6332666453435653e-05, + "loss": 0.7352159023284912, + "step": 1804 + }, + { + "epoch": 0.6361233480176212, + "grad_norm": 1.4529963307650198, + "learning_rate": 1.632815613476576e-05, + "loss": 0.7395339608192444, + "step": 1805 + }, + { + "epoch": 0.6364757709251101, + "grad_norm": 1.4350925789909401, + "learning_rate": 1.63236436678838e-05, + "loss": 0.7246927618980408, + "step": 1806 + }, + { + "epoch": 0.6368281938325991, + "grad_norm": 1.3653208723694477, + "learning_rate": 1.6319129054321616e-05, + "loss": 0.6913329362869263, + "step": 1807 + }, + { + "epoch": 0.6371806167400881, + "grad_norm": 1.9893098285493216, + "learning_rate": 1.6314612295611772e-05, + "loss": 0.6410515308380127, + "step": 1808 + }, + { + "epoch": 
0.6375330396475771, + "grad_norm": 2.6583918764324665, + "learning_rate": 1.6310093393287574e-05, + "loss": 0.690910816192627, + "step": 1809 + }, + { + "epoch": 0.6378854625550661, + "grad_norm": 1.4623649413484192, + "learning_rate": 1.6305572348883044e-05, + "loss": 0.6520562171936035, + "step": 1810 + }, + { + "epoch": 0.6382378854625551, + "grad_norm": 1.6850706181935027, + "learning_rate": 1.630104916393294e-05, + "loss": 0.6966608166694641, + "step": 1811 + }, + { + "epoch": 0.638590308370044, + "grad_norm": 1.7161033790648312, + "learning_rate": 1.6296523839972743e-05, + "loss": 0.826806902885437, + "step": 1812 + }, + { + "epoch": 0.6389427312775331, + "grad_norm": 1.431569634617566, + "learning_rate": 1.6291996378538653e-05, + "loss": 0.6695773601531982, + "step": 1813 + }, + { + "epoch": 0.639295154185022, + "grad_norm": 1.4264708644101765, + "learning_rate": 1.6287466781167607e-05, + "loss": 0.5725491046905518, + "step": 1814 + }, + { + "epoch": 0.639647577092511, + "grad_norm": 1.2779233324378096, + "learning_rate": 1.628293504939727e-05, + "loss": 0.5543544292449951, + "step": 1815 + }, + { + "epoch": 0.64, + "grad_norm": 3.2997728941963564, + "learning_rate": 1.6278401184766007e-05, + "loss": 0.6964641809463501, + "step": 1816 + }, + { + "epoch": 0.640352422907489, + "grad_norm": 1.3065245679172277, + "learning_rate": 1.6273865188812935e-05, + "loss": 0.675407886505127, + "step": 1817 + }, + { + "epoch": 0.640704845814978, + "grad_norm": 1.4883059032141013, + "learning_rate": 1.626932706307788e-05, + "loss": 0.6304433345794678, + "step": 1818 + }, + { + "epoch": 0.6410572687224669, + "grad_norm": 1.5529882690454875, + "learning_rate": 1.62647868091014e-05, + "loss": 0.7432112693786621, + "step": 1819 + }, + { + "epoch": 0.641409691629956, + "grad_norm": 1.5761551228008874, + "learning_rate": 1.6260244428424763e-05, + "loss": 0.730377197265625, + "step": 1820 + }, + { + "epoch": 0.6417621145374449, + "grad_norm": 1.7239403694554825, + "learning_rate": 
1.6255699922589968e-05, + "loss": 0.694229006767273, + "step": 1821 + }, + { + "epoch": 0.642114537444934, + "grad_norm": 1.5664915948077012, + "learning_rate": 1.6251153293139735e-05, + "loss": 0.7284739017486572, + "step": 1822 + }, + { + "epoch": 0.6424669603524229, + "grad_norm": 1.4047714992661522, + "learning_rate": 1.6246604541617507e-05, + "loss": 0.6028950214385986, + "step": 1823 + }, + { + "epoch": 0.642819383259912, + "grad_norm": 1.65079248713073, + "learning_rate": 1.6242053669567432e-05, + "loss": 0.6776808500289917, + "step": 1824 + }, + { + "epoch": 0.6431718061674009, + "grad_norm": 1.7695857292474644, + "learning_rate": 1.6237500678534396e-05, + "loss": 0.7743366956710815, + "step": 1825 + }, + { + "epoch": 0.6435242290748898, + "grad_norm": 1.594351471613888, + "learning_rate": 1.6232945570064e-05, + "loss": 0.6356723308563232, + "step": 1826 + }, + { + "epoch": 0.6438766519823789, + "grad_norm": 1.4846113103688028, + "learning_rate": 1.622838834570256e-05, + "loss": 0.7356402277946472, + "step": 1827 + }, + { + "epoch": 0.6442290748898678, + "grad_norm": 1.455165750941624, + "learning_rate": 1.622382900699711e-05, + "loss": 0.7639342546463013, + "step": 1828 + }, + { + "epoch": 0.6445814977973569, + "grad_norm": 2.0823946019481987, + "learning_rate": 1.6219267555495407e-05, + "loss": 0.6969513297080994, + "step": 1829 + }, + { + "epoch": 0.6449339207048458, + "grad_norm": 1.418146430885783, + "learning_rate": 1.621470399274592e-05, + "loss": 0.7532765865325928, + "step": 1830 + }, + { + "epoch": 0.6452863436123348, + "grad_norm": 1.3893974330709622, + "learning_rate": 1.6210138320297835e-05, + "loss": 0.5801100730895996, + "step": 1831 + }, + { + "epoch": 0.6456387665198238, + "grad_norm": 1.5780391931120195, + "learning_rate": 1.6205570539701056e-05, + "loss": 0.8006102442741394, + "step": 1832 + }, + { + "epoch": 0.6459911894273128, + "grad_norm": 1.4094927188728377, + "learning_rate": 1.6201000652506203e-05, + "loss": 0.6507089138031006, + 
"step": 1833 + }, + { + "epoch": 0.6463436123348018, + "grad_norm": 1.9684758989320281, + "learning_rate": 1.619642866026461e-05, + "loss": 0.7407999634742737, + "step": 1834 + }, + { + "epoch": 0.6466960352422908, + "grad_norm": 1.4160609898798358, + "learning_rate": 1.619185456452833e-05, + "loss": 0.6964670419692993, + "step": 1835 + }, + { + "epoch": 0.6470484581497797, + "grad_norm": 1.6614634508995256, + "learning_rate": 1.6187278366850122e-05, + "loss": 0.7095489501953125, + "step": 1836 + }, + { + "epoch": 0.6474008810572687, + "grad_norm": 2.0391949894277017, + "learning_rate": 1.6182700068783463e-05, + "loss": 0.6968166828155518, + "step": 1837 + }, + { + "epoch": 0.6477533039647577, + "grad_norm": 1.3206477384834772, + "learning_rate": 1.617811967188254e-05, + "loss": 0.7745821475982666, + "step": 1838 + }, + { + "epoch": 0.6481057268722467, + "grad_norm": 1.4803456865319338, + "learning_rate": 1.6173537177702266e-05, + "loss": 0.7071934938430786, + "step": 1839 + }, + { + "epoch": 0.6484581497797357, + "grad_norm": 1.7225763324537737, + "learning_rate": 1.6168952587798242e-05, + "loss": 0.6481701135635376, + "step": 1840 + }, + { + "epoch": 0.6488105726872246, + "grad_norm": 1.4447543914645467, + "learning_rate": 1.6164365903726805e-05, + "loss": 0.6349890232086182, + "step": 1841 + }, + { + "epoch": 0.6491629955947137, + "grad_norm": 1.3913908457554178, + "learning_rate": 1.6159777127044982e-05, + "loss": 0.6067368388175964, + "step": 1842 + }, + { + "epoch": 0.6495154185022026, + "grad_norm": 1.3943413375617566, + "learning_rate": 1.6155186259310523e-05, + "loss": 0.7170778512954712, + "step": 1843 + }, + { + "epoch": 0.6498678414096917, + "grad_norm": 1.4309397568408155, + "learning_rate": 1.6150593302081888e-05, + "loss": 0.5623376965522766, + "step": 1844 + }, + { + "epoch": 0.6502202643171806, + "grad_norm": 1.442096873601557, + "learning_rate": 1.6145998256918238e-05, + "loss": 0.7295233607292175, + "step": 1845 + }, + { + "epoch": 
0.6505726872246697, + "grad_norm": 1.513681766461532, + "learning_rate": 1.6141401125379454e-05, + "loss": 0.6991151571273804, + "step": 1846 + }, + { + "epoch": 0.6509251101321586, + "grad_norm": 1.568060173563952, + "learning_rate": 1.6136801909026113e-05, + "loss": 0.7553545236587524, + "step": 1847 + }, + { + "epoch": 0.6512775330396475, + "grad_norm": 1.560177534517688, + "learning_rate": 1.613220060941951e-05, + "loss": 0.8280071020126343, + "step": 1848 + }, + { + "epoch": 0.6516299559471366, + "grad_norm": 1.3846780543862842, + "learning_rate": 1.6127597228121636e-05, + "loss": 0.662299633026123, + "step": 1849 + }, + { + "epoch": 0.6519823788546255, + "grad_norm": 1.519733781984336, + "learning_rate": 1.6122991766695206e-05, + "loss": 0.6493197679519653, + "step": 1850 + }, + { + "epoch": 0.6523348017621146, + "grad_norm": 1.5074834442694671, + "learning_rate": 1.6118384226703623e-05, + "loss": 0.5910629034042358, + "step": 1851 + }, + { + "epoch": 0.6526872246696035, + "grad_norm": 1.5082942143966174, + "learning_rate": 1.611377460971101e-05, + "loss": 0.7124426364898682, + "step": 1852 + }, + { + "epoch": 0.6530396475770925, + "grad_norm": 1.6734021483912949, + "learning_rate": 1.610916291728218e-05, + "loss": 0.6081063747406006, + "step": 1853 + }, + { + "epoch": 0.6533920704845815, + "grad_norm": 1.5485445677219123, + "learning_rate": 1.6104549150982666e-05, + "loss": 0.7536673545837402, + "step": 1854 + }, + { + "epoch": 0.6537444933920705, + "grad_norm": 1.5239612944966212, + "learning_rate": 1.6099933312378695e-05, + "loss": 0.6514976024627686, + "step": 1855 + }, + { + "epoch": 0.6540969162995595, + "grad_norm": 1.3951117738157057, + "learning_rate": 1.6095315403037205e-05, + "loss": 0.6595193147659302, + "step": 1856 + }, + { + "epoch": 0.6544493392070485, + "grad_norm": 1.5562205804379312, + "learning_rate": 1.6090695424525826e-05, + "loss": 0.666920006275177, + "step": 1857 + }, + { + "epoch": 0.6548017621145374, + "grad_norm": 
1.5350434119319913, + "learning_rate": 1.6086073378412902e-05, + "loss": 0.5984979271888733, + "step": 1858 + }, + { + "epoch": 0.6551541850220264, + "grad_norm": 1.8541188470544154, + "learning_rate": 1.608144926626747e-05, + "loss": 0.8021191358566284, + "step": 1859 + }, + { + "epoch": 0.6555066079295154, + "grad_norm": 1.5029675710659876, + "learning_rate": 1.6076823089659272e-05, + "loss": 0.7368075847625732, + "step": 1860 + }, + { + "epoch": 0.6558590308370044, + "grad_norm": 1.596711606351331, + "learning_rate": 1.6072194850158755e-05, + "loss": 0.7923766374588013, + "step": 1861 + }, + { + "epoch": 0.6562114537444934, + "grad_norm": 1.6332800469997777, + "learning_rate": 1.606756454933706e-05, + "loss": 0.6907824873924255, + "step": 1862 + }, + { + "epoch": 0.6565638766519823, + "grad_norm": 1.5674543537069574, + "learning_rate": 1.606293218876603e-05, + "loss": 0.7366634607315063, + "step": 1863 + }, + { + "epoch": 0.6569162995594714, + "grad_norm": 1.7550517656533429, + "learning_rate": 1.6058297770018208e-05, + "loss": 0.7166022658348083, + "step": 1864 + }, + { + "epoch": 0.6572687224669603, + "grad_norm": 1.5153527205809505, + "learning_rate": 1.6053661294666833e-05, + "loss": 0.6969404220581055, + "step": 1865 + }, + { + "epoch": 0.6576211453744494, + "grad_norm": 1.5681332930444218, + "learning_rate": 1.6049022764285846e-05, + "loss": 0.7182974815368652, + "step": 1866 + }, + { + "epoch": 0.6579735682819383, + "grad_norm": 2.620263422686914, + "learning_rate": 1.6044382180449886e-05, + "loss": 0.7469301819801331, + "step": 1867 + }, + { + "epoch": 0.6583259911894274, + "grad_norm": 1.458082221775431, + "learning_rate": 1.603973954473428e-05, + "loss": 0.7097122073173523, + "step": 1868 + }, + { + "epoch": 0.6586784140969163, + "grad_norm": 1.3404337000381439, + "learning_rate": 1.6035094858715065e-05, + "loss": 0.6907291412353516, + "step": 1869 + }, + { + "epoch": 0.6590308370044052, + "grad_norm": 1.5576579616406543, + "learning_rate": 
1.6030448123968963e-05, + "loss": 0.6259130239486694, + "step": 1870 + }, + { + "epoch": 0.6593832599118943, + "grad_norm": 1.6431810286043311, + "learning_rate": 1.6025799342073397e-05, + "loss": 0.6948051452636719, + "step": 1871 + }, + { + "epoch": 0.6597356828193832, + "grad_norm": 1.3540961323396474, + "learning_rate": 1.602114851460648e-05, + "loss": 0.7037572264671326, + "step": 1872 + }, + { + "epoch": 0.6600881057268723, + "grad_norm": 1.565352238933419, + "learning_rate": 1.6016495643147036e-05, + "loss": 0.7728864550590515, + "step": 1873 + }, + { + "epoch": 0.6604405286343612, + "grad_norm": 1.4345290675539004, + "learning_rate": 1.601184072927456e-05, + "loss": 0.7782067060470581, + "step": 1874 + }, + { + "epoch": 0.6607929515418502, + "grad_norm": 1.4505913839056241, + "learning_rate": 1.6007183774569246e-05, + "loss": 0.6168591976165771, + "step": 1875 + }, + { + "epoch": 0.6611453744493392, + "grad_norm": 1.6465062301007323, + "learning_rate": 1.6002524780611995e-05, + "loss": 0.702346920967102, + "step": 1876 + }, + { + "epoch": 0.6614977973568282, + "grad_norm": 1.6478258582343996, + "learning_rate": 1.5997863748984384e-05, + "loss": 0.6084239482879639, + "step": 1877 + }, + { + "epoch": 0.6618502202643172, + "grad_norm": 1.5841429013244157, + "learning_rate": 1.5993200681268696e-05, + "loss": 0.8307315707206726, + "step": 1878 + }, + { + "epoch": 0.6622026431718062, + "grad_norm": 1.8073980879357947, + "learning_rate": 1.5988535579047888e-05, + "loss": 0.6465811729431152, + "step": 1879 + }, + { + "epoch": 0.6625550660792952, + "grad_norm": 1.5593829827457022, + "learning_rate": 1.598386844390562e-05, + "loss": 0.71415114402771, + "step": 1880 + }, + { + "epoch": 0.6629074889867841, + "grad_norm": 6.602062472303997, + "learning_rate": 1.5979199277426243e-05, + "loss": 0.7135012149810791, + "step": 1881 + }, + { + "epoch": 0.6632599118942731, + "grad_norm": 1.584805815321856, + "learning_rate": 1.597452808119479e-05, + "loss": 0.840306282043457, 
+ "step": 1882 + }, + { + "epoch": 0.6636123348017621, + "grad_norm": 1.454651140369818, + "learning_rate": 1.596985485679699e-05, + "loss": 0.622429609298706, + "step": 1883 + }, + { + "epoch": 0.6639647577092511, + "grad_norm": 1.5798478269154124, + "learning_rate": 1.5965179605819248e-05, + "loss": 0.6505612134933472, + "step": 1884 + }, + { + "epoch": 0.66431718061674, + "grad_norm": 1.4292089389404006, + "learning_rate": 1.5960502329848683e-05, + "loss": 0.7665247917175293, + "step": 1885 + }, + { + "epoch": 0.6646696035242291, + "grad_norm": 1.614107737492675, + "learning_rate": 1.5955823030473068e-05, + "loss": 0.7780051231384277, + "step": 1886 + }, + { + "epoch": 0.665022026431718, + "grad_norm": 1.4074097920809756, + "learning_rate": 1.5951141709280886e-05, + "loss": 0.6311650276184082, + "step": 1887 + }, + { + "epoch": 0.6653744493392071, + "grad_norm": 1.287734360896639, + "learning_rate": 1.5946458367861302e-05, + "loss": 0.7126712799072266, + "step": 1888 + }, + { + "epoch": 0.665726872246696, + "grad_norm": 1.3823278268773909, + "learning_rate": 1.5941773007804165e-05, + "loss": 0.6979397535324097, + "step": 1889 + }, + { + "epoch": 0.6660792951541851, + "grad_norm": 1.5067230035216896, + "learning_rate": 1.5937085630700003e-05, + "loss": 0.7065495252609253, + "step": 1890 + }, + { + "epoch": 0.666431718061674, + "grad_norm": 1.373677820269664, + "learning_rate": 1.593239623814004e-05, + "loss": 0.6157221794128418, + "step": 1891 + }, + { + "epoch": 0.6667841409691629, + "grad_norm": 1.6157271272896285, + "learning_rate": 1.5927704831716177e-05, + "loss": 0.6835625171661377, + "step": 1892 + }, + { + "epoch": 0.667136563876652, + "grad_norm": 1.5002309814069255, + "learning_rate": 1.5923011413021e-05, + "loss": 0.6416822671890259, + "step": 1893 + }, + { + "epoch": 0.6674889867841409, + "grad_norm": 1.4507514621746327, + "learning_rate": 1.5918315983647782e-05, + "loss": 0.7307168245315552, + "step": 1894 + }, + { + "epoch": 0.66784140969163, + 
"grad_norm": 1.3321086634513644, + "learning_rate": 1.5913618545190468e-05, + "loss": 0.5464824438095093, + "step": 1895 + }, + { + "epoch": 0.6681938325991189, + "grad_norm": 1.544912001907108, + "learning_rate": 1.5908919099243698e-05, + "loss": 0.6634502410888672, + "step": 1896 + }, + { + "epoch": 0.668546255506608, + "grad_norm": 1.2985703589965545, + "learning_rate": 1.5904217647402788e-05, + "loss": 0.719158411026001, + "step": 1897 + }, + { + "epoch": 0.6688986784140969, + "grad_norm": 1.5083721998375157, + "learning_rate": 1.5899514191263733e-05, + "loss": 0.7547527551651001, + "step": 1898 + }, + { + "epoch": 0.6692511013215859, + "grad_norm": 1.6226125781851348, + "learning_rate": 1.5894808732423207e-05, + "loss": 0.7549886703491211, + "step": 1899 + }, + { + "epoch": 0.6696035242290749, + "grad_norm": 1.5327056521201368, + "learning_rate": 1.589010127247857e-05, + "loss": 0.7107831239700317, + "step": 1900 + }, + { + "epoch": 0.6699559471365639, + "grad_norm": 1.5679371113552734, + "learning_rate": 1.588539181302786e-05, + "loss": 0.855078935623169, + "step": 1901 + }, + { + "epoch": 0.6703083700440529, + "grad_norm": 1.4970896726818788, + "learning_rate": 1.5880680355669792e-05, + "loss": 0.8235266208648682, + "step": 1902 + }, + { + "epoch": 0.6706607929515418, + "grad_norm": 1.339674008175079, + "learning_rate": 1.587596690200375e-05, + "loss": 0.6060166358947754, + "step": 1903 + }, + { + "epoch": 0.6710132158590308, + "grad_norm": 1.4603163291197105, + "learning_rate": 1.5871251453629817e-05, + "loss": 0.7325272560119629, + "step": 1904 + }, + { + "epoch": 0.6713656387665198, + "grad_norm": 1.5470128203990354, + "learning_rate": 1.586653401214873e-05, + "loss": 0.674901008605957, + "step": 1905 + }, + { + "epoch": 0.6717180616740088, + "grad_norm": 1.3515017914848853, + "learning_rate": 1.5861814579161928e-05, + "loss": 0.767164945602417, + "step": 1906 + }, + { + "epoch": 0.6720704845814978, + "grad_norm": 1.3633425183694836, + "learning_rate": 
1.5857093156271496e-05, + "loss": 0.5691556930541992, + "step": 1907 + }, + { + "epoch": 0.6724229074889868, + "grad_norm": 1.3106038540183678, + "learning_rate": 1.585236974508022e-05, + "loss": 0.6885931491851807, + "step": 1908 + }, + { + "epoch": 0.6727753303964757, + "grad_norm": 1.143239709830434, + "learning_rate": 1.5847644347191545e-05, + "loss": 0.6227391958236694, + "step": 1909 + }, + { + "epoch": 0.6731277533039648, + "grad_norm": 1.4883434470080177, + "learning_rate": 1.5842916964209602e-05, + "loss": 0.6084527969360352, + "step": 1910 + }, + { + "epoch": 0.6734801762114537, + "grad_norm": 1.7178691294348742, + "learning_rate": 1.583818759773919e-05, + "loss": 0.7001935243606567, + "step": 1911 + }, + { + "epoch": 0.6738325991189428, + "grad_norm": 1.684468384573203, + "learning_rate": 1.5833456249385774e-05, + "loss": 0.8263465166091919, + "step": 1912 + }, + { + "epoch": 0.6741850220264317, + "grad_norm": 1.6085564780466834, + "learning_rate": 1.582872292075551e-05, + "loss": 0.662792444229126, + "step": 1913 + }, + { + "epoch": 0.6745374449339208, + "grad_norm": 1.7464203558320361, + "learning_rate": 1.582398761345521e-05, + "loss": 0.7093051075935364, + "step": 1914 + }, + { + "epoch": 0.6748898678414097, + "grad_norm": 1.4885122105608484, + "learning_rate": 1.5819250329092364e-05, + "loss": 0.7264106273651123, + "step": 1915 + }, + { + "epoch": 0.6752422907488986, + "grad_norm": 1.5383309179609377, + "learning_rate": 1.581451106927513e-05, + "loss": 0.6561543345451355, + "step": 1916 + }, + { + "epoch": 0.6755947136563877, + "grad_norm": 1.634971670239321, + "learning_rate": 1.580976983561235e-05, + "loss": 0.6563262939453125, + "step": 1917 + }, + { + "epoch": 0.6759471365638766, + "grad_norm": 1.2931579342976025, + "learning_rate": 1.5805026629713512e-05, + "loss": 0.5224509239196777, + "step": 1918 + }, + { + "epoch": 0.6762995594713657, + "grad_norm": 1.4840746720603137, + "learning_rate": 1.5800281453188793e-05, + "loss": 0.6565898656845093, 
+ "step": 1919 + }, + { + "epoch": 0.6766519823788546, + "grad_norm": 1.4375600407888718, + "learning_rate": 1.5795534307649032e-05, + "loss": 0.7954028844833374, + "step": 1920 + }, + { + "epoch": 0.6770044052863436, + "grad_norm": 1.3454762773409146, + "learning_rate": 1.579078519470574e-05, + "loss": 0.6624404788017273, + "step": 1921 + }, + { + "epoch": 0.6773568281938326, + "grad_norm": 1.5514355338443828, + "learning_rate": 1.5786034115971083e-05, + "loss": 0.840311586856842, + "step": 1922 + }, + { + "epoch": 0.6777092511013216, + "grad_norm": 1.5163172495660509, + "learning_rate": 1.578128107305792e-05, + "loss": 0.6967859864234924, + "step": 1923 + }, + { + "epoch": 0.6780616740088106, + "grad_norm": 1.0735596232953704, + "learning_rate": 1.5776526067579746e-05, + "loss": 0.5295379161834717, + "step": 1924 + }, + { + "epoch": 0.6784140969162996, + "grad_norm": 1.8118747234451476, + "learning_rate": 1.5771769101150752e-05, + "loss": 0.6758475303649902, + "step": 1925 + }, + { + "epoch": 0.6787665198237885, + "grad_norm": 1.3510918406813899, + "learning_rate": 1.576701017538577e-05, + "loss": 0.6891785860061646, + "step": 1926 + }, + { + "epoch": 0.6791189427312775, + "grad_norm": 1.4115910497948105, + "learning_rate": 1.5762249291900304e-05, + "loss": 0.6507086157798767, + "step": 1927 + }, + { + "epoch": 0.6794713656387665, + "grad_norm": 1.4168935733459347, + "learning_rate": 1.5757486452310537e-05, + "loss": 0.6220029592514038, + "step": 1928 + }, + { + "epoch": 0.6798237885462555, + "grad_norm": 1.5134078284046213, + "learning_rate": 1.5752721658233294e-05, + "loss": 0.7742874622344971, + "step": 1929 + }, + { + "epoch": 0.6801762114537445, + "grad_norm": 1.513809055671425, + "learning_rate": 1.5747954911286085e-05, + "loss": 0.6895851492881775, + "step": 1930 + }, + { + "epoch": 0.6805286343612335, + "grad_norm": 1.6367265924041048, + "learning_rate": 1.5743186213087062e-05, + "loss": 0.71466064453125, + "step": 1931 + }, + { + "epoch": 
0.6808810572687225, + "grad_norm": 1.506916023064254, + "learning_rate": 1.5738415565255056e-05, + "loss": 0.6465627551078796, + "step": 1932 + }, + { + "epoch": 0.6812334801762114, + "grad_norm": 1.3796886447957644, + "learning_rate": 1.5733642969409553e-05, + "loss": 0.7592962980270386, + "step": 1933 + }, + { + "epoch": 0.6815859030837005, + "grad_norm": 1.662185742102518, + "learning_rate": 1.57288684271707e-05, + "loss": 0.7641816735267639, + "step": 1934 + }, + { + "epoch": 0.6819383259911894, + "grad_norm": 1.5600426648231815, + "learning_rate": 1.5724091940159306e-05, + "loss": 0.7015130519866943, + "step": 1935 + }, + { + "epoch": 0.6822907488986785, + "grad_norm": 1.5031237824980206, + "learning_rate": 1.5719313509996833e-05, + "loss": 0.7851461172103882, + "step": 1936 + }, + { + "epoch": 0.6826431718061674, + "grad_norm": 1.5670991097913773, + "learning_rate": 1.571453313830542e-05, + "loss": 0.7924813628196716, + "step": 1937 + }, + { + "epoch": 0.6829955947136563, + "grad_norm": 1.3030215719290177, + "learning_rate": 1.570975082670785e-05, + "loss": 0.6082741022109985, + "step": 1938 + }, + { + "epoch": 0.6833480176211454, + "grad_norm": 1.5878638287998994, + "learning_rate": 1.5704966576827563e-05, + "loss": 0.7307756543159485, + "step": 1939 + }, + { + "epoch": 0.6837004405286343, + "grad_norm": 1.421111197077357, + "learning_rate": 1.570018039028867e-05, + "loss": 0.6877273917198181, + "step": 1940 + }, + { + "epoch": 0.6840528634361234, + "grad_norm": 3.222041323215856, + "learning_rate": 1.5695392268715934e-05, + "loss": 0.7702943086624146, + "step": 1941 + }, + { + "epoch": 0.6844052863436123, + "grad_norm": 1.3182333231384877, + "learning_rate": 1.569060221373477e-05, + "loss": 0.6576820611953735, + "step": 1942 + }, + { + "epoch": 0.6847577092511014, + "grad_norm": 1.6178003008675335, + "learning_rate": 1.568581022697125e-05, + "loss": 0.6605322360992432, + "step": 1943 + }, + { + "epoch": 0.6851101321585903, + "grad_norm": 1.5479637201173908, 
+ "learning_rate": 1.568101631005211e-05, + "loss": 0.8065364360809326, + "step": 1944 + }, + { + "epoch": 0.6854625550660793, + "grad_norm": 1.5909483515555374, + "learning_rate": 1.5676220464604726e-05, + "loss": 0.8018748164176941, + "step": 1945 + }, + { + "epoch": 0.6858149779735683, + "grad_norm": 1.4496461628107289, + "learning_rate": 1.567142269225715e-05, + "loss": 0.6114683151245117, + "step": 1946 + }, + { + "epoch": 0.6861674008810573, + "grad_norm": 1.4567709922330223, + "learning_rate": 1.566662299463807e-05, + "loss": 0.8470789194107056, + "step": 1947 + }, + { + "epoch": 0.6865198237885463, + "grad_norm": 1.4716494157627575, + "learning_rate": 1.5661821373376837e-05, + "loss": 0.7133561372756958, + "step": 1948 + }, + { + "epoch": 0.6868722466960352, + "grad_norm": 1.6398709503866558, + "learning_rate": 1.5657017830103448e-05, + "loss": 0.9101625084877014, + "step": 1949 + }, + { + "epoch": 0.6872246696035242, + "grad_norm": 1.8312595153810016, + "learning_rate": 1.565221236644856e-05, + "loss": 0.7395101189613342, + "step": 1950 + }, + { + "epoch": 0.6875770925110132, + "grad_norm": 1.4532682115054107, + "learning_rate": 1.5647404984043474e-05, + "loss": 0.7421061992645264, + "step": 1951 + }, + { + "epoch": 0.6879295154185022, + "grad_norm": 1.4495130982943423, + "learning_rate": 1.5642595684520154e-05, + "loss": 0.8744432330131531, + "step": 1952 + }, + { + "epoch": 0.6882819383259912, + "grad_norm": 1.6475850419823541, + "learning_rate": 1.56377844695112e-05, + "loss": 0.8043868541717529, + "step": 1953 + }, + { + "epoch": 0.6886343612334802, + "grad_norm": 1.444538108927131, + "learning_rate": 1.5632971340649873e-05, + "loss": 0.6231396198272705, + "step": 1954 + }, + { + "epoch": 0.6889867841409691, + "grad_norm": 1.3765988847280666, + "learning_rate": 1.562815629957008e-05, + "loss": 0.7791434526443481, + "step": 1955 + }, + { + "epoch": 0.6893392070484582, + "grad_norm": 1.2135950275511538, + "learning_rate": 1.5623339347906383e-05, + 
"loss": 0.5652475357055664, + "step": 1956 + }, + { + "epoch": 0.6896916299559471, + "grad_norm": 1.4607959644694648, + "learning_rate": 1.561852048729398e-05, + "loss": 0.611067533493042, + "step": 1957 + }, + { + "epoch": 0.6900440528634362, + "grad_norm": 1.2569255893474116, + "learning_rate": 1.5613699719368724e-05, + "loss": 0.7580389976501465, + "step": 1958 + }, + { + "epoch": 0.6903964757709251, + "grad_norm": 1.516048041026883, + "learning_rate": 1.560887704576712e-05, + "loss": 0.6841205954551697, + "step": 1959 + }, + { + "epoch": 0.690748898678414, + "grad_norm": 1.7678860610521125, + "learning_rate": 1.5604052468126315e-05, + "loss": 0.7600575089454651, + "step": 1960 + }, + { + "epoch": 0.6911013215859031, + "grad_norm": 1.458096987341084, + "learning_rate": 1.55992259880841e-05, + "loss": 0.7547114491462708, + "step": 1961 + }, + { + "epoch": 0.691453744493392, + "grad_norm": 1.3490975617996133, + "learning_rate": 1.5594397607278912e-05, + "loss": 0.6917474865913391, + "step": 1962 + }, + { + "epoch": 0.6918061674008811, + "grad_norm": 1.378212312699651, + "learning_rate": 1.5589567327349845e-05, + "loss": 0.6820487976074219, + "step": 1963 + }, + { + "epoch": 0.69215859030837, + "grad_norm": 1.4687305992297937, + "learning_rate": 1.5584735149936628e-05, + "loss": 0.6513597965240479, + "step": 1964 + }, + { + "epoch": 0.6925110132158591, + "grad_norm": 1.4807223837447299, + "learning_rate": 1.5579901076679625e-05, + "loss": 0.668257474899292, + "step": 1965 + }, + { + "epoch": 0.692863436123348, + "grad_norm": 1.5130451892313703, + "learning_rate": 1.5575065109219864e-05, + "loss": 0.7600705623626709, + "step": 1966 + }, + { + "epoch": 0.693215859030837, + "grad_norm": 1.5218611988458295, + "learning_rate": 1.5570227249198993e-05, + "loss": 0.8140011429786682, + "step": 1967 + }, + { + "epoch": 0.693568281938326, + "grad_norm": 1.1438716908088957, + "learning_rate": 1.556538749825933e-05, + "loss": 0.610436201095581, + "step": 1968 + }, + { + 
"epoch": 0.693920704845815, + "grad_norm": 1.7706616264872619, + "learning_rate": 1.556054585804381e-05, + "loss": 0.7745693922042847, + "step": 1969 + }, + { + "epoch": 0.694273127753304, + "grad_norm": 1.4076568647110412, + "learning_rate": 1.5555702330196024e-05, + "loss": 0.5809592008590698, + "step": 1970 + }, + { + "epoch": 0.6946255506607929, + "grad_norm": 1.220751429593537, + "learning_rate": 1.5550856916360195e-05, + "loss": 0.6354515552520752, + "step": 1971 + }, + { + "epoch": 0.694977973568282, + "grad_norm": 1.4513364815061058, + "learning_rate": 1.5546009618181194e-05, + "loss": 0.8076149225234985, + "step": 1972 + }, + { + "epoch": 0.6953303964757709, + "grad_norm": 1.6702158357132753, + "learning_rate": 1.5541160437304524e-05, + "loss": 0.7553249597549438, + "step": 1973 + }, + { + "epoch": 0.6956828193832599, + "grad_norm": 1.4495619596653457, + "learning_rate": 1.5536309375376332e-05, + "loss": 0.6109169125556946, + "step": 1974 + }, + { + "epoch": 0.6960352422907489, + "grad_norm": 1.4052818449921982, + "learning_rate": 1.5531456434043404e-05, + "loss": 0.8184436559677124, + "step": 1975 + }, + { + "epoch": 0.6963876651982379, + "grad_norm": 1.3611746850672197, + "learning_rate": 1.5526601614953164e-05, + "loss": 0.6823909878730774, + "step": 1976 + }, + { + "epoch": 0.6967400881057269, + "grad_norm": 1.3254402340100906, + "learning_rate": 1.5521744919753668e-05, + "loss": 0.6669045090675354, + "step": 1977 + }, + { + "epoch": 0.6970925110132159, + "grad_norm": 1.7752129025350782, + "learning_rate": 1.5516886350093617e-05, + "loss": 0.8054187297821045, + "step": 1978 + }, + { + "epoch": 0.6974449339207048, + "grad_norm": 1.6379915816078137, + "learning_rate": 1.551202590762234e-05, + "loss": 0.7089184522628784, + "step": 1979 + }, + { + "epoch": 0.6977973568281939, + "grad_norm": 1.5207382048575195, + "learning_rate": 1.5507163593989804e-05, + "loss": 0.7908214330673218, + "step": 1980 + }, + { + "epoch": 0.6981497797356828, + "grad_norm": 
1.454323961299799, + "learning_rate": 1.5502299410846626e-05, + "loss": 0.8859039545059204, + "step": 1981 + }, + { + "epoch": 0.6985022026431718, + "grad_norm": 1.5085321450966587, + "learning_rate": 1.549743335984403e-05, + "loss": 0.7156866788864136, + "step": 1982 + }, + { + "epoch": 0.6988546255506608, + "grad_norm": 1.4496904801370623, + "learning_rate": 1.5492565442633894e-05, + "loss": 0.6158934831619263, + "step": 1983 + }, + { + "epoch": 0.6992070484581497, + "grad_norm": 1.5453977055484032, + "learning_rate": 1.548769566086873e-05, + "loss": 0.6689192056655884, + "step": 1984 + }, + { + "epoch": 0.6995594713656388, + "grad_norm": 1.4591630403591411, + "learning_rate": 1.548282401620167e-05, + "loss": 0.6695841550827026, + "step": 1985 + }, + { + "epoch": 0.6999118942731277, + "grad_norm": 1.6161480882103554, + "learning_rate": 1.5477950510286488e-05, + "loss": 0.7196098566055298, + "step": 1986 + }, + { + "epoch": 0.7002643171806168, + "grad_norm": 1.5261033448052712, + "learning_rate": 1.5473075144777586e-05, + "loss": 0.7811123132705688, + "step": 1987 + }, + { + "epoch": 0.7006167400881057, + "grad_norm": 1.3902237132074229, + "learning_rate": 1.5468197921330006e-05, + "loss": 0.6341326236724854, + "step": 1988 + }, + { + "epoch": 0.7009691629955948, + "grad_norm": 1.4052051959904983, + "learning_rate": 1.5463318841599408e-05, + "loss": 0.6344352960586548, + "step": 1989 + }, + { + "epoch": 0.7013215859030837, + "grad_norm": 1.5015659436227353, + "learning_rate": 1.5458437907242084e-05, + "loss": 0.6708072423934937, + "step": 1990 + }, + { + "epoch": 0.7016740088105727, + "grad_norm": 1.4551372124338164, + "learning_rate": 1.5453555119914963e-05, + "loss": 0.7018578052520752, + "step": 1991 + }, + { + "epoch": 0.7020264317180617, + "grad_norm": 1.4651591378979865, + "learning_rate": 1.5448670481275604e-05, + "loss": 0.6966190338134766, + "step": 1992 + }, + { + "epoch": 0.7023788546255506, + "grad_norm": 1.2815956936347872, + "learning_rate": 
1.5443783992982182e-05, + "loss": 0.6280171871185303, + "step": 1993 + }, + { + "epoch": 0.7027312775330397, + "grad_norm": 1.451492070117077, + "learning_rate": 1.5438895656693512e-05, + "loss": 0.6644559502601624, + "step": 1994 + }, + { + "epoch": 0.7030837004405286, + "grad_norm": 1.5030450433681415, + "learning_rate": 1.543400547406903e-05, + "loss": 0.776411771774292, + "step": 1995 + }, + { + "epoch": 0.7034361233480176, + "grad_norm": 1.428531901666428, + "learning_rate": 1.5429113446768805e-05, + "loss": 0.6353679895401001, + "step": 1996 + }, + { + "epoch": 0.7037885462555066, + "grad_norm": 1.468487936335314, + "learning_rate": 1.5424219576453526e-05, + "loss": 0.686774492263794, + "step": 1997 + }, + { + "epoch": 0.7041409691629956, + "grad_norm": 1.2525683766202464, + "learning_rate": 1.5419323864784508e-05, + "loss": 0.5296701192855835, + "step": 1998 + }, + { + "epoch": 0.7044933920704846, + "grad_norm": 1.264413948230812, + "learning_rate": 1.5414426313423692e-05, + "loss": 0.6246802806854248, + "step": 1999 + }, + { + "epoch": 0.7048458149779736, + "grad_norm": 1.44172793688486, + "learning_rate": 1.5409526924033646e-05, + "loss": 0.6633912920951843, + "step": 2000 + }, + { + "epoch": 0.7051982378854625, + "grad_norm": 1.720413855985522, + "learning_rate": 1.540462569827756e-05, + "loss": 0.7324577569961548, + "step": 2001 + }, + { + "epoch": 0.7055506607929516, + "grad_norm": 1.6372387419200998, + "learning_rate": 1.539972263781925e-05, + "loss": 0.7988085746765137, + "step": 2002 + }, + { + "epoch": 0.7059030837004405, + "grad_norm": 1.4528481393218415, + "learning_rate": 1.539481774432315e-05, + "loss": 0.6761256456375122, + "step": 2003 + }, + { + "epoch": 0.7062555066079295, + "grad_norm": 1.6101005409981786, + "learning_rate": 1.538991101945431e-05, + "loss": 0.6647740006446838, + "step": 2004 + }, + { + "epoch": 0.7066079295154185, + "grad_norm": 1.5047715708456952, + "learning_rate": 1.538500246487843e-05, + "loss": 0.7111536860466003, + 
"step": 2005 + }, + { + "epoch": 0.7069603524229074, + "grad_norm": 1.8533704165409681, + "learning_rate": 1.5380092082261797e-05, + "loss": 0.7395933270454407, + "step": 2006 + }, + { + "epoch": 0.7073127753303965, + "grad_norm": 1.4630720873509298, + "learning_rate": 1.5375179873271335e-05, + "loss": 0.6158996820449829, + "step": 2007 + }, + { + "epoch": 0.7076651982378854, + "grad_norm": 1.4746770670226905, + "learning_rate": 1.537026583957459e-05, + "loss": 0.7259848117828369, + "step": 2008 + }, + { + "epoch": 0.7080176211453745, + "grad_norm": 1.6674311554666914, + "learning_rate": 1.5365349982839723e-05, + "loss": 0.8370928764343262, + "step": 2009 + }, + { + "epoch": 0.7083700440528634, + "grad_norm": 1.3618230849109776, + "learning_rate": 1.536043230473551e-05, + "loss": 0.6041784882545471, + "step": 2010 + }, + { + "epoch": 0.7087224669603525, + "grad_norm": 1.4112680073946362, + "learning_rate": 1.535551280693135e-05, + "loss": 0.688548743724823, + "step": 2011 + }, + { + "epoch": 0.7090748898678414, + "grad_norm": 1.6056330275270763, + "learning_rate": 1.5350591491097265e-05, + "loss": 0.573681652545929, + "step": 2012 + }, + { + "epoch": 0.7094273127753304, + "grad_norm": 2.0956667904129636, + "learning_rate": 1.5345668358903886e-05, + "loss": 0.6919670104980469, + "step": 2013 + }, + { + "epoch": 0.7097797356828194, + "grad_norm": 1.6440284625605202, + "learning_rate": 1.534074341202246e-05, + "loss": 0.6693999767303467, + "step": 2014 + }, + { + "epoch": 0.7101321585903083, + "grad_norm": 1.5023686452775393, + "learning_rate": 1.533581665212486e-05, + "loss": 0.7204093337059021, + "step": 2015 + }, + { + "epoch": 0.7104845814977974, + "grad_norm": 1.7353596990699613, + "learning_rate": 1.5330888080883555e-05, + "loss": 0.6196314096450806, + "step": 2016 + }, + { + "epoch": 0.7108370044052863, + "grad_norm": 1.4190743094269347, + "learning_rate": 1.5325957699971657e-05, + "loss": 0.7292872071266174, + "step": 2017 + }, + { + "epoch": 
0.7111894273127753, + "grad_norm": 1.7578012075664924, + "learning_rate": 1.532102551106287e-05, + "loss": 0.7514410018920898, + "step": 2018 + }, + { + "epoch": 0.7115418502202643, + "grad_norm": 1.329552917806312, + "learning_rate": 1.531609151583152e-05, + "loss": 0.7683345079421997, + "step": 2019 + }, + { + "epoch": 0.7118942731277533, + "grad_norm": 1.8323846391695044, + "learning_rate": 1.5311155715952536e-05, + "loss": 0.6994156837463379, + "step": 2020 + }, + { + "epoch": 0.7122466960352423, + "grad_norm": 1.3407977210543047, + "learning_rate": 1.5306218113101482e-05, + "loss": 0.5530328750610352, + "step": 2021 + }, + { + "epoch": 0.7125991189427313, + "grad_norm": 1.6814720781682417, + "learning_rate": 1.530127870895451e-05, + "loss": 0.6126301884651184, + "step": 2022 + }, + { + "epoch": 0.7129515418502202, + "grad_norm": 1.9618212705640916, + "learning_rate": 1.5296337505188403e-05, + "loss": 0.7514982223510742, + "step": 2023 + }, + { + "epoch": 0.7133039647577093, + "grad_norm": 1.742411408925072, + "learning_rate": 1.529139450348054e-05, + "loss": 0.7087191939353943, + "step": 2024 + }, + { + "epoch": 0.7136563876651982, + "grad_norm": 1.3195305972662899, + "learning_rate": 1.5286449705508914e-05, + "loss": 0.5713562965393066, + "step": 2025 + }, + { + "epoch": 0.7140088105726872, + "grad_norm": 1.3621779724967453, + "learning_rate": 1.5281503112952136e-05, + "loss": 0.6796679496765137, + "step": 2026 + }, + { + "epoch": 0.7143612334801762, + "grad_norm": 1.8247081007192694, + "learning_rate": 1.5276554727489415e-05, + "loss": 0.7902421355247498, + "step": 2027 + }, + { + "epoch": 0.7147136563876652, + "grad_norm": 1.3608050254188053, + "learning_rate": 1.527160455080058e-05, + "loss": 0.6645491123199463, + "step": 2028 + }, + { + "epoch": 0.7150660792951542, + "grad_norm": 1.489658346292968, + "learning_rate": 1.5266652584566056e-05, + "loss": 0.6077255606651306, + "step": 2029 + }, + { + "epoch": 0.7154185022026431, + "grad_norm": 
1.412193602346091, + "learning_rate": 1.5261698830466888e-05, + "loss": 0.6219078302383423, + "step": 2030 + }, + { + "epoch": 0.7157709251101322, + "grad_norm": 1.280704281307457, + "learning_rate": 1.5256743290184713e-05, + "loss": 0.5895035266876221, + "step": 2031 + }, + { + "epoch": 0.7161233480176211, + "grad_norm": 1.497416305314063, + "learning_rate": 1.5251785965401786e-05, + "loss": 0.6735520958900452, + "step": 2032 + }, + { + "epoch": 0.7164757709251102, + "grad_norm": 1.353147232010895, + "learning_rate": 1.524682685780097e-05, + "loss": 0.6212488412857056, + "step": 2033 + }, + { + "epoch": 0.7168281938325991, + "grad_norm": 1.5786628078958613, + "learning_rate": 1.524186596906572e-05, + "loss": 0.7181172966957092, + "step": 2034 + }, + { + "epoch": 0.7171806167400882, + "grad_norm": 3.1301800941750906, + "learning_rate": 1.5236903300880107e-05, + "loss": 0.7156587839126587, + "step": 2035 + }, + { + "epoch": 0.7175330396475771, + "grad_norm": 1.513371130481219, + "learning_rate": 1.52319388549288e-05, + "loss": 0.6989034414291382, + "step": 2036 + }, + { + "epoch": 0.7178854625550661, + "grad_norm": 1.5183441818080943, + "learning_rate": 1.5226972632897079e-05, + "loss": 0.7224982976913452, + "step": 2037 + }, + { + "epoch": 0.7182378854625551, + "grad_norm": 1.5033480023563544, + "learning_rate": 1.522200463647082e-05, + "loss": 0.6871547698974609, + "step": 2038 + }, + { + "epoch": 0.718590308370044, + "grad_norm": 1.5898527901911406, + "learning_rate": 1.5217034867336498e-05, + "loss": 0.725049614906311, + "step": 2039 + }, + { + "epoch": 0.718942731277533, + "grad_norm": 2.079980258079047, + "learning_rate": 1.5212063327181197e-05, + "loss": 0.7105863094329834, + "step": 2040 + }, + { + "epoch": 0.719295154185022, + "grad_norm": 1.4720898042575539, + "learning_rate": 1.5207090017692605e-05, + "loss": 0.5823827981948853, + "step": 2041 + }, + { + "epoch": 0.719647577092511, + "grad_norm": 1.9166232714289464, + "learning_rate": 
1.5202114940559005e-05, + "loss": 0.7087944746017456, + "step": 2042 + }, + { + "epoch": 0.72, + "grad_norm": 1.40676061171607, + "learning_rate": 1.5197138097469275e-05, + "loss": 0.6678824424743652, + "step": 2043 + }, + { + "epoch": 0.720352422907489, + "grad_norm": 1.8181396920642288, + "learning_rate": 1.5192159490112904e-05, + "loss": 0.7318846583366394, + "step": 2044 + }, + { + "epoch": 0.720704845814978, + "grad_norm": 1.4972370605408583, + "learning_rate": 1.5187179120179969e-05, + "loss": 0.7245825529098511, + "step": 2045 + }, + { + "epoch": 0.721057268722467, + "grad_norm": 1.8554569851295908, + "learning_rate": 1.5182196989361155e-05, + "loss": 0.7691583633422852, + "step": 2046 + }, + { + "epoch": 0.7214096916299559, + "grad_norm": 1.8926959198228865, + "learning_rate": 1.517721309934774e-05, + "loss": 0.7961187362670898, + "step": 2047 + }, + { + "epoch": 0.721762114537445, + "grad_norm": 1.4465824812635413, + "learning_rate": 1.51722274518316e-05, + "loss": 0.7163759469985962, + "step": 2048 + }, + { + "epoch": 0.7221145374449339, + "grad_norm": 1.5931659235074929, + "learning_rate": 1.51672400485052e-05, + "loss": 0.6807754039764404, + "step": 2049 + }, + { + "epoch": 0.7224669603524229, + "grad_norm": 1.6629043788678177, + "learning_rate": 1.516225089106162e-05, + "loss": 0.7026433348655701, + "step": 2050 + }, + { + "epoch": 0.7228193832599119, + "grad_norm": 1.5979782761024863, + "learning_rate": 1.5157259981194514e-05, + "loss": 0.8230476379394531, + "step": 2051 + }, + { + "epoch": 0.7231718061674008, + "grad_norm": 1.7451468269512191, + "learning_rate": 1.5152267320598149e-05, + "loss": 0.6466805934906006, + "step": 2052 + }, + { + "epoch": 0.7235242290748899, + "grad_norm": 1.441654513994546, + "learning_rate": 1.5147272910967368e-05, + "loss": 0.7203368544578552, + "step": 2053 + }, + { + "epoch": 0.7238766519823788, + "grad_norm": 1.3552926542352444, + "learning_rate": 1.5142276753997627e-05, + "loss": 0.6455702781677246, + "step": 2054 + 
}, + { + "epoch": 0.7242290748898679, + "grad_norm": 1.4569594560235375, + "learning_rate": 1.5137278851384958e-05, + "loss": 0.609260082244873, + "step": 2055 + }, + { + "epoch": 0.7245814977973568, + "grad_norm": 1.8083723333355965, + "learning_rate": 1.5132279204826e-05, + "loss": 0.8320673704147339, + "step": 2056 + }, + { + "epoch": 0.7249339207048459, + "grad_norm": 1.5846751172626037, + "learning_rate": 1.512727781601797e-05, + "loss": 0.8497718572616577, + "step": 2057 + }, + { + "epoch": 0.7252863436123348, + "grad_norm": 1.3523103900088498, + "learning_rate": 1.5122274686658695e-05, + "loss": 0.6398370265960693, + "step": 2058 + }, + { + "epoch": 0.7256387665198238, + "grad_norm": 1.4475161405549521, + "learning_rate": 1.511726981844657e-05, + "loss": 0.7562476396560669, + "step": 2059 + }, + { + "epoch": 0.7259911894273128, + "grad_norm": 1.8369611551341436, + "learning_rate": 1.51122632130806e-05, + "loss": 0.7948570251464844, + "step": 2060 + }, + { + "epoch": 0.7263436123348017, + "grad_norm": 1.9057892039367437, + "learning_rate": 1.5107254872260366e-05, + "loss": 0.7062652111053467, + "step": 2061 + }, + { + "epoch": 0.7266960352422908, + "grad_norm": 1.666793884988277, + "learning_rate": 1.5102244797686049e-05, + "loss": 0.6290205717086792, + "step": 2062 + }, + { + "epoch": 0.7270484581497797, + "grad_norm": 1.7111515682842917, + "learning_rate": 1.5097232991058409e-05, + "loss": 0.727097749710083, + "step": 2063 + }, + { + "epoch": 0.7274008810572687, + "grad_norm": 1.6005396217530683, + "learning_rate": 1.5092219454078803e-05, + "loss": 0.783380389213562, + "step": 2064 + }, + { + "epoch": 0.7277533039647577, + "grad_norm": 1.4872748126751951, + "learning_rate": 1.5087204188449165e-05, + "loss": 0.6190629601478577, + "step": 2065 + }, + { + "epoch": 0.7281057268722467, + "grad_norm": 1.5426042958975894, + "learning_rate": 1.5082187195872026e-05, + "loss": 0.6749798059463501, + "step": 2066 + }, + { + "epoch": 0.7284581497797357, + "grad_norm": 
1.524694880675492, + "learning_rate": 1.5077168478050494e-05, + "loss": 0.6581153273582458, + "step": 2067 + }, + { + "epoch": 0.7288105726872247, + "grad_norm": 1.433767292714838, + "learning_rate": 1.5072148036688279e-05, + "loss": 0.6886252760887146, + "step": 2068 + }, + { + "epoch": 0.7291629955947136, + "grad_norm": 1.651630016781231, + "learning_rate": 1.506712587348965e-05, + "loss": 0.6893814206123352, + "step": 2069 + }, + { + "epoch": 0.7295154185022027, + "grad_norm": 1.7840073958291343, + "learning_rate": 1.5062101990159486e-05, + "loss": 0.8242654800415039, + "step": 2070 + }, + { + "epoch": 0.7298678414096916, + "grad_norm": 1.4785860236042563, + "learning_rate": 1.5057076388403229e-05, + "loss": 0.6331228017807007, + "step": 2071 + }, + { + "epoch": 0.7302202643171806, + "grad_norm": 1.999658994203056, + "learning_rate": 1.5052049069926927e-05, + "loss": 0.6440649032592773, + "step": 2072 + }, + { + "epoch": 0.7305726872246696, + "grad_norm": 1.4709264297577982, + "learning_rate": 1.5047020036437187e-05, + "loss": 0.7575498819351196, + "step": 2073 + }, + { + "epoch": 0.7309251101321586, + "grad_norm": 1.8032604054381702, + "learning_rate": 1.5041989289641215e-05, + "loss": 0.7530438899993896, + "step": 2074 + }, + { + "epoch": 0.7312775330396476, + "grad_norm": 1.5344556457224068, + "learning_rate": 1.5036956831246792e-05, + "loss": 0.6035616397857666, + "step": 2075 + }, + { + "epoch": 0.7316299559471365, + "grad_norm": 1.5603807233808964, + "learning_rate": 1.5031922662962279e-05, + "loss": 0.8199492692947388, + "step": 2076 + }, + { + "epoch": 0.7319823788546256, + "grad_norm": 1.4221584765379676, + "learning_rate": 1.5026886786496624e-05, + "loss": 0.7700716257095337, + "step": 2077 + }, + { + "epoch": 0.7323348017621145, + "grad_norm": 1.363028657258907, + "learning_rate": 1.5021849203559347e-05, + "loss": 0.6147816777229309, + "step": 2078 + }, + { + "epoch": 0.7326872246696036, + "grad_norm": 1.5628142146943151, + "learning_rate": 
1.5016809915860549e-05, + "loss": 0.6841654777526855, + "step": 2079 + }, + { + "epoch": 0.7330396475770925, + "grad_norm": 1.7910877668379601, + "learning_rate": 1.5011768925110915e-05, + "loss": 0.7212510108947754, + "step": 2080 + }, + { + "epoch": 0.7333920704845815, + "grad_norm": 1.5222211216380177, + "learning_rate": 1.5006726233021702e-05, + "loss": 0.6695969104766846, + "step": 2081 + }, + { + "epoch": 0.7337444933920705, + "grad_norm": 1.391558192885713, + "learning_rate": 1.500168184130475e-05, + "loss": 0.5991939306259155, + "step": 2082 + }, + { + "epoch": 0.7340969162995594, + "grad_norm": 1.4191544168706896, + "learning_rate": 1.4996635751672467e-05, + "loss": 0.7127671241760254, + "step": 2083 + }, + { + "epoch": 0.7344493392070485, + "grad_norm": 1.6905086418980109, + "learning_rate": 1.4991587965837853e-05, + "loss": 0.6874737739562988, + "step": 2084 + }, + { + "epoch": 0.7348017621145374, + "grad_norm": 1.3584519480933235, + "learning_rate": 1.4986538485514466e-05, + "loss": 0.6695086359977722, + "step": 2085 + }, + { + "epoch": 0.7351541850220265, + "grad_norm": 1.694264564137899, + "learning_rate": 1.4981487312416452e-05, + "loss": 0.8366880416870117, + "step": 2086 + }, + { + "epoch": 0.7355066079295154, + "grad_norm": 1.4589826786561007, + "learning_rate": 1.4976434448258519e-05, + "loss": 0.6448042988777161, + "step": 2087 + }, + { + "epoch": 0.7358590308370044, + "grad_norm": 1.8583566766216881, + "learning_rate": 1.4971379894755969e-05, + "loss": 0.7015181183815002, + "step": 2088 + }, + { + "epoch": 0.7362114537444934, + "grad_norm": 1.702091122213854, + "learning_rate": 1.4966323653624657e-05, + "loss": 0.6842815279960632, + "step": 2089 + }, + { + "epoch": 0.7365638766519824, + "grad_norm": 1.7134163669939546, + "learning_rate": 1.4961265726581025e-05, + "loss": 0.6866877675056458, + "step": 2090 + }, + { + "epoch": 0.7369162995594714, + "grad_norm": 1.537334961209543, + "learning_rate": 1.4956206115342076e-05, + "loss": 
0.5486865043640137, + "step": 2091 + }, + { + "epoch": 0.7372687224669604, + "grad_norm": 1.7196744065626985, + "learning_rate": 1.4951144821625396e-05, + "loss": 0.7241986989974976, + "step": 2092 + }, + { + "epoch": 0.7376211453744493, + "grad_norm": 1.647893211532232, + "learning_rate": 1.4946081847149134e-05, + "loss": 0.8400537967681885, + "step": 2093 + }, + { + "epoch": 0.7379735682819383, + "grad_norm": 2.2262132208657146, + "learning_rate": 1.4941017193632013e-05, + "loss": 0.6050147414207458, + "step": 2094 + }, + { + "epoch": 0.7383259911894273, + "grad_norm": 1.337421477916073, + "learning_rate": 1.4935950862793322e-05, + "loss": 0.6744229197502136, + "step": 2095 + }, + { + "epoch": 0.7386784140969163, + "grad_norm": 1.4345512538147223, + "learning_rate": 1.493088285635293e-05, + "loss": 0.6902294158935547, + "step": 2096 + }, + { + "epoch": 0.7390308370044053, + "grad_norm": 1.8712136012401615, + "learning_rate": 1.492581317603126e-05, + "loss": 0.6328809261322021, + "step": 2097 + }, + { + "epoch": 0.7393832599118942, + "grad_norm": 1.4287618993627116, + "learning_rate": 1.4920741823549316e-05, + "loss": 0.5740914344787598, + "step": 2098 + }, + { + "epoch": 0.7397356828193833, + "grad_norm": 2.181624869430245, + "learning_rate": 1.491566880062866e-05, + "loss": 0.676064133644104, + "step": 2099 + }, + { + "epoch": 0.7400881057268722, + "grad_norm": 1.5152586818427025, + "learning_rate": 1.4910594108991427e-05, + "loss": 0.655153751373291, + "step": 2100 + }, + { + "epoch": 0.7404405286343613, + "grad_norm": 1.7534591753196083, + "learning_rate": 1.4905517750360321e-05, + "loss": 0.7406177520751953, + "step": 2101 + }, + { + "epoch": 0.7407929515418502, + "grad_norm": 1.777307095945404, + "learning_rate": 1.4900439726458602e-05, + "loss": 0.6568606495857239, + "step": 2102 + }, + { + "epoch": 0.7411453744493393, + "grad_norm": 1.661203262476052, + "learning_rate": 1.4895360039010101e-05, + "loss": 0.8073545098304749, + "step": 2103 + }, + { + 
"epoch": 0.7414977973568282, + "grad_norm": 1.6727123321226325, + "learning_rate": 1.4890278689739219e-05, + "loss": 0.6350502967834473, + "step": 2104 + }, + { + "epoch": 0.7418502202643171, + "grad_norm": 1.475293376760879, + "learning_rate": 1.4885195680370915e-05, + "loss": 0.6419750452041626, + "step": 2105 + }, + { + "epoch": 0.7422026431718062, + "grad_norm": 1.5480091112446772, + "learning_rate": 1.4880111012630706e-05, + "loss": 0.72661292552948, + "step": 2106 + }, + { + "epoch": 0.7425550660792951, + "grad_norm": 1.5125479406066336, + "learning_rate": 1.4875024688244683e-05, + "loss": 0.6996778845787048, + "step": 2107 + }, + { + "epoch": 0.7429074889867842, + "grad_norm": 1.7343888178448454, + "learning_rate": 1.4869936708939497e-05, + "loss": 0.8383389711380005, + "step": 2108 + }, + { + "epoch": 0.7432599118942731, + "grad_norm": 1.6950461405964057, + "learning_rate": 1.4864847076442358e-05, + "loss": 0.6863676905632019, + "step": 2109 + }, + { + "epoch": 0.7436123348017621, + "grad_norm": 1.781136801701718, + "learning_rate": 1.4859755792481032e-05, + "loss": 0.8493780493736267, + "step": 2110 + }, + { + "epoch": 0.7439647577092511, + "grad_norm": 1.3754571175527768, + "learning_rate": 1.4854662858783857e-05, + "loss": 0.6172446012496948, + "step": 2111 + }, + { + "epoch": 0.7443171806167401, + "grad_norm": 6.860121931549926, + "learning_rate": 1.4849568277079724e-05, + "loss": 0.8390353918075562, + "step": 2112 + }, + { + "epoch": 0.7446696035242291, + "grad_norm": 1.8563178731324264, + "learning_rate": 1.4844472049098087e-05, + "loss": 0.7108968496322632, + "step": 2113 + }, + { + "epoch": 0.7450220264317181, + "grad_norm": 1.5680406370173388, + "learning_rate": 1.4839374176568956e-05, + "loss": 0.7322912812232971, + "step": 2114 + }, + { + "epoch": 0.745374449339207, + "grad_norm": 1.5999840343791083, + "learning_rate": 1.4834274661222896e-05, + "loss": 0.6371238231658936, + "step": 2115 + }, + { + "epoch": 0.745726872246696, + "grad_norm": 
1.6793360349519253, + "learning_rate": 1.4829173504791035e-05, + "loss": 0.8346511125564575, + "step": 2116 + }, + { + "epoch": 0.746079295154185, + "grad_norm": 1.5530745059154032, + "learning_rate": 1.4824070709005063e-05, + "loss": 0.5893645286560059, + "step": 2117 + }, + { + "epoch": 0.746431718061674, + "grad_norm": 1.298803943907695, + "learning_rate": 1.4818966275597213e-05, + "loss": 0.60541832447052, + "step": 2118 + }, + { + "epoch": 0.746784140969163, + "grad_norm": 2.0046684565684108, + "learning_rate": 1.4813860206300286e-05, + "loss": 0.5823955535888672, + "step": 2119 + }, + { + "epoch": 0.747136563876652, + "grad_norm": 1.8094924676670123, + "learning_rate": 1.480875250284763e-05, + "loss": 0.6751007437705994, + "step": 2120 + }, + { + "epoch": 0.747488986784141, + "grad_norm": 1.5760168475146599, + "learning_rate": 1.4803643166973155e-05, + "loss": 0.6878843307495117, + "step": 2121 + }, + { + "epoch": 0.7478414096916299, + "grad_norm": 1.4061876649605263, + "learning_rate": 1.4798532200411319e-05, + "loss": 0.6732173562049866, + "step": 2122 + }, + { + "epoch": 0.748193832599119, + "grad_norm": 1.558565097379613, + "learning_rate": 1.479341960489714e-05, + "loss": 0.6383658647537231, + "step": 2123 + }, + { + "epoch": 0.7485462555066079, + "grad_norm": 1.8120908321553708, + "learning_rate": 1.4788305382166174e-05, + "loss": 0.7444638013839722, + "step": 2124 + }, + { + "epoch": 0.748898678414097, + "grad_norm": 1.7437949253948153, + "learning_rate": 1.4783189533954555e-05, + "loss": 0.5492427349090576, + "step": 2125 + }, + { + "epoch": 0.7492511013215859, + "grad_norm": 1.60343309806789, + "learning_rate": 1.4778072061998944e-05, + "loss": 0.6193333864212036, + "step": 2126 + }, + { + "epoch": 0.7496035242290748, + "grad_norm": 2.019729643045431, + "learning_rate": 1.4772952968036572e-05, + "loss": 0.853213906288147, + "step": 2127 + }, + { + "epoch": 0.7499559471365639, + "grad_norm": 1.4306248677016198, + "learning_rate": 
1.4767832253805203e-05, + "loss": 0.6128672361373901, + "step": 2128 + }, + { + "epoch": 0.7503083700440528, + "grad_norm": 1.7550432779472305, + "learning_rate": 1.4762709921043166e-05, + "loss": 0.7298723459243774, + "step": 2129 + }, + { + "epoch": 0.7506607929515419, + "grad_norm": 1.3773404123246435, + "learning_rate": 1.475758597148933e-05, + "loss": 0.6578782796859741, + "step": 2130 + }, + { + "epoch": 0.7510132158590308, + "grad_norm": 1.6603784675007325, + "learning_rate": 1.4752460406883122e-05, + "loss": 0.6490681171417236, + "step": 2131 + }, + { + "epoch": 0.7513656387665198, + "grad_norm": 1.530112138397779, + "learning_rate": 1.4747333228964502e-05, + "loss": 0.657980740070343, + "step": 2132 + }, + { + "epoch": 0.7517180616740088, + "grad_norm": 1.9937499661396574, + "learning_rate": 1.4742204439473999e-05, + "loss": 0.8431578874588013, + "step": 2133 + }, + { + "epoch": 0.7520704845814978, + "grad_norm": 1.7351787739786175, + "learning_rate": 1.4737074040152667e-05, + "loss": 0.7217377424240112, + "step": 2134 + }, + { + "epoch": 0.7524229074889868, + "grad_norm": 2.232953474209366, + "learning_rate": 1.4731942032742127e-05, + "loss": 0.6299912333488464, + "step": 2135 + }, + { + "epoch": 0.7527753303964758, + "grad_norm": 1.6053563211063129, + "learning_rate": 1.4726808418984527e-05, + "loss": 0.6325603723526001, + "step": 2136 + }, + { + "epoch": 0.7531277533039648, + "grad_norm": 1.7427287871247603, + "learning_rate": 1.4721673200622572e-05, + "loss": 0.6785098314285278, + "step": 2137 + }, + { + "epoch": 0.7534801762114537, + "grad_norm": 2.5780020778792068, + "learning_rate": 1.471653637939951e-05, + "loss": 0.7311918139457703, + "step": 2138 + }, + { + "epoch": 0.7538325991189427, + "grad_norm": 1.498799685922224, + "learning_rate": 1.4711397957059132e-05, + "loss": 0.7117096781730652, + "step": 2139 + }, + { + "epoch": 0.7541850220264317, + "grad_norm": 1.4519847744536865, + "learning_rate": 1.4706257935345772e-05, + "loss": 
0.6709408760070801, + "step": 2140 + }, + { + "epoch": 0.7545374449339207, + "grad_norm": 1.9629689982019365, + "learning_rate": 1.4701116316004307e-05, + "loss": 0.6478008031845093, + "step": 2141 + }, + { + "epoch": 0.7548898678414097, + "grad_norm": 1.5362345610055923, + "learning_rate": 1.4695973100780154e-05, + "loss": 0.6414140462875366, + "step": 2142 + }, + { + "epoch": 0.7552422907488987, + "grad_norm": 1.7088547501964069, + "learning_rate": 1.4690828291419283e-05, + "loss": 0.6947815418243408, + "step": 2143 + }, + { + "epoch": 0.7555947136563876, + "grad_norm": 1.6244554419934112, + "learning_rate": 1.4685681889668187e-05, + "loss": 0.6614837646484375, + "step": 2144 + }, + { + "epoch": 0.7559471365638767, + "grad_norm": 1.87010430937903, + "learning_rate": 1.4680533897273913e-05, + "loss": 0.7803678512573242, + "step": 2145 + }, + { + "epoch": 0.7562995594713656, + "grad_norm": 1.975192105020327, + "learning_rate": 1.4675384315984045e-05, + "loss": 0.8411567211151123, + "step": 2146 + }, + { + "epoch": 0.7566519823788547, + "grad_norm": 2.4329758477488177, + "learning_rate": 1.4670233147546708e-05, + "loss": 0.8379243016242981, + "step": 2147 + }, + { + "epoch": 0.7570044052863436, + "grad_norm": 1.6153137773652926, + "learning_rate": 1.4665080393710558e-05, + "loss": 0.6419194936752319, + "step": 2148 + }, + { + "epoch": 0.7573568281938327, + "grad_norm": 1.8383077301350303, + "learning_rate": 1.4659926056224798e-05, + "loss": 0.7791979908943176, + "step": 2149 + }, + { + "epoch": 0.7577092511013216, + "grad_norm": 1.72203201226436, + "learning_rate": 1.465477013683916e-05, + "loss": 0.7237389087677002, + "step": 2150 + }, + { + "epoch": 0.7580616740088105, + "grad_norm": 1.5129431088418641, + "learning_rate": 1.464961263730393e-05, + "loss": 0.6750755906105042, + "step": 2151 + }, + { + "epoch": 0.7584140969162996, + "grad_norm": 1.3799525283393634, + "learning_rate": 1.4644453559369904e-05, + "loss": 0.5412150621414185, + "step": 2152 + }, + { + 
"epoch": 0.7587665198237885, + "grad_norm": 1.7752121571388841, + "learning_rate": 1.463929290478844e-05, + "loss": 0.7009850740432739, + "step": 2153 + }, + { + "epoch": 0.7591189427312776, + "grad_norm": 1.5166585489574307, + "learning_rate": 1.4634130675311411e-05, + "loss": 0.8678998351097107, + "step": 2154 + }, + { + "epoch": 0.7594713656387665, + "grad_norm": 2.0127463717616347, + "learning_rate": 1.4628966872691241e-05, + "loss": 0.7395705580711365, + "step": 2155 + }, + { + "epoch": 0.7598237885462555, + "grad_norm": 1.5739842401493016, + "learning_rate": 1.4623801498680875e-05, + "loss": 0.5950812101364136, + "step": 2156 + }, + { + "epoch": 0.7601762114537445, + "grad_norm": 1.6474041176538503, + "learning_rate": 1.46186345550338e-05, + "loss": 0.7133630514144897, + "step": 2157 + }, + { + "epoch": 0.7605286343612335, + "grad_norm": 1.4644647660974064, + "learning_rate": 1.4613466043504026e-05, + "loss": 0.7551965117454529, + "step": 2158 + }, + { + "epoch": 0.7608810572687225, + "grad_norm": 1.4284086636489846, + "learning_rate": 1.4608295965846111e-05, + "loss": 0.6654022932052612, + "step": 2159 + }, + { + "epoch": 0.7612334801762115, + "grad_norm": 3.5518990487711126, + "learning_rate": 1.460312432381513e-05, + "loss": 0.8081967830657959, + "step": 2160 + }, + { + "epoch": 0.7615859030837004, + "grad_norm": 1.8113760087057564, + "learning_rate": 1.4597951119166696e-05, + "loss": 0.7478348016738892, + "step": 2161 + }, + { + "epoch": 0.7619383259911894, + "grad_norm": 2.9384500423152833, + "learning_rate": 1.4592776353656948e-05, + "loss": 0.7866748571395874, + "step": 2162 + }, + { + "epoch": 0.7622907488986784, + "grad_norm": 1.4185631764668494, + "learning_rate": 1.4587600029042563e-05, + "loss": 0.6675869226455688, + "step": 2163 + }, + { + "epoch": 0.7626431718061674, + "grad_norm": 1.934904377243222, + "learning_rate": 1.4582422147080739e-05, + "loss": 0.6881103515625, + "step": 2164 + }, + { + "epoch": 0.7629955947136564, + "grad_norm": 
1.6886719056667128, + "learning_rate": 1.457724270952921e-05, + "loss": 0.7298593521118164, + "step": 2165 + }, + { + "epoch": 0.7633480176211453, + "grad_norm": 1.5123877451607526, + "learning_rate": 1.4572061718146224e-05, + "loss": 0.7102776765823364, + "step": 2166 + }, + { + "epoch": 0.7637004405286344, + "grad_norm": 1.6706836844885837, + "learning_rate": 1.4566879174690576e-05, + "loss": 0.7767213582992554, + "step": 2167 + }, + { + "epoch": 0.7640528634361233, + "grad_norm": 1.4702267439170456, + "learning_rate": 1.4561695080921573e-05, + "loss": 0.7480257749557495, + "step": 2168 + }, + { + "epoch": 0.7644052863436124, + "grad_norm": 1.4326376726611632, + "learning_rate": 1.4556509438599057e-05, + "loss": 0.7419564723968506, + "step": 2169 + }, + { + "epoch": 0.7647577092511013, + "grad_norm": 1.4787079836022163, + "learning_rate": 1.4551322249483388e-05, + "loss": 0.6820264458656311, + "step": 2170 + }, + { + "epoch": 0.7651101321585904, + "grad_norm": 1.3819947250134947, + "learning_rate": 1.4546133515335462e-05, + "loss": 0.5947732329368591, + "step": 2171 + }, + { + "epoch": 0.7654625550660793, + "grad_norm": 1.6478975280830812, + "learning_rate": 1.4540943237916685e-05, + "loss": 0.6772021055221558, + "step": 2172 + }, + { + "epoch": 0.7658149779735682, + "grad_norm": 1.7643629263201115, + "learning_rate": 1.4535751418989e-05, + "loss": 0.7822210192680359, + "step": 2173 + }, + { + "epoch": 0.7661674008810573, + "grad_norm": 1.6079996302057808, + "learning_rate": 1.4530558060314866e-05, + "loss": 0.6208021640777588, + "step": 2174 + }, + { + "epoch": 0.7665198237885462, + "grad_norm": 1.5681481752797541, + "learning_rate": 1.4525363163657264e-05, + "loss": 0.8017063140869141, + "step": 2175 + }, + { + "epoch": 0.7668722466960353, + "grad_norm": 1.4681783580715917, + "learning_rate": 1.4520166730779704e-05, + "loss": 0.738383948802948, + "step": 2176 + }, + { + "epoch": 0.7672246696035242, + "grad_norm": 1.742058488341915, + "learning_rate": 
1.4514968763446213e-05, + "loss": 0.7698314785957336, + "step": 2177 + }, + { + "epoch": 0.7675770925110132, + "grad_norm": 1.7037031257568012, + "learning_rate": 1.4509769263421337e-05, + "loss": 0.789836049079895, + "step": 2178 + }, + { + "epoch": 0.7679295154185022, + "grad_norm": 1.8506345351591968, + "learning_rate": 1.4504568232470145e-05, + "loss": 0.6437339782714844, + "step": 2179 + }, + { + "epoch": 0.7682819383259912, + "grad_norm": 2.04999468198658, + "learning_rate": 1.4499365672358226e-05, + "loss": 0.6684735417366028, + "step": 2180 + }, + { + "epoch": 0.7686343612334802, + "grad_norm": 1.5077038126146909, + "learning_rate": 1.4494161584851687e-05, + "loss": 0.6577454805374146, + "step": 2181 + }, + { + "epoch": 0.7689867841409692, + "grad_norm": 1.3277471323795764, + "learning_rate": 1.4488955971717154e-05, + "loss": 0.5975776314735413, + "step": 2182 + }, + { + "epoch": 0.7693392070484582, + "grad_norm": 1.8819815707164231, + "learning_rate": 1.4483748834721767e-05, + "loss": 0.6385577917098999, + "step": 2183 + }, + { + "epoch": 0.7696916299559471, + "grad_norm": 1.4452778349053288, + "learning_rate": 1.4478540175633193e-05, + "loss": 0.6295928955078125, + "step": 2184 + }, + { + "epoch": 0.7700440528634361, + "grad_norm": 1.5790897154124113, + "learning_rate": 1.4473329996219605e-05, + "loss": 0.6848496198654175, + "step": 2185 + }, + { + "epoch": 0.7703964757709251, + "grad_norm": 1.410283277756768, + "learning_rate": 1.44681182982497e-05, + "loss": 0.6476501226425171, + "step": 2186 + }, + { + "epoch": 0.7707488986784141, + "grad_norm": 1.5220085975801703, + "learning_rate": 1.4462905083492683e-05, + "loss": 0.750103235244751, + "step": 2187 + }, + { + "epoch": 0.771101321585903, + "grad_norm": 1.3838063845924222, + "learning_rate": 1.4457690353718285e-05, + "loss": 0.668454110622406, + "step": 2188 + }, + { + "epoch": 0.7714537444933921, + "grad_norm": 1.3695000422583874, + "learning_rate": 1.4452474110696738e-05, + "loss": 
0.6671048402786255, + "step": 2189 + }, + { + "epoch": 0.771806167400881, + "grad_norm": 1.404147919130693, + "learning_rate": 1.4447256356198797e-05, + "loss": 0.6261379718780518, + "step": 2190 + }, + { + "epoch": 0.7721585903083701, + "grad_norm": 1.6192228095415668, + "learning_rate": 1.4442037091995726e-05, + "loss": 0.6128308176994324, + "step": 2191 + }, + { + "epoch": 0.772511013215859, + "grad_norm": 1.629684954387357, + "learning_rate": 1.4436816319859306e-05, + "loss": 0.7709108591079712, + "step": 2192 + }, + { + "epoch": 0.7728634361233481, + "grad_norm": 1.7604991326643686, + "learning_rate": 1.4431594041561822e-05, + "loss": 0.6242028474807739, + "step": 2193 + }, + { + "epoch": 0.773215859030837, + "grad_norm": 1.7562103574700596, + "learning_rate": 1.4426370258876079e-05, + "loss": 0.8030718564987183, + "step": 2194 + }, + { + "epoch": 0.7735682819383259, + "grad_norm": 1.5182882363444798, + "learning_rate": 1.4421144973575386e-05, + "loss": 0.7785710692405701, + "step": 2195 + }, + { + "epoch": 0.773920704845815, + "grad_norm": 1.5453752656669346, + "learning_rate": 1.4415918187433564e-05, + "loss": 0.6846014857292175, + "step": 2196 + }, + { + "epoch": 0.7742731277533039, + "grad_norm": 1.6007643935951585, + "learning_rate": 1.4410689902224947e-05, + "loss": 0.7883827686309814, + "step": 2197 + }, + { + "epoch": 0.774625550660793, + "grad_norm": 2.0453745328196065, + "learning_rate": 1.4405460119724377e-05, + "loss": 0.8285650610923767, + "step": 2198 + }, + { + "epoch": 0.7749779735682819, + "grad_norm": 1.5026043059194256, + "learning_rate": 1.4400228841707193e-05, + "loss": 0.6101093292236328, + "step": 2199 + }, + { + "epoch": 0.775330396475771, + "grad_norm": 1.4888885445589903, + "learning_rate": 1.4394996069949262e-05, + "loss": 0.6627891063690186, + "step": 2200 + }, + { + "epoch": 0.7756828193832599, + "grad_norm": 1.4487650646569075, + "learning_rate": 1.4389761806226943e-05, + "loss": 0.6755822896957397, + "step": 2201 + }, + { + 
"epoch": 0.7760352422907489, + "grad_norm": 1.438634659048083, + "learning_rate": 1.4384526052317106e-05, + "loss": 0.6718465089797974, + "step": 2202 + }, + { + "epoch": 0.7763876651982379, + "grad_norm": 1.4171659147035778, + "learning_rate": 1.4379288809997121e-05, + "loss": 0.5857758522033691, + "step": 2203 + }, + { + "epoch": 0.7767400881057269, + "grad_norm": 1.1200186604200135, + "learning_rate": 1.4374050081044876e-05, + "loss": 0.5861783027648926, + "step": 2204 + }, + { + "epoch": 0.7770925110132159, + "grad_norm": 1.442532656158601, + "learning_rate": 1.4368809867238754e-05, + "loss": 0.6862374544143677, + "step": 2205 + }, + { + "epoch": 0.7774449339207048, + "grad_norm": 1.6455201954220524, + "learning_rate": 1.4363568170357646e-05, + "loss": 0.6787701845169067, + "step": 2206 + }, + { + "epoch": 0.7777973568281938, + "grad_norm": 1.4101038203667695, + "learning_rate": 1.435832499218094e-05, + "loss": 0.5671687126159668, + "step": 2207 + }, + { + "epoch": 0.7781497797356828, + "grad_norm": 1.5479554264257531, + "learning_rate": 1.435308033448854e-05, + "loss": 0.8243429064750671, + "step": 2208 + }, + { + "epoch": 0.7785022026431718, + "grad_norm": 1.3676716888852272, + "learning_rate": 1.4347834199060835e-05, + "loss": 0.5880655646324158, + "step": 2209 + }, + { + "epoch": 0.7788546255506608, + "grad_norm": 2.451624357800272, + "learning_rate": 1.4342586587678734e-05, + "loss": 0.7085679769515991, + "step": 2210 + }, + { + "epoch": 0.7792070484581498, + "grad_norm": 1.546990179750224, + "learning_rate": 1.4337337502123627e-05, + "loss": 0.7011853456497192, + "step": 2211 + }, + { + "epoch": 0.7795594713656387, + "grad_norm": 1.6003260447933962, + "learning_rate": 1.4332086944177426e-05, + "loss": 0.755327582359314, + "step": 2212 + }, + { + "epoch": 0.7799118942731278, + "grad_norm": 1.3917359947430683, + "learning_rate": 1.4326834915622522e-05, + "loss": 0.7152736186981201, + "step": 2213 + }, + { + "epoch": 0.7802643171806167, + "grad_norm": 
1.3821995576878587, + "learning_rate": 1.4321581418241825e-05, + "loss": 0.6744083166122437, + "step": 2214 + }, + { + "epoch": 0.7806167400881058, + "grad_norm": 1.5294456027931242, + "learning_rate": 1.4316326453818728e-05, + "loss": 0.6112288236618042, + "step": 2215 + }, + { + "epoch": 0.7809691629955947, + "grad_norm": 1.2620758120071194, + "learning_rate": 1.4311070024137128e-05, + "loss": 0.5569246411323547, + "step": 2216 + }, + { + "epoch": 0.7813215859030836, + "grad_norm": 1.474883531826743, + "learning_rate": 1.4305812130981418e-05, + "loss": 0.6214494705200195, + "step": 2217 + }, + { + "epoch": 0.7816740088105727, + "grad_norm": 1.4094788075709526, + "learning_rate": 1.4300552776136497e-05, + "loss": 0.5401003956794739, + "step": 2218 + }, + { + "epoch": 0.7820264317180616, + "grad_norm": 1.433294268920241, + "learning_rate": 1.4295291961387742e-05, + "loss": 0.5128720998764038, + "step": 2219 + }, + { + "epoch": 0.7823788546255507, + "grad_norm": 1.352265751544302, + "learning_rate": 1.4290029688521043e-05, + "loss": 0.5495916604995728, + "step": 2220 + }, + { + "epoch": 0.7827312775330396, + "grad_norm": 1.6131865642068703, + "learning_rate": 1.4284765959322772e-05, + "loss": 0.628544807434082, + "step": 2221 + }, + { + "epoch": 0.7830837004405287, + "grad_norm": 1.443784571277232, + "learning_rate": 1.427950077557981e-05, + "loss": 0.7171294689178467, + "step": 2222 + }, + { + "epoch": 0.7834361233480176, + "grad_norm": 1.3723589201513293, + "learning_rate": 1.4274234139079513e-05, + "loss": 0.7436389327049255, + "step": 2223 + }, + { + "epoch": 0.7837885462555066, + "grad_norm": 1.5295286402885273, + "learning_rate": 1.426896605160975e-05, + "loss": 0.7154244780540466, + "step": 2224 + }, + { + "epoch": 0.7841409691629956, + "grad_norm": 1.4385555847293963, + "learning_rate": 1.426369651495886e-05, + "loss": 0.6433268189430237, + "step": 2225 + }, + { + "epoch": 0.7844933920704846, + "grad_norm": 1.4177681718218336, + "learning_rate": 
1.4258425530915703e-05, + "loss": 0.6612321734428406, + "step": 2226 + }, + { + "epoch": 0.7848458149779736, + "grad_norm": 1.962010974229914, + "learning_rate": 1.42531531012696e-05, + "loss": 0.6384811401367188, + "step": 2227 + }, + { + "epoch": 0.7851982378854625, + "grad_norm": 1.4927220821701634, + "learning_rate": 1.4247879227810384e-05, + "loss": 0.5592762231826782, + "step": 2228 + }, + { + "epoch": 0.7855506607929515, + "grad_norm": 1.6376570609433725, + "learning_rate": 1.4242603912328367e-05, + "loss": 0.6904512643814087, + "step": 2229 + }, + { + "epoch": 0.7859030837004405, + "grad_norm": 1.7784965930873091, + "learning_rate": 1.4237327156614358e-05, + "loss": 0.7165266871452332, + "step": 2230 + }, + { + "epoch": 0.7862555066079295, + "grad_norm": 1.6275397333714936, + "learning_rate": 1.423204896245965e-05, + "loss": 0.8567172288894653, + "step": 2231 + }, + { + "epoch": 0.7866079295154185, + "grad_norm": 1.6554990252792119, + "learning_rate": 1.4226769331656028e-05, + "loss": 0.6595934629440308, + "step": 2232 + }, + { + "epoch": 0.7869603524229075, + "grad_norm": 1.8034278962736743, + "learning_rate": 1.4221488265995755e-05, + "loss": 0.750861644744873, + "step": 2233 + }, + { + "epoch": 0.7873127753303965, + "grad_norm": 1.3674194021669617, + "learning_rate": 1.4216205767271597e-05, + "loss": 0.7146387696266174, + "step": 2234 + }, + { + "epoch": 0.7876651982378855, + "grad_norm": 1.9347692502503655, + "learning_rate": 1.4210921837276792e-05, + "loss": 0.58647221326828, + "step": 2235 + }, + { + "epoch": 0.7880176211453744, + "grad_norm": 1.4888974250205094, + "learning_rate": 1.4205636477805072e-05, + "loss": 0.6893318891525269, + "step": 2236 + }, + { + "epoch": 0.7883700440528635, + "grad_norm": 1.1833417050311776, + "learning_rate": 1.4200349690650654e-05, + "loss": 0.5545464158058167, + "step": 2237 + }, + { + "epoch": 0.7887224669603524, + "grad_norm": 1.6014523598259138, + "learning_rate": 1.4195061477608234e-05, + "loss": 
0.6088600158691406, + "step": 2238 + }, + { + "epoch": 0.7890748898678414, + "grad_norm": 1.3513904877886467, + "learning_rate": 1.4189771840472997e-05, + "loss": 0.6330769658088684, + "step": 2239 + }, + { + "epoch": 0.7894273127753304, + "grad_norm": 1.4283770062393895, + "learning_rate": 1.4184480781040613e-05, + "loss": 0.678654670715332, + "step": 2240 + }, + { + "epoch": 0.7897797356828193, + "grad_norm": 1.445633946040222, + "learning_rate": 1.417918830110723e-05, + "loss": 0.6259177923202515, + "step": 2241 + }, + { + "epoch": 0.7901321585903084, + "grad_norm": 1.408151849302333, + "learning_rate": 1.4173894402469477e-05, + "loss": 0.634982168674469, + "step": 2242 + }, + { + "epoch": 0.7904845814977973, + "grad_norm": 1.37778450193705, + "learning_rate": 1.4168599086924473e-05, + "loss": 0.6610612869262695, + "step": 2243 + }, + { + "epoch": 0.7908370044052864, + "grad_norm": 1.386127288755765, + "learning_rate": 1.416330235626981e-05, + "loss": 0.6952961683273315, + "step": 2244 + }, + { + "epoch": 0.7911894273127753, + "grad_norm": 1.6165363001234343, + "learning_rate": 1.4158004212303565e-05, + "loss": 0.5055881142616272, + "step": 2245 + }, + { + "epoch": 0.7915418502202644, + "grad_norm": 1.4841191669035856, + "learning_rate": 1.4152704656824288e-05, + "loss": 0.7284455299377441, + "step": 2246 + }, + { + "epoch": 0.7918942731277533, + "grad_norm": 1.3583334859782668, + "learning_rate": 1.414740369163102e-05, + "loss": 0.6985108852386475, + "step": 2247 + }, + { + "epoch": 0.7922466960352423, + "grad_norm": 1.3664811170856164, + "learning_rate": 1.4142101318523271e-05, + "loss": 0.5967550277709961, + "step": 2248 + }, + { + "epoch": 0.7925991189427313, + "grad_norm": 1.5695298710984633, + "learning_rate": 1.4136797539301033e-05, + "loss": 0.7696695327758789, + "step": 2249 + }, + { + "epoch": 0.7929515418502202, + "grad_norm": 1.3234775564665824, + "learning_rate": 1.413149235576477e-05, + "loss": 0.8131378293037415, + "step": 2250 + }, + { + "epoch": 
0.7933039647577093, + "grad_norm": 1.8429663529686, + "learning_rate": 1.4126185769715428e-05, + "loss": 0.8029932975769043, + "step": 2251 + }, + { + "epoch": 0.7936563876651982, + "grad_norm": 1.720051288151631, + "learning_rate": 1.412087778295443e-05, + "loss": 0.7408573031425476, + "step": 2252 + }, + { + "epoch": 0.7940088105726872, + "grad_norm": 1.8037723298533723, + "learning_rate": 1.411556839728367e-05, + "loss": 0.8624325394630432, + "step": 2253 + }, + { + "epoch": 0.7943612334801762, + "grad_norm": 1.5291561523904078, + "learning_rate": 1.411025761450552e-05, + "loss": 0.7635384798049927, + "step": 2254 + }, + { + "epoch": 0.7947136563876652, + "grad_norm": 1.5012301776005823, + "learning_rate": 1.4104945436422832e-05, + "loss": 0.5612920522689819, + "step": 2255 + }, + { + "epoch": 0.7950660792951542, + "grad_norm": 1.5891725973137842, + "learning_rate": 1.4099631864838912e-05, + "loss": 0.5792248845100403, + "step": 2256 + }, + { + "epoch": 0.7954185022026432, + "grad_norm": 1.427703140365858, + "learning_rate": 1.4094316901557563e-05, + "loss": 0.7405142188072205, + "step": 2257 + }, + { + "epoch": 0.7957709251101321, + "grad_norm": 1.5302016454534209, + "learning_rate": 1.4089000548383044e-05, + "loss": 0.630780816078186, + "step": 2258 + }, + { + "epoch": 0.7961233480176212, + "grad_norm": 1.5690685088460359, + "learning_rate": 1.4083682807120092e-05, + "loss": 0.6737201809883118, + "step": 2259 + }, + { + "epoch": 0.7964757709251101, + "grad_norm": 4.158789316506426, + "learning_rate": 1.4078363679573918e-05, + "loss": 0.6469985842704773, + "step": 2260 + }, + { + "epoch": 0.7968281938325992, + "grad_norm": 1.4774582614404035, + "learning_rate": 1.4073043167550198e-05, + "loss": 0.6315224170684814, + "step": 2261 + }, + { + "epoch": 0.7971806167400881, + "grad_norm": 1.1766652256758812, + "learning_rate": 1.4067721272855079e-05, + "loss": 0.6785402297973633, + "step": 2262 + }, + { + "epoch": 0.797533039647577, + "grad_norm": 1.4677269844033833, 
+ "learning_rate": 1.406239799729518e-05, + "loss": 0.7131394147872925, + "step": 2263 + }, + { + "epoch": 0.7978854625550661, + "grad_norm": 1.5575833651180606, + "learning_rate": 1.405707334267759e-05, + "loss": 0.6921142339706421, + "step": 2264 + }, + { + "epoch": 0.798237885462555, + "grad_norm": 1.375694666198905, + "learning_rate": 1.4051747310809863e-05, + "loss": 0.695213794708252, + "step": 2265 + }, + { + "epoch": 0.7985903083700441, + "grad_norm": 1.8529986724322307, + "learning_rate": 1.4046419903500013e-05, + "loss": 0.7081988453865051, + "step": 2266 + }, + { + "epoch": 0.798942731277533, + "grad_norm": 1.4461573292928833, + "learning_rate": 1.4041091122556539e-05, + "loss": 0.6404637098312378, + "step": 2267 + }, + { + "epoch": 0.7992951541850221, + "grad_norm": 1.3566691109367863, + "learning_rate": 1.403576096978839e-05, + "loss": 0.6404134631156921, + "step": 2268 + }, + { + "epoch": 0.799647577092511, + "grad_norm": 1.5118859398886633, + "learning_rate": 1.4030429447004992e-05, + "loss": 0.7963751554489136, + "step": 2269 + }, + { + "epoch": 0.8, + "grad_norm": 1.632997404115334, + "learning_rate": 1.4025096556016224e-05, + "loss": 0.6648174524307251, + "step": 2270 + }, + { + "epoch": 0.800352422907489, + "grad_norm": 1.4103532345019565, + "learning_rate": 1.4019762298632445e-05, + "loss": 0.6661815047264099, + "step": 2271 + }, + { + "epoch": 0.800704845814978, + "grad_norm": 1.7237738440956045, + "learning_rate": 1.4014426676664462e-05, + "loss": 0.6194477081298828, + "step": 2272 + }, + { + "epoch": 0.801057268722467, + "grad_norm": 1.8457235726726873, + "learning_rate": 1.400908969192356e-05, + "loss": 0.6869276762008667, + "step": 2273 + }, + { + "epoch": 0.8014096916299559, + "grad_norm": 1.7545140114513338, + "learning_rate": 1.4003751346221472e-05, + "loss": 0.7352420091629028, + "step": 2274 + }, + { + "epoch": 0.801762114537445, + "grad_norm": 1.5994812918128933, + "learning_rate": 1.3998411641370405e-05, + "loss": 0.8212440609931946, 
+ "step": 2275 + }, + { + "epoch": 0.8021145374449339, + "grad_norm": 1.5868623288152288, + "learning_rate": 1.3993070579183021e-05, + "loss": 0.6897045969963074, + "step": 2276 + }, + { + "epoch": 0.8024669603524229, + "grad_norm": 1.716974382638037, + "learning_rate": 1.3987728161472442e-05, + "loss": 0.8406906127929688, + "step": 2277 + }, + { + "epoch": 0.8028193832599119, + "grad_norm": 1.6664794009014727, + "learning_rate": 1.3982384390052257e-05, + "loss": 0.6236976385116577, + "step": 2278 + }, + { + "epoch": 0.8031718061674009, + "grad_norm": 1.7056031446043847, + "learning_rate": 1.3977039266736508e-05, + "loss": 0.8110965490341187, + "step": 2279 + }, + { + "epoch": 0.8035242290748899, + "grad_norm": 1.6273998334271178, + "learning_rate": 1.3971692793339697e-05, + "loss": 0.635534405708313, + "step": 2280 + }, + { + "epoch": 0.8038766519823789, + "grad_norm": 1.5382566365445476, + "learning_rate": 1.3966344971676789e-05, + "loss": 0.7806028127670288, + "step": 2281 + }, + { + "epoch": 0.8042290748898678, + "grad_norm": 1.7131487498074927, + "learning_rate": 1.3960995803563195e-05, + "loss": 0.6635935306549072, + "step": 2282 + }, + { + "epoch": 0.8045814977973569, + "grad_norm": 1.6068551029738092, + "learning_rate": 1.39556452908148e-05, + "loss": 0.6064634323120117, + "step": 2283 + }, + { + "epoch": 0.8049339207048458, + "grad_norm": 1.7686604234656398, + "learning_rate": 1.3950293435247933e-05, + "loss": 0.760187029838562, + "step": 2284 + }, + { + "epoch": 0.8052863436123348, + "grad_norm": 1.5333245954906318, + "learning_rate": 1.3944940238679384e-05, + "loss": 0.7004644274711609, + "step": 2285 + }, + { + "epoch": 0.8056387665198238, + "grad_norm": 1.9274194313344672, + "learning_rate": 1.393958570292639e-05, + "loss": 0.7662780284881592, + "step": 2286 + }, + { + "epoch": 0.8059911894273127, + "grad_norm": 1.3943181397787612, + "learning_rate": 1.393422982980666e-05, + "loss": 0.7939090132713318, + "step": 2287 + }, + { + "epoch": 
0.8063436123348018, + "grad_norm": 1.377559765071464, + "learning_rate": 1.3928872621138337e-05, + "loss": 0.7461861371994019, + "step": 2288 + }, + { + "epoch": 0.8066960352422907, + "grad_norm": 1.4875661773009663, + "learning_rate": 1.3923514078740032e-05, + "loss": 0.5997019410133362, + "step": 2289 + }, + { + "epoch": 0.8070484581497798, + "grad_norm": 1.5379009713311227, + "learning_rate": 1.3918154204430801e-05, + "loss": 0.5437384843826294, + "step": 2290 + }, + { + "epoch": 0.8074008810572687, + "grad_norm": 1.8168415447512607, + "learning_rate": 1.3912793000030154e-05, + "loss": 0.7387127876281738, + "step": 2291 + }, + { + "epoch": 0.8077533039647578, + "grad_norm": 1.305308107523337, + "learning_rate": 1.3907430467358054e-05, + "loss": 0.483035147190094, + "step": 2292 + }, + { + "epoch": 0.8081057268722467, + "grad_norm": 1.3669144351401303, + "learning_rate": 1.3902066608234919e-05, + "loss": 0.6208503842353821, + "step": 2293 + }, + { + "epoch": 0.8084581497797357, + "grad_norm": 1.7196168695476914, + "learning_rate": 1.3896701424481603e-05, + "loss": 0.6691559553146362, + "step": 2294 + }, + { + "epoch": 0.8088105726872247, + "grad_norm": 1.6945751274550964, + "learning_rate": 1.3891334917919422e-05, + "loss": 0.8960802555084229, + "step": 2295 + }, + { + "epoch": 0.8091629955947136, + "grad_norm": 1.7625732291329363, + "learning_rate": 1.388596709037014e-05, + "loss": 0.669715404510498, + "step": 2296 + }, + { + "epoch": 0.8095154185022027, + "grad_norm": 1.4235891674683654, + "learning_rate": 1.3880597943655972e-05, + "loss": 0.7356190085411072, + "step": 2297 + }, + { + "epoch": 0.8098678414096916, + "grad_norm": 1.6403595773987272, + "learning_rate": 1.3875227479599565e-05, + "loss": 0.9158750176429749, + "step": 2298 + }, + { + "epoch": 0.8102202643171806, + "grad_norm": 1.718215094287951, + "learning_rate": 1.3869855700024031e-05, + "loss": 0.7395786643028259, + "step": 2299 + }, + { + "epoch": 0.8105726872246696, + "grad_norm": 
1.6360185397225708, + "learning_rate": 1.3864482606752922e-05, + "loss": 0.594106912612915, + "step": 2300 + }, + { + "epoch": 0.8109251101321586, + "grad_norm": 1.6395747499474045, + "learning_rate": 1.3859108201610236e-05, + "loss": 0.7853089570999146, + "step": 2301 + }, + { + "epoch": 0.8112775330396476, + "grad_norm": 1.6313227134249062, + "learning_rate": 1.3853732486420413e-05, + "loss": 0.8346991539001465, + "step": 2302 + }, + { + "epoch": 0.8116299559471366, + "grad_norm": 1.6254363131857819, + "learning_rate": 1.3848355463008344e-05, + "loss": 0.5493819117546082, + "step": 2303 + }, + { + "epoch": 0.8119823788546255, + "grad_norm": 1.566621350016491, + "learning_rate": 1.3842977133199363e-05, + "loss": 0.7474828958511353, + "step": 2304 + }, + { + "epoch": 0.8123348017621146, + "grad_norm": 1.6648296076023164, + "learning_rate": 1.3837597498819242e-05, + "loss": 0.6599621772766113, + "step": 2305 + }, + { + "epoch": 0.8126872246696035, + "grad_norm": 1.5217466732352583, + "learning_rate": 1.38322165616942e-05, + "loss": 0.6751214861869812, + "step": 2306 + }, + { + "epoch": 0.8130396475770925, + "grad_norm": 1.720054765999457, + "learning_rate": 1.3826834323650899e-05, + "loss": 0.7450453042984009, + "step": 2307 + }, + { + "epoch": 0.8133920704845815, + "grad_norm": 1.4739637914592345, + "learning_rate": 1.382145078651644e-05, + "loss": 0.7015345692634583, + "step": 2308 + }, + { + "epoch": 0.8137444933920704, + "grad_norm": 1.4921910425897076, + "learning_rate": 1.3816065952118368e-05, + "loss": 0.7161329984664917, + "step": 2309 + }, + { + "epoch": 0.8140969162995595, + "grad_norm": 1.576440929020717, + "learning_rate": 1.3810679822284665e-05, + "loss": 0.771783709526062, + "step": 2310 + }, + { + "epoch": 0.8144493392070484, + "grad_norm": 1.461165164266228, + "learning_rate": 1.3805292398843755e-05, + "loss": 0.6710794568061829, + "step": 2311 + }, + { + "epoch": 0.8148017621145375, + "grad_norm": 1.6256312715940777, + "learning_rate": 
1.3799903683624503e-05, + "loss": 0.6614924669265747, + "step": 2312 + }, + { + "epoch": 0.8151541850220264, + "grad_norm": 1.429649360127197, + "learning_rate": 1.3794513678456203e-05, + "loss": 0.6432225704193115, + "step": 2313 + }, + { + "epoch": 0.8155066079295155, + "grad_norm": 1.233784916709085, + "learning_rate": 1.3789122385168604e-05, + "loss": 0.6228311061859131, + "step": 2314 + }, + { + "epoch": 0.8158590308370044, + "grad_norm": 1.5182036065920572, + "learning_rate": 1.3783729805591875e-05, + "loss": 0.5597498416900635, + "step": 2315 + }, + { + "epoch": 0.8162114537444934, + "grad_norm": 1.954667780900904, + "learning_rate": 1.3778335941556629e-05, + "loss": 0.7651177048683167, + "step": 2316 + }, + { + "epoch": 0.8165638766519824, + "grad_norm": 1.3053642347729657, + "learning_rate": 1.3772940794893916e-05, + "loss": 0.5482406616210938, + "step": 2317 + }, + { + "epoch": 0.8169162995594713, + "grad_norm": 1.4432389735878668, + "learning_rate": 1.3767544367435229e-05, + "loss": 0.767236590385437, + "step": 2318 + }, + { + "epoch": 0.8172687224669604, + "grad_norm": 1.7071036751428772, + "learning_rate": 1.3762146661012471e-05, + "loss": 0.705253541469574, + "step": 2319 + }, + { + "epoch": 0.8176211453744493, + "grad_norm": 1.4969645559129943, + "learning_rate": 1.3756747677458008e-05, + "loss": 0.7800463438034058, + "step": 2320 + }, + { + "epoch": 0.8179735682819383, + "grad_norm": 1.6172262621918039, + "learning_rate": 1.3751347418604623e-05, + "loss": 0.7615088224411011, + "step": 2321 + }, + { + "epoch": 0.8183259911894273, + "grad_norm": 1.6932314886464006, + "learning_rate": 1.3745945886285536e-05, + "loss": 0.8004297614097595, + "step": 2322 + }, + { + "epoch": 0.8186784140969163, + "grad_norm": 1.605867375121777, + "learning_rate": 1.3740543082334399e-05, + "loss": 0.6428912281990051, + "step": 2323 + }, + { + "epoch": 0.8190308370044053, + "grad_norm": 1.4147620040703779, + "learning_rate": 1.3735139008585294e-05, + "loss": 
0.6702802777290344, + "step": 2324 + }, + { + "epoch": 0.8193832599118943, + "grad_norm": 1.3127203907182126, + "learning_rate": 1.3729733666872736e-05, + "loss": 0.6003440022468567, + "step": 2325 + }, + { + "epoch": 0.8197356828193832, + "grad_norm": 2.04633486984075, + "learning_rate": 1.3724327059031677e-05, + "loss": 0.8264240622520447, + "step": 2326 + }, + { + "epoch": 0.8200881057268723, + "grad_norm": 1.4037319277657845, + "learning_rate": 1.3718919186897481e-05, + "loss": 0.6974462866783142, + "step": 2327 + }, + { + "epoch": 0.8204405286343612, + "grad_norm": 1.7081986923623933, + "learning_rate": 1.3713510052305962e-05, + "loss": 0.8273947238922119, + "step": 2328 + }, + { + "epoch": 0.8207929515418502, + "grad_norm": 1.5000401588722418, + "learning_rate": 1.3708099657093348e-05, + "loss": 0.6230529546737671, + "step": 2329 + }, + { + "epoch": 0.8211453744493392, + "grad_norm": 1.6377312790274685, + "learning_rate": 1.37026880030963e-05, + "loss": 0.6997084021568298, + "step": 2330 + }, + { + "epoch": 0.8214977973568282, + "grad_norm": 1.582616740422673, + "learning_rate": 1.3697275092151908e-05, + "loss": 0.7212036848068237, + "step": 2331 + }, + { + "epoch": 0.8218502202643172, + "grad_norm": 1.5449017822829925, + "learning_rate": 1.3691860926097685e-05, + "loss": 0.7758737206459045, + "step": 2332 + }, + { + "epoch": 0.8222026431718061, + "grad_norm": 1.7784238395856364, + "learning_rate": 1.368644550677157e-05, + "loss": 0.62369704246521, + "step": 2333 + }, + { + "epoch": 0.8225550660792952, + "grad_norm": 1.6110908974677367, + "learning_rate": 1.3681028836011935e-05, + "loss": 0.8051841855049133, + "step": 2334 + }, + { + "epoch": 0.8229074889867841, + "grad_norm": 1.3626761635443752, + "learning_rate": 1.3675610915657568e-05, + "loss": 0.6087243556976318, + "step": 2335 + }, + { + "epoch": 0.8232599118942732, + "grad_norm": 1.9382202981470131, + "learning_rate": 1.3670191747547685e-05, + "loss": 0.6949581503868103, + "step": 2336 + }, + { + 
"epoch": 0.8236123348017621, + "grad_norm": 1.5451121537596906, + "learning_rate": 1.3664771333521922e-05, + "loss": 0.5621528029441833, + "step": 2337 + }, + { + "epoch": 0.8239647577092511, + "grad_norm": 1.622327701652298, + "learning_rate": 1.3659349675420346e-05, + "loss": 0.8731498718261719, + "step": 2338 + }, + { + "epoch": 0.8243171806167401, + "grad_norm": 1.5570249925953572, + "learning_rate": 1.3653926775083437e-05, + "loss": 0.6997240781784058, + "step": 2339 + }, + { + "epoch": 0.824669603524229, + "grad_norm": 1.6562463291138314, + "learning_rate": 1.3648502634352104e-05, + "loss": 0.8061426877975464, + "step": 2340 + }, + { + "epoch": 0.8250220264317181, + "grad_norm": 1.7061312576253802, + "learning_rate": 1.3643077255067667e-05, + "loss": 0.6186845302581787, + "step": 2341 + }, + { + "epoch": 0.825374449339207, + "grad_norm": 1.6605971928200247, + "learning_rate": 1.3637650639071884e-05, + "loss": 0.8098937273025513, + "step": 2342 + }, + { + "epoch": 0.825726872246696, + "grad_norm": 1.6091516027269386, + "learning_rate": 1.3632222788206916e-05, + "loss": 0.5810271501541138, + "step": 2343 + }, + { + "epoch": 0.826079295154185, + "grad_norm": 1.4965459276387059, + "learning_rate": 1.3626793704315348e-05, + "loss": 0.48309600353240967, + "step": 2344 + }, + { + "epoch": 0.826431718061674, + "grad_norm": 1.4326274242229946, + "learning_rate": 1.3621363389240188e-05, + "loss": 0.7366980314254761, + "step": 2345 + }, + { + "epoch": 0.826784140969163, + "grad_norm": 1.571199172280502, + "learning_rate": 1.3615931844824859e-05, + "loss": 0.6572252511978149, + "step": 2346 + }, + { + "epoch": 0.827136563876652, + "grad_norm": 1.3078300281358257, + "learning_rate": 1.3610499072913204e-05, + "loss": 0.6776653528213501, + "step": 2347 + }, + { + "epoch": 0.827488986784141, + "grad_norm": 1.772641440888185, + "learning_rate": 1.3605065075349473e-05, + "loss": 0.6536053419113159, + "step": 2348 + }, + { + "epoch": 0.82784140969163, + "grad_norm": 
1.600184025362065, + "learning_rate": 1.3599629853978342e-05, + "loss": 0.7000117301940918, + "step": 2349 + }, + { + "epoch": 0.8281938325991189, + "grad_norm": 1.5533713409132957, + "learning_rate": 1.3594193410644902e-05, + "loss": 0.6480045318603516, + "step": 2350 + }, + { + "epoch": 0.8285462555066079, + "grad_norm": 1.5474076871693587, + "learning_rate": 1.3588755747194656e-05, + "loss": 0.6428179740905762, + "step": 2351 + }, + { + "epoch": 0.8288986784140969, + "grad_norm": 1.3886734182652174, + "learning_rate": 1.3583316865473517e-05, + "loss": 0.618633508682251, + "step": 2352 + }, + { + "epoch": 0.8292511013215859, + "grad_norm": 1.5946423674864716, + "learning_rate": 1.357787676732782e-05, + "loss": 0.7289671897888184, + "step": 2353 + }, + { + "epoch": 0.8296035242290749, + "grad_norm": 1.687058159970245, + "learning_rate": 1.3572435454604307e-05, + "loss": 0.6969538927078247, + "step": 2354 + }, + { + "epoch": 0.8299559471365638, + "grad_norm": 1.565248379514886, + "learning_rate": 1.3566992929150137e-05, + "loss": 0.8490859270095825, + "step": 2355 + }, + { + "epoch": 0.8303083700440529, + "grad_norm": 1.532906793366292, + "learning_rate": 1.3561549192812877e-05, + "loss": 0.6883271336555481, + "step": 2356 + }, + { + "epoch": 0.8306607929515418, + "grad_norm": 1.3151000902691472, + "learning_rate": 1.3556104247440504e-05, + "loss": 0.68092280626297, + "step": 2357 + }, + { + "epoch": 0.8310132158590309, + "grad_norm": 1.2591886658215548, + "learning_rate": 1.3550658094881413e-05, + "loss": 0.7077454924583435, + "step": 2358 + }, + { + "epoch": 0.8313656387665198, + "grad_norm": 1.5452673483096302, + "learning_rate": 1.3545210736984393e-05, + "loss": 0.7364591360092163, + "step": 2359 + }, + { + "epoch": 0.8317180616740089, + "grad_norm": 1.4999509926023873, + "learning_rate": 1.3539762175598666e-05, + "loss": 0.8047930002212524, + "step": 2360 + }, + { + "epoch": 0.8320704845814978, + "grad_norm": 1.4862380654794773, + "learning_rate": 
1.3534312412573836e-05, + "loss": 0.7717781066894531, + "step": 2361 + }, + { + "epoch": 0.8324229074889867, + "grad_norm": 1.7032828917925678, + "learning_rate": 1.3528861449759938e-05, + "loss": 0.7228613495826721, + "step": 2362 + }, + { + "epoch": 0.8327753303964758, + "grad_norm": 1.5752771060390574, + "learning_rate": 1.3523409289007399e-05, + "loss": 0.8025436401367188, + "step": 2363 + }, + { + "epoch": 0.8331277533039647, + "grad_norm": 1.5214524176303228, + "learning_rate": 1.3517955932167057e-05, + "loss": 0.6653664112091064, + "step": 2364 + }, + { + "epoch": 0.8334801762114538, + "grad_norm": 1.4409217046848606, + "learning_rate": 1.3512501381090158e-05, + "loss": 0.709527313709259, + "step": 2365 + }, + { + "epoch": 0.8338325991189427, + "grad_norm": 1.4678807653581447, + "learning_rate": 1.3507045637628355e-05, + "loss": 0.7317520380020142, + "step": 2366 + }, + { + "epoch": 0.8341850220264317, + "grad_norm": 1.4520344718636113, + "learning_rate": 1.3501588703633703e-05, + "loss": 0.734069287776947, + "step": 2367 + }, + { + "epoch": 0.8345374449339207, + "grad_norm": 1.355050784601881, + "learning_rate": 1.349613058095866e-05, + "loss": 0.5950552225112915, + "step": 2368 + }, + { + "epoch": 0.8348898678414097, + "grad_norm": 1.3916802158941735, + "learning_rate": 1.3490671271456084e-05, + "loss": 0.5958857536315918, + "step": 2369 + }, + { + "epoch": 0.8352422907488987, + "grad_norm": 1.319860830071963, + "learning_rate": 1.348521077697925e-05, + "loss": 0.7094449996948242, + "step": 2370 + }, + { + "epoch": 0.8355947136563877, + "grad_norm": 1.283824481194398, + "learning_rate": 1.3479749099381818e-05, + "loss": 0.6260385513305664, + "step": 2371 + }, + { + "epoch": 0.8359471365638766, + "grad_norm": 1.3546760632082742, + "learning_rate": 1.3474286240517862e-05, + "loss": 0.65608811378479, + "step": 2372 + }, + { + "epoch": 0.8362995594713656, + "grad_norm": 1.5902013950729095, + "learning_rate": 1.346882220224185e-05, + "loss": 0.6942586898803711, 
+ "step": 2373 + }, + { + "epoch": 0.8366519823788546, + "grad_norm": 1.5432700710308092, + "learning_rate": 1.3463356986408653e-05, + "loss": 0.6831374168395996, + "step": 2374 + }, + { + "epoch": 0.8370044052863436, + "grad_norm": 1.2453712902306997, + "learning_rate": 1.3457890594873546e-05, + "loss": 0.6363790035247803, + "step": 2375 + }, + { + "epoch": 0.8373568281938326, + "grad_norm": 1.4407831477600082, + "learning_rate": 1.3452423029492194e-05, + "loss": 0.698935866355896, + "step": 2376 + }, + { + "epoch": 0.8377092511013216, + "grad_norm": 1.6516160077651472, + "learning_rate": 1.3446954292120667e-05, + "loss": 0.8569005727767944, + "step": 2377 + }, + { + "epoch": 0.8380616740088106, + "grad_norm": 1.4963554673760426, + "learning_rate": 1.3441484384615428e-05, + "loss": 0.8461613655090332, + "step": 2378 + }, + { + "epoch": 0.8384140969162995, + "grad_norm": 1.635336062215313, + "learning_rate": 1.343601330883335e-05, + "loss": 0.7481078505516052, + "step": 2379 + }, + { + "epoch": 0.8387665198237886, + "grad_norm": 1.1164155853725835, + "learning_rate": 1.343054106663168e-05, + "loss": 0.5632544755935669, + "step": 2380 + }, + { + "epoch": 0.8391189427312775, + "grad_norm": 1.2387886339726162, + "learning_rate": 1.3425067659868084e-05, + "loss": 0.528980016708374, + "step": 2381 + }, + { + "epoch": 0.8394713656387666, + "grad_norm": 1.2987181937645196, + "learning_rate": 1.341959309040061e-05, + "loss": 0.5520849227905273, + "step": 2382 + }, + { + "epoch": 0.8398237885462555, + "grad_norm": 1.1709661282123542, + "learning_rate": 1.34141173600877e-05, + "loss": 0.569744348526001, + "step": 2383 + }, + { + "epoch": 0.8401762114537445, + "grad_norm": 1.1526596958180186, + "learning_rate": 1.3408640470788202e-05, + "loss": 0.595065712928772, + "step": 2384 + }, + { + "epoch": 0.8405286343612335, + "grad_norm": 1.716530250506247, + "learning_rate": 1.3403162424361342e-05, + "loss": 0.6993277072906494, + "step": 2385 + }, + { + "epoch": 0.8408810572687224, 
+ "grad_norm": 1.467497517918387, + "learning_rate": 1.3397683222666748e-05, + "loss": 0.6183342933654785, + "step": 2386 + }, + { + "epoch": 0.8412334801762115, + "grad_norm": 1.5660447986557493, + "learning_rate": 1.339220286756444e-05, + "loss": 0.7280797362327576, + "step": 2387 + }, + { + "epoch": 0.8415859030837004, + "grad_norm": 1.5538390945999534, + "learning_rate": 1.3386721360914829e-05, + "loss": 0.7377837896347046, + "step": 2388 + }, + { + "epoch": 0.8419383259911895, + "grad_norm": 1.3658202604001934, + "learning_rate": 1.3381238704578718e-05, + "loss": 0.7202758193016052, + "step": 2389 + }, + { + "epoch": 0.8422907488986784, + "grad_norm": 1.4864419338323784, + "learning_rate": 1.3375754900417291e-05, + "loss": 0.5899994969367981, + "step": 2390 + }, + { + "epoch": 0.8426431718061674, + "grad_norm": 1.6545749228929092, + "learning_rate": 1.3370269950292133e-05, + "loss": 0.8128558993339539, + "step": 2391 + }, + { + "epoch": 0.8429955947136564, + "grad_norm": 1.4863580222240895, + "learning_rate": 1.3364783856065213e-05, + "loss": 0.8222962617874146, + "step": 2392 + }, + { + "epoch": 0.8433480176211454, + "grad_norm": 1.5392010225603865, + "learning_rate": 1.3359296619598894e-05, + "loss": 0.7898896932601929, + "step": 2393 + }, + { + "epoch": 0.8437004405286344, + "grad_norm": 1.59106154269148, + "learning_rate": 1.3353808242755912e-05, + "loss": 0.6596726179122925, + "step": 2394 + }, + { + "epoch": 0.8440528634361234, + "grad_norm": 1.6652244607977948, + "learning_rate": 1.3348318727399411e-05, + "loss": 0.8073080778121948, + "step": 2395 + }, + { + "epoch": 0.8444052863436123, + "grad_norm": 1.582055504815832, + "learning_rate": 1.3342828075392902e-05, + "loss": 0.6640043258666992, + "step": 2396 + }, + { + "epoch": 0.8447577092511013, + "grad_norm": 1.415789065826391, + "learning_rate": 1.3337336288600297e-05, + "loss": 0.6067632436752319, + "step": 2397 + }, + { + "epoch": 0.8451101321585903, + "grad_norm": 1.308177796408265, + 
"learning_rate": 1.3331843368885882e-05, + "loss": 0.6891398429870605, + "step": 2398 + }, + { + "epoch": 0.8454625550660793, + "grad_norm": 1.276250238749864, + "learning_rate": 1.3326349318114335e-05, + "loss": 0.6007423996925354, + "step": 2399 + }, + { + "epoch": 0.8458149779735683, + "grad_norm": 1.6159836309404996, + "learning_rate": 1.3320854138150712e-05, + "loss": 0.7314017415046692, + "step": 2400 + }, + { + "epoch": 0.8461674008810572, + "grad_norm": 1.5060027308979995, + "learning_rate": 1.3315357830860461e-05, + "loss": 0.7352335453033447, + "step": 2401 + }, + { + "epoch": 0.8465198237885463, + "grad_norm": 1.3629774951204896, + "learning_rate": 1.3309860398109402e-05, + "loss": 0.6546785831451416, + "step": 2402 + }, + { + "epoch": 0.8468722466960352, + "grad_norm": 1.4629106252693242, + "learning_rate": 1.3304361841763746e-05, + "loss": 0.590252697467804, + "step": 2403 + }, + { + "epoch": 0.8472246696035243, + "grad_norm": 1.5501476697602834, + "learning_rate": 1.3298862163690078e-05, + "loss": 0.6864089369773865, + "step": 2404 + }, + { + "epoch": 0.8475770925110132, + "grad_norm": 1.452376737172979, + "learning_rate": 1.3293361365755373e-05, + "loss": 0.7818390130996704, + "step": 2405 + }, + { + "epoch": 0.8479295154185023, + "grad_norm": 1.9084475381981967, + "learning_rate": 1.3287859449826977e-05, + "loss": 0.7461166381835938, + "step": 2406 + }, + { + "epoch": 0.8482819383259912, + "grad_norm": 1.7337796671611372, + "learning_rate": 1.3282356417772618e-05, + "loss": 0.7519750595092773, + "step": 2407 + }, + { + "epoch": 0.8486343612334801, + "grad_norm": 1.445619912428175, + "learning_rate": 1.3276852271460406e-05, + "loss": 0.7041791081428528, + "step": 2408 + }, + { + "epoch": 0.8489867841409692, + "grad_norm": 1.3131157575910486, + "learning_rate": 1.327134701275883e-05, + "loss": 0.5649428367614746, + "step": 2409 + }, + { + "epoch": 0.8493392070484581, + "grad_norm": 1.838398891045019, + "learning_rate": 1.3265840643536746e-05, + 
"loss": 0.6607545614242554, + "step": 2410 + }, + { + "epoch": 0.8496916299559472, + "grad_norm": 1.590568626194504, + "learning_rate": 1.3260333165663406e-05, + "loss": 0.7393547892570496, + "step": 2411 + }, + { + "epoch": 0.8500440528634361, + "grad_norm": 1.660269046740627, + "learning_rate": 1.325482458100842e-05, + "loss": 0.6550742387771606, + "step": 2412 + }, + { + "epoch": 0.8503964757709251, + "grad_norm": 1.3409806360783354, + "learning_rate": 1.324931489144178e-05, + "loss": 0.5104576349258423, + "step": 2413 + }, + { + "epoch": 0.8507488986784141, + "grad_norm": 1.7056036938051933, + "learning_rate": 1.3243804098833859e-05, + "loss": 0.7679733037948608, + "step": 2414 + }, + { + "epoch": 0.8511013215859031, + "grad_norm": 1.3058704920771766, + "learning_rate": 1.3238292205055397e-05, + "loss": 0.6516377925872803, + "step": 2415 + }, + { + "epoch": 0.8514537444933921, + "grad_norm": 1.4749751578789572, + "learning_rate": 1.3232779211977509e-05, + "loss": 0.8509281277656555, + "step": 2416 + }, + { + "epoch": 0.8518061674008811, + "grad_norm": 1.6532741255389543, + "learning_rate": 1.3227265121471691e-05, + "loss": 0.5643317103385925, + "step": 2417 + }, + { + "epoch": 0.85215859030837, + "grad_norm": 1.4681710603298503, + "learning_rate": 1.3221749935409798e-05, + "loss": 0.5294302105903625, + "step": 2418 + }, + { + "epoch": 0.852511013215859, + "grad_norm": 1.4914498870655002, + "learning_rate": 1.3216233655664067e-05, + "loss": 0.6301594972610474, + "step": 2419 + }, + { + "epoch": 0.852863436123348, + "grad_norm": 1.399957922496421, + "learning_rate": 1.32107162841071e-05, + "loss": 0.6930294036865234, + "step": 2420 + }, + { + "epoch": 0.853215859030837, + "grad_norm": 1.4069779391578274, + "learning_rate": 1.3205197822611876e-05, + "loss": 0.6266883611679077, + "step": 2421 + }, + { + "epoch": 0.853568281938326, + "grad_norm": 1.7817063662748283, + "learning_rate": 1.3199678273051743e-05, + "loss": 0.7789868116378784, + "step": 2422 + }, + { + 
"epoch": 0.853920704845815, + "grad_norm": 1.3387299141459739, + "learning_rate": 1.3194157637300416e-05, + "loss": 0.7148274779319763, + "step": 2423 + }, + { + "epoch": 0.854273127753304, + "grad_norm": 1.4757263125304436, + "learning_rate": 1.3188635917231972e-05, + "loss": 0.550403356552124, + "step": 2424 + }, + { + "epoch": 0.8546255506607929, + "grad_norm": 1.563076871593329, + "learning_rate": 1.3183113114720872e-05, + "loss": 0.6650338768959045, + "step": 2425 + }, + { + "epoch": 0.854977973568282, + "grad_norm": 1.569123753374588, + "learning_rate": 1.317758923164193e-05, + "loss": 0.7774436473846436, + "step": 2426 + }, + { + "epoch": 0.8553303964757709, + "grad_norm": 1.407079429107656, + "learning_rate": 1.3172064269870335e-05, + "loss": 0.6192025542259216, + "step": 2427 + }, + { + "epoch": 0.85568281938326, + "grad_norm": 1.6230407627498116, + "learning_rate": 1.3166538231281635e-05, + "loss": 0.6758309602737427, + "step": 2428 + }, + { + "epoch": 0.8560352422907489, + "grad_norm": 1.6026256588862147, + "learning_rate": 1.3161011117751756e-05, + "loss": 0.7311116456985474, + "step": 2429 + }, + { + "epoch": 0.8563876651982378, + "grad_norm": 1.797024553793142, + "learning_rate": 1.3155482931156977e-05, + "loss": 0.7525666952133179, + "step": 2430 + }, + { + "epoch": 0.8567400881057269, + "grad_norm": 1.7067244433524313, + "learning_rate": 1.3149953673373945e-05, + "loss": 0.6903671026229858, + "step": 2431 + }, + { + "epoch": 0.8570925110132158, + "grad_norm": 1.2833360218942749, + "learning_rate": 1.314442334627967e-05, + "loss": 0.6036638021469116, + "step": 2432 + }, + { + "epoch": 0.8574449339207049, + "grad_norm": 1.6354054518430503, + "learning_rate": 1.3138891951751526e-05, + "loss": 0.6490209698677063, + "step": 2433 + }, + { + "epoch": 0.8577973568281938, + "grad_norm": 1.6970156912379664, + "learning_rate": 1.3133359491667252e-05, + "loss": 0.692024290561676, + "step": 2434 + }, + { + "epoch": 0.8581497797356828, + "grad_norm": 
1.4031255607051936, + "learning_rate": 1.3127825967904944e-05, + "loss": 0.6977943181991577, + "step": 2435 + }, + { + "epoch": 0.8585022026431718, + "grad_norm": 1.3842045822286646, + "learning_rate": 1.312229138234306e-05, + "loss": 0.625649094581604, + "step": 2436 + }, + { + "epoch": 0.8588546255506608, + "grad_norm": 1.5910466082409926, + "learning_rate": 1.3116755736860422e-05, + "loss": 0.671939492225647, + "step": 2437 + }, + { + "epoch": 0.8592070484581498, + "grad_norm": 1.3856883940296008, + "learning_rate": 1.3111219033336211e-05, + "loss": 0.700029194355011, + "step": 2438 + }, + { + "epoch": 0.8595594713656388, + "grad_norm": 1.3907118477619378, + "learning_rate": 1.3105681273649959e-05, + "loss": 0.6339718699455261, + "step": 2439 + }, + { + "epoch": 0.8599118942731278, + "grad_norm": 1.306943148235595, + "learning_rate": 1.3100142459681569e-05, + "loss": 0.7105488777160645, + "step": 2440 + }, + { + "epoch": 0.8602643171806167, + "grad_norm": 1.4503861250177865, + "learning_rate": 1.3094602593311294e-05, + "loss": 0.616797924041748, + "step": 2441 + }, + { + "epoch": 0.8606167400881057, + "grad_norm": 1.5110286813274958, + "learning_rate": 1.3089061676419746e-05, + "loss": 0.7167524099349976, + "step": 2442 + }, + { + "epoch": 0.8609691629955947, + "grad_norm": 1.5215961993133658, + "learning_rate": 1.3083519710887895e-05, + "loss": 0.5499090552330017, + "step": 2443 + }, + { + "epoch": 0.8613215859030837, + "grad_norm": 1.4623789546240658, + "learning_rate": 1.3077976698597064e-05, + "loss": 0.5764151811599731, + "step": 2444 + }, + { + "epoch": 0.8616740088105727, + "grad_norm": 1.438510619597336, + "learning_rate": 1.3072432641428931e-05, + "loss": 0.7171419858932495, + "step": 2445 + }, + { + "epoch": 0.8620264317180617, + "grad_norm": 1.3023250448197168, + "learning_rate": 1.3066887541265539e-05, + "loss": 0.7546026706695557, + "step": 2446 + }, + { + "epoch": 0.8623788546255506, + "grad_norm": 1.2250371592811133, + "learning_rate": 
1.306134139998927e-05, + "loss": 0.5884296298027039, + "step": 2447 + }, + { + "epoch": 0.8627312775330397, + "grad_norm": 1.3135127283076564, + "learning_rate": 1.3055794219482867e-05, + "loss": 0.6877926588058472, + "step": 2448 + }, + { + "epoch": 0.8630837004405286, + "grad_norm": 1.5935068741769265, + "learning_rate": 1.3050246001629425e-05, + "loss": 0.598037838935852, + "step": 2449 + }, + { + "epoch": 0.8634361233480177, + "grad_norm": 1.4128431939298278, + "learning_rate": 1.3044696748312395e-05, + "loss": 0.6560795307159424, + "step": 2450 + }, + { + "epoch": 0.8637885462555066, + "grad_norm": 1.5856094022002207, + "learning_rate": 1.3039146461415575e-05, + "loss": 0.7130829691886902, + "step": 2451 + }, + { + "epoch": 0.8641409691629955, + "grad_norm": 1.9167144031452974, + "learning_rate": 1.303359514282311e-05, + "loss": 0.7402251958847046, + "step": 2452 + }, + { + "epoch": 0.8644933920704846, + "grad_norm": 1.4143817039312587, + "learning_rate": 1.3028042794419502e-05, + "loss": 0.6610683798789978, + "step": 2453 + }, + { + "epoch": 0.8648458149779735, + "grad_norm": 1.6544654323663863, + "learning_rate": 1.3022489418089606e-05, + "loss": 0.84892737865448, + "step": 2454 + }, + { + "epoch": 0.8651982378854626, + "grad_norm": 1.689285386487206, + "learning_rate": 1.3016935015718612e-05, + "loss": 0.7285948991775513, + "step": 2455 + }, + { + "epoch": 0.8655506607929515, + "grad_norm": 1.46262615014944, + "learning_rate": 1.3011379589192074e-05, + "loss": 0.6800004839897156, + "step": 2456 + }, + { + "epoch": 0.8659030837004406, + "grad_norm": 1.492659523558787, + "learning_rate": 1.3005823140395878e-05, + "loss": 0.618618369102478, + "step": 2457 + }, + { + "epoch": 0.8662555066079295, + "grad_norm": 1.8084387802865425, + "learning_rate": 1.3000265671216278e-05, + "loss": 0.7657757997512817, + "step": 2458 + }, + { + "epoch": 0.8666079295154185, + "grad_norm": 1.5490708834885107, + "learning_rate": 1.2994707183539848e-05, + "loss": 0.7814151644706726, 
+ "step": 2459 + }, + { + "epoch": 0.8669603524229075, + "grad_norm": 1.2899412950022648, + "learning_rate": 1.2989147679253531e-05, + "loss": 0.6494930982589722, + "step": 2460 + }, + { + "epoch": 0.8673127753303965, + "grad_norm": 1.5543724658760723, + "learning_rate": 1.2983587160244602e-05, + "loss": 0.6498425006866455, + "step": 2461 + }, + { + "epoch": 0.8676651982378855, + "grad_norm": 1.5210228165977844, + "learning_rate": 1.2978025628400684e-05, + "loss": 0.635313093662262, + "step": 2462 + }, + { + "epoch": 0.8680176211453744, + "grad_norm": 1.500755936886382, + "learning_rate": 1.2972463085609744e-05, + "loss": 0.6892971992492676, + "step": 2463 + }, + { + "epoch": 0.8683700440528634, + "grad_norm": 1.3872566957567176, + "learning_rate": 1.2966899533760095e-05, + "loss": 0.691922128200531, + "step": 2464 + }, + { + "epoch": 0.8687224669603524, + "grad_norm": 1.773327696286038, + "learning_rate": 1.2961334974740386e-05, + "loss": 0.5764378309249878, + "step": 2465 + }, + { + "epoch": 0.8690748898678414, + "grad_norm": 1.6231464224655543, + "learning_rate": 1.2955769410439616e-05, + "loss": 0.8193005919456482, + "step": 2466 + }, + { + "epoch": 0.8694273127753304, + "grad_norm": 1.4243504226778951, + "learning_rate": 1.2950202842747115e-05, + "loss": 0.6141501665115356, + "step": 2467 + }, + { + "epoch": 0.8697797356828194, + "grad_norm": 1.5061592811010869, + "learning_rate": 1.2944635273552565e-05, + "loss": 0.7464454174041748, + "step": 2468 + }, + { + "epoch": 0.8701321585903083, + "grad_norm": 1.3349759192393535, + "learning_rate": 1.293906670474598e-05, + "loss": 0.5970025062561035, + "step": 2469 + }, + { + "epoch": 0.8704845814977974, + "grad_norm": 1.6022434524431073, + "learning_rate": 1.2933497138217714e-05, + "loss": 0.7247673273086548, + "step": 2470 + }, + { + "epoch": 0.8708370044052863, + "grad_norm": 1.535051650641408, + "learning_rate": 1.2927926575858463e-05, + "loss": 0.746272087097168, + "step": 2471 + }, + { + "epoch": 
0.8711894273127754, + "grad_norm": 1.5072596947359789, + "learning_rate": 1.2922355019559265e-05, + "loss": 0.6918776035308838, + "step": 2472 + }, + { + "epoch": 0.8715418502202643, + "grad_norm": 1.553343209452483, + "learning_rate": 1.2916782471211478e-05, + "loss": 0.6056039929389954, + "step": 2473 + }, + { + "epoch": 0.8718942731277532, + "grad_norm": 1.3670048649799473, + "learning_rate": 1.2911208932706821e-05, + "loss": 0.6699481010437012, + "step": 2474 + }, + { + "epoch": 0.8722466960352423, + "grad_norm": 1.4719810242076543, + "learning_rate": 1.2905634405937327e-05, + "loss": 0.5141814947128296, + "step": 2475 + }, + { + "epoch": 0.8725991189427312, + "grad_norm": 1.5819338229003952, + "learning_rate": 1.2900058892795383e-05, + "loss": 0.7521284818649292, + "step": 2476 + }, + { + "epoch": 0.8729515418502203, + "grad_norm": 2.2082732494247916, + "learning_rate": 1.2894482395173695e-05, + "loss": 0.6878937482833862, + "step": 2477 + }, + { + "epoch": 0.8733039647577092, + "grad_norm": 1.3942904192465777, + "learning_rate": 1.2888904914965317e-05, + "loss": 0.5963379144668579, + "step": 2478 + }, + { + "epoch": 0.8736563876651983, + "grad_norm": 1.7634340153188761, + "learning_rate": 1.2883326454063623e-05, + "loss": 0.7572320103645325, + "step": 2479 + }, + { + "epoch": 0.8740088105726872, + "grad_norm": 1.399026210420982, + "learning_rate": 1.2877747014362334e-05, + "loss": 0.7047982215881348, + "step": 2480 + }, + { + "epoch": 0.8743612334801762, + "grad_norm": 2.0588397887454715, + "learning_rate": 1.2872166597755488e-05, + "loss": 0.6449024677276611, + "step": 2481 + }, + { + "epoch": 0.8747136563876652, + "grad_norm": 1.6446468607591163, + "learning_rate": 1.2866585206137469e-05, + "loss": 0.7590922117233276, + "step": 2482 + }, + { + "epoch": 0.8750660792951542, + "grad_norm": 1.6164965426300901, + "learning_rate": 1.2861002841402983e-05, + "loss": 0.7534210085868835, + "step": 2483 + }, + { + "epoch": 0.8754185022026432, + "grad_norm": 
1.9198456186069754, + "learning_rate": 1.2855419505447073e-05, + "loss": 0.7091225385665894, + "step": 2484 + }, + { + "epoch": 0.8757709251101321, + "grad_norm": 1.5347710098555305, + "learning_rate": 1.2849835200165104e-05, + "loss": 0.7578933835029602, + "step": 2485 + }, + { + "epoch": 0.8761233480176212, + "grad_norm": 1.3282869408675961, + "learning_rate": 1.2844249927452771e-05, + "loss": 0.5938349962234497, + "step": 2486 + }, + { + "epoch": 0.8764757709251101, + "grad_norm": 1.5090052513716286, + "learning_rate": 1.2838663689206108e-05, + "loss": 0.5726315379142761, + "step": 2487 + }, + { + "epoch": 0.8768281938325991, + "grad_norm": 1.450396836473225, + "learning_rate": 1.2833076487321465e-05, + "loss": 0.8181554079055786, + "step": 2488 + }, + { + "epoch": 0.8771806167400881, + "grad_norm": 1.71919397348368, + "learning_rate": 1.2827488323695522e-05, + "loss": 0.7465275526046753, + "step": 2489 + }, + { + "epoch": 0.8775330396475771, + "grad_norm": 1.2623461784182488, + "learning_rate": 1.2821899200225288e-05, + "loss": 0.6083456873893738, + "step": 2490 + }, + { + "epoch": 0.877885462555066, + "grad_norm": 1.4922167619772364, + "learning_rate": 1.2816309118808095e-05, + "loss": 0.6393307447433472, + "step": 2491 + }, + { + "epoch": 0.8782378854625551, + "grad_norm": 1.3846980777960398, + "learning_rate": 1.2810718081341604e-05, + "loss": 0.6562504768371582, + "step": 2492 + }, + { + "epoch": 0.878590308370044, + "grad_norm": 1.5590691123255283, + "learning_rate": 1.2805126089723798e-05, + "loss": 0.6737300753593445, + "step": 2493 + }, + { + "epoch": 0.8789427312775331, + "grad_norm": 1.7724399876158112, + "learning_rate": 1.2799533145852982e-05, + "loss": 0.6246815919876099, + "step": 2494 + }, + { + "epoch": 0.879295154185022, + "grad_norm": 1.7718655540042538, + "learning_rate": 1.2793939251627788e-05, + "loss": 0.7499577403068542, + "step": 2495 + }, + { + "epoch": 0.8796475770925111, + "grad_norm": 1.6628095797742937, + "learning_rate": 
1.2788344408947171e-05, + "loss": 0.7645655870437622, + "step": 2496 + }, + { + "epoch": 0.88, + "grad_norm": 1.732888201165417, + "learning_rate": 1.27827486197104e-05, + "loss": 0.7407524585723877, + "step": 2497 + }, + { + "epoch": 0.8803524229074889, + "grad_norm": 1.590151572985607, + "learning_rate": 1.2777151885817078e-05, + "loss": 0.6401108503341675, + "step": 2498 + }, + { + "epoch": 0.880704845814978, + "grad_norm": 1.5984459598023502, + "learning_rate": 1.2771554209167116e-05, + "loss": 0.8332269191741943, + "step": 2499 + }, + { + "epoch": 0.8810572687224669, + "grad_norm": 1.61859187638703, + "learning_rate": 1.2765955591660757e-05, + "loss": 0.7677830457687378, + "step": 2500 + }, + { + "epoch": 0.881409691629956, + "grad_norm": 1.4420535275594295, + "learning_rate": 1.2760356035198553e-05, + "loss": 0.8532943725585938, + "step": 2501 + }, + { + "epoch": 0.8817621145374449, + "grad_norm": 1.3662949943021319, + "learning_rate": 1.2754755541681384e-05, + "loss": 0.6287009716033936, + "step": 2502 + }, + { + "epoch": 0.882114537444934, + "grad_norm": 1.38981570117233, + "learning_rate": 1.2749154113010432e-05, + "loss": 0.7039133310317993, + "step": 2503 + }, + { + "epoch": 0.8824669603524229, + "grad_norm": 1.6518390089780828, + "learning_rate": 1.2743551751087222e-05, + "loss": 0.6959357857704163, + "step": 2504 + }, + { + "epoch": 0.8828193832599119, + "grad_norm": 1.3554006828606007, + "learning_rate": 1.2737948457813571e-05, + "loss": 0.6862938404083252, + "step": 2505 + }, + { + "epoch": 0.8831718061674009, + "grad_norm": 1.6773466383223146, + "learning_rate": 1.273234423509163e-05, + "loss": 0.6903352737426758, + "step": 2506 + }, + { + "epoch": 0.8835242290748899, + "grad_norm": 1.374322606051121, + "learning_rate": 1.2726739084823851e-05, + "loss": 0.7226145267486572, + "step": 2507 + }, + { + "epoch": 0.8838766519823789, + "grad_norm": 1.4091144718113782, + "learning_rate": 1.2721133008913015e-05, + "loss": 0.7865043878555298, + "step": 2508 + 
}, + { + "epoch": 0.8842290748898678, + "grad_norm": 1.4501170174913356, + "learning_rate": 1.2715526009262209e-05, + "loss": 0.6594572067260742, + "step": 2509 + }, + { + "epoch": 0.8845814977973568, + "grad_norm": 1.3500042347590218, + "learning_rate": 1.270991808777483e-05, + "loss": 0.5967481136322021, + "step": 2510 + }, + { + "epoch": 0.8849339207048458, + "grad_norm": 1.3600104271689806, + "learning_rate": 1.2704309246354599e-05, + "loss": 0.7843632698059082, + "step": 2511 + }, + { + "epoch": 0.8852863436123348, + "grad_norm": 1.3543191802484777, + "learning_rate": 1.2698699486905538e-05, + "loss": 0.7475506067276001, + "step": 2512 + }, + { + "epoch": 0.8856387665198238, + "grad_norm": 1.4881501151953718, + "learning_rate": 1.2693088811331987e-05, + "loss": 0.8082534670829773, + "step": 2513 + }, + { + "epoch": 0.8859911894273128, + "grad_norm": 1.6899694353159702, + "learning_rate": 1.2687477221538598e-05, + "loss": 0.7421785593032837, + "step": 2514 + }, + { + "epoch": 0.8863436123348017, + "grad_norm": 1.295151070825849, + "learning_rate": 1.2681864719430328e-05, + "loss": 0.6268718242645264, + "step": 2515 + }, + { + "epoch": 0.8866960352422908, + "grad_norm": 1.595396389533138, + "learning_rate": 1.2676251306912448e-05, + "loss": 0.7285459041595459, + "step": 2516 + }, + { + "epoch": 0.8870484581497797, + "grad_norm": 1.4826705601530517, + "learning_rate": 1.2670636985890542e-05, + "loss": 0.6132184267044067, + "step": 2517 + }, + { + "epoch": 0.8874008810572688, + "grad_norm": 1.4018565352445778, + "learning_rate": 1.2665021758270488e-05, + "loss": 0.5550754070281982, + "step": 2518 + }, + { + "epoch": 0.8877533039647577, + "grad_norm": 1.3628132273232696, + "learning_rate": 1.2659405625958488e-05, + "loss": 0.5357390642166138, + "step": 2519 + }, + { + "epoch": 0.8881057268722466, + "grad_norm": 1.4153066703364516, + "learning_rate": 1.2653788590861039e-05, + "loss": 0.5858328342437744, + "step": 2520 + }, + { + "epoch": 0.8884581497797357, + 
"grad_norm": 1.731815068535558, + "learning_rate": 1.2648170654884955e-05, + "loss": 0.7109283208847046, + "step": 2521 + }, + { + "epoch": 0.8888105726872246, + "grad_norm": 1.9753429482306435, + "learning_rate": 1.2642551819937348e-05, + "loss": 0.808137834072113, + "step": 2522 + }, + { + "epoch": 0.8891629955947137, + "grad_norm": 1.6385693606484741, + "learning_rate": 1.2636932087925637e-05, + "loss": 0.587998628616333, + "step": 2523 + }, + { + "epoch": 0.8895154185022026, + "grad_norm": 1.4234526769499198, + "learning_rate": 1.2631311460757545e-05, + "loss": 0.5555537343025208, + "step": 2524 + }, + { + "epoch": 0.8898678414096917, + "grad_norm": 1.4118650122814267, + "learning_rate": 1.2625689940341102e-05, + "loss": 0.641632080078125, + "step": 2525 + }, + { + "epoch": 0.8902202643171806, + "grad_norm": 1.5401015682174186, + "learning_rate": 1.262006752858464e-05, + "loss": 0.7005184888839722, + "step": 2526 + }, + { + "epoch": 0.8905726872246696, + "grad_norm": 1.272518513643159, + "learning_rate": 1.2614444227396792e-05, + "loss": 0.6907261610031128, + "step": 2527 + }, + { + "epoch": 0.8909251101321586, + "grad_norm": 1.4162379009723582, + "learning_rate": 1.2608820038686492e-05, + "loss": 0.5757718086242676, + "step": 2528 + }, + { + "epoch": 0.8912775330396476, + "grad_norm": 1.888252337049927, + "learning_rate": 1.2603194964362979e-05, + "loss": 0.6462569832801819, + "step": 2529 + }, + { + "epoch": 0.8916299559471366, + "grad_norm": 2.6509089623338586, + "learning_rate": 1.2597569006335787e-05, + "loss": 0.7028999328613281, + "step": 2530 + }, + { + "epoch": 0.8919823788546255, + "grad_norm": 1.3325876541370223, + "learning_rate": 1.2591942166514763e-05, + "loss": 0.5789325833320618, + "step": 2531 + }, + { + "epoch": 0.8923348017621145, + "grad_norm": 1.5373223041612576, + "learning_rate": 1.258631444681003e-05, + "loss": 0.6545255184173584, + "step": 2532 + }, + { + "epoch": 0.8926872246696035, + "grad_norm": 1.560686991488605, + "learning_rate": 
1.258068584913204e-05, + "loss": 0.7227469682693481, + "step": 2533 + }, + { + "epoch": 0.8930396475770925, + "grad_norm": 1.3545909427052794, + "learning_rate": 1.2575056375391513e-05, + "loss": 0.5985771417617798, + "step": 2534 + }, + { + "epoch": 0.8933920704845815, + "grad_norm": 1.5422643503857134, + "learning_rate": 1.2569426027499485e-05, + "loss": 0.6705960035324097, + "step": 2535 + }, + { + "epoch": 0.8937444933920705, + "grad_norm": 1.5427105799340322, + "learning_rate": 1.2563794807367284e-05, + "loss": 0.6662027835845947, + "step": 2536 + }, + { + "epoch": 0.8940969162995595, + "grad_norm": 1.5270286613671318, + "learning_rate": 1.2558162716906537e-05, + "loss": 0.7742453813552856, + "step": 2537 + }, + { + "epoch": 0.8944493392070485, + "grad_norm": 1.628032718158035, + "learning_rate": 1.255252975802916e-05, + "loss": 0.6124528050422668, + "step": 2538 + }, + { + "epoch": 0.8948017621145374, + "grad_norm": 1.455711423520218, + "learning_rate": 1.2546895932647365e-05, + "loss": 0.5728615522384644, + "step": 2539 + }, + { + "epoch": 0.8951541850220265, + "grad_norm": 1.5737389396802581, + "learning_rate": 1.2541261242673665e-05, + "loss": 0.6347167491912842, + "step": 2540 + }, + { + "epoch": 0.8955066079295154, + "grad_norm": 1.62324317727844, + "learning_rate": 1.2535625690020861e-05, + "loss": 0.6350656747817993, + "step": 2541 + }, + { + "epoch": 0.8958590308370044, + "grad_norm": 1.674339310689998, + "learning_rate": 1.2529989276602043e-05, + "loss": 0.7538303732872009, + "step": 2542 + }, + { + "epoch": 0.8962114537444934, + "grad_norm": 1.5900983527544528, + "learning_rate": 1.2524352004330607e-05, + "loss": 0.8154318928718567, + "step": 2543 + }, + { + "epoch": 0.8965638766519823, + "grad_norm": 1.4033932104877718, + "learning_rate": 1.2518713875120222e-05, + "loss": 0.5313037633895874, + "step": 2544 + }, + { + "epoch": 0.8969162995594714, + "grad_norm": 1.3069539051845793, + "learning_rate": 1.2513074890884864e-05, + "loss": 
0.740921139717102, + "step": 2545 + }, + { + "epoch": 0.8972687224669603, + "grad_norm": 1.593785966579892, + "learning_rate": 1.250743505353879e-05, + "loss": 0.6079888343811035, + "step": 2546 + }, + { + "epoch": 0.8976211453744494, + "grad_norm": 1.266024042192646, + "learning_rate": 1.2501794364996553e-05, + "loss": 0.46736663579940796, + "step": 2547 + }, + { + "epoch": 0.8979735682819383, + "grad_norm": 1.5066472302506413, + "learning_rate": 1.2496152827172982e-05, + "loss": 0.5670880079269409, + "step": 2548 + }, + { + "epoch": 0.8983259911894274, + "grad_norm": 1.4991563073413907, + "learning_rate": 1.2490510441983212e-05, + "loss": 0.7845931649208069, + "step": 2549 + }, + { + "epoch": 0.8986784140969163, + "grad_norm": 1.5458127280177445, + "learning_rate": 1.2484867211342653e-05, + "loss": 0.5625143647193909, + "step": 2550 + }, + { + "epoch": 0.8990308370044053, + "grad_norm": 1.5409896244330605, + "learning_rate": 1.2479223137167011e-05, + "loss": 0.6631217002868652, + "step": 2551 + }, + { + "epoch": 0.8993832599118943, + "grad_norm": 1.6071757454969378, + "learning_rate": 1.247357822137227e-05, + "loss": 0.6588548421859741, + "step": 2552 + }, + { + "epoch": 0.8997356828193832, + "grad_norm": 1.4192601474848106, + "learning_rate": 1.24679324658747e-05, + "loss": 0.8046029806137085, + "step": 2553 + }, + { + "epoch": 0.9000881057268723, + "grad_norm": 1.6272051463241026, + "learning_rate": 1.2462285872590862e-05, + "loss": 0.6651894450187683, + "step": 2554 + }, + { + "epoch": 0.9004405286343612, + "grad_norm": 1.5179002680249722, + "learning_rate": 1.2456638443437605e-05, + "loss": 0.5888474583625793, + "step": 2555 + }, + { + "epoch": 0.9007929515418502, + "grad_norm": 1.7319345866859506, + "learning_rate": 1.2450990180332045e-05, + "loss": 0.5915735363960266, + "step": 2556 + }, + { + "epoch": 0.9011453744493392, + "grad_norm": 1.5409991319630119, + "learning_rate": 1.24453410851916e-05, + "loss": 0.6830431222915649, + "step": 2557 + }, + { + 
"epoch": 0.9014977973568282, + "grad_norm": 1.3954767744454935, + "learning_rate": 1.2439691159933955e-05, + "loss": 0.6812379956245422, + "step": 2558 + }, + { + "epoch": 0.9018502202643172, + "grad_norm": 1.3481753587360845, + "learning_rate": 1.2434040406477092e-05, + "loss": 0.6887152791023254, + "step": 2559 + }, + { + "epoch": 0.9022026431718062, + "grad_norm": 1.495436388275929, + "learning_rate": 1.2428388826739254e-05, + "loss": 0.677071213722229, + "step": 2560 + }, + { + "epoch": 0.9025550660792951, + "grad_norm": 1.5809198519920526, + "learning_rate": 1.242273642263899e-05, + "loss": 0.6635652780532837, + "step": 2561 + }, + { + "epoch": 0.9029074889867842, + "grad_norm": 1.7455357614962055, + "learning_rate": 1.2417083196095105e-05, + "loss": 0.7543712854385376, + "step": 2562 + }, + { + "epoch": 0.9032599118942731, + "grad_norm": 1.743758273604275, + "learning_rate": 1.2411429149026701e-05, + "loss": 0.6219073534011841, + "step": 2563 + }, + { + "epoch": 0.9036123348017621, + "grad_norm": 1.360518097358955, + "learning_rate": 1.2405774283353144e-05, + "loss": 0.6576533317565918, + "step": 2564 + }, + { + "epoch": 0.9039647577092511, + "grad_norm": 1.3683846685040542, + "learning_rate": 1.240011860099409e-05, + "loss": 0.6458585262298584, + "step": 2565 + }, + { + "epoch": 0.90431718061674, + "grad_norm": 1.5753618523282886, + "learning_rate": 1.2394462103869464e-05, + "loss": 0.6943198442459106, + "step": 2566 + }, + { + "epoch": 0.9046696035242291, + "grad_norm": 1.5425443594991994, + "learning_rate": 1.2388804793899473e-05, + "loss": 0.6684235334396362, + "step": 2567 + }, + { + "epoch": 0.905022026431718, + "grad_norm": 1.4432793187881665, + "learning_rate": 1.2383146673004598e-05, + "loss": 0.6707017421722412, + "step": 2568 + }, + { + "epoch": 0.9053744493392071, + "grad_norm": 1.4610510830510222, + "learning_rate": 1.2377487743105593e-05, + "loss": 0.6009544134140015, + "step": 2569 + }, + { + "epoch": 0.905726872246696, + "grad_norm": 
1.3343070463381261, + "learning_rate": 1.2371828006123488e-05, + "loss": 0.57770836353302, + "step": 2570 + }, + { + "epoch": 0.9060792951541851, + "grad_norm": 1.50423514822828, + "learning_rate": 1.236616746397959e-05, + "loss": 0.6146866083145142, + "step": 2571 + }, + { + "epoch": 0.906431718061674, + "grad_norm": 1.4060902038910876, + "learning_rate": 1.2360506118595476e-05, + "loss": 0.6374951601028442, + "step": 2572 + }, + { + "epoch": 0.906784140969163, + "grad_norm": 1.5006132241656203, + "learning_rate": 1.2354843971892998e-05, + "loss": 0.6933800578117371, + "step": 2573 + }, + { + "epoch": 0.907136563876652, + "grad_norm": 1.6402374081466708, + "learning_rate": 1.2349181025794278e-05, + "loss": 0.857126772403717, + "step": 2574 + }, + { + "epoch": 0.9074889867841409, + "grad_norm": 1.7970464713795387, + "learning_rate": 1.2343517282221704e-05, + "loss": 0.7316192388534546, + "step": 2575 + }, + { + "epoch": 0.90784140969163, + "grad_norm": 1.7338748475900745, + "learning_rate": 1.2337852743097947e-05, + "loss": 0.7916824817657471, + "step": 2576 + }, + { + "epoch": 0.9081938325991189, + "grad_norm": 1.342845056559204, + "learning_rate": 1.2332187410345941e-05, + "loss": 0.6437021493911743, + "step": 2577 + }, + { + "epoch": 0.908546255506608, + "grad_norm": 1.547322536503476, + "learning_rate": 1.2326521285888892e-05, + "loss": 0.8788109421730042, + "step": 2578 + }, + { + "epoch": 0.9088986784140969, + "grad_norm": 1.4382005842040866, + "learning_rate": 1.2320854371650268e-05, + "loss": 0.704395055770874, + "step": 2579 + }, + { + "epoch": 0.9092511013215859, + "grad_norm": 1.410037340911335, + "learning_rate": 1.2315186669553814e-05, + "loss": 0.6579844951629639, + "step": 2580 + }, + { + "epoch": 0.9096035242290749, + "grad_norm": 1.3089054036910626, + "learning_rate": 1.2309518181523537e-05, + "loss": 0.6329941749572754, + "step": 2581 + }, + { + "epoch": 0.9099559471365639, + "grad_norm": 1.746183595307062, + "learning_rate": 
1.2303848909483711e-05, + "loss": 0.8868603706359863, + "step": 2582 + }, + { + "epoch": 0.9103083700440529, + "grad_norm": 1.4531546458491524, + "learning_rate": 1.2298178855358875e-05, + "loss": 0.6402688026428223, + "step": 2583 + }, + { + "epoch": 0.9106607929515419, + "grad_norm": 1.3289180353613772, + "learning_rate": 1.2292508021073846e-05, + "loss": 0.8017194271087646, + "step": 2584 + }, + { + "epoch": 0.9110132158590308, + "grad_norm": 1.6340808373356166, + "learning_rate": 1.2286836408553687e-05, + "loss": 0.7396517992019653, + "step": 2585 + }, + { + "epoch": 0.9113656387665198, + "grad_norm": 1.5443847526543046, + "learning_rate": 1.2281164019723737e-05, + "loss": 0.6123272776603699, + "step": 2586 + }, + { + "epoch": 0.9117180616740088, + "grad_norm": 1.464544186162697, + "learning_rate": 1.2275490856509591e-05, + "loss": 0.7675807476043701, + "step": 2587 + }, + { + "epoch": 0.9120704845814978, + "grad_norm": 1.67164115622116, + "learning_rate": 1.2269816920837121e-05, + "loss": 0.6814998388290405, + "step": 2588 + }, + { + "epoch": 0.9124229074889868, + "grad_norm": 1.3228366401729674, + "learning_rate": 1.2264142214632441e-05, + "loss": 0.6290348768234253, + "step": 2589 + }, + { + "epoch": 0.9127753303964757, + "grad_norm": 1.5676260945728981, + "learning_rate": 1.2258466739821946e-05, + "loss": 0.6752464175224304, + "step": 2590 + }, + { + "epoch": 0.9131277533039648, + "grad_norm": 1.3388236473063337, + "learning_rate": 1.2252790498332275e-05, + "loss": 0.6153687238693237, + "step": 2591 + }, + { + "epoch": 0.9134801762114537, + "grad_norm": 1.5346187118504635, + "learning_rate": 1.2247113492090344e-05, + "loss": 0.5952479839324951, + "step": 2592 + }, + { + "epoch": 0.9138325991189428, + "grad_norm": 1.4457638395568853, + "learning_rate": 1.2241435723023309e-05, + "loss": 0.5457659959793091, + "step": 2593 + }, + { + "epoch": 0.9141850220264317, + "grad_norm": 1.5389040689398128, + "learning_rate": 1.2235757193058607e-05, + "loss": 
0.7373491525650024, + "step": 2594 + }, + { + "epoch": 0.9145374449339208, + "grad_norm": 1.3149945847764668, + "learning_rate": 1.2230077904123914e-05, + "loss": 0.6564488410949707, + "step": 2595 + }, + { + "epoch": 0.9148898678414097, + "grad_norm": 1.8716233271125673, + "learning_rate": 1.2224397858147176e-05, + "loss": 0.6790947914123535, + "step": 2596 + }, + { + "epoch": 0.9152422907488986, + "grad_norm": 1.6467277287942856, + "learning_rate": 1.2218717057056592e-05, + "loss": 0.8304486274719238, + "step": 2597 + }, + { + "epoch": 0.9155947136563877, + "grad_norm": 1.7018746535629268, + "learning_rate": 1.2213035502780616e-05, + "loss": 0.7452701330184937, + "step": 2598 + }, + { + "epoch": 0.9159471365638766, + "grad_norm": 1.270448247487427, + "learning_rate": 1.2207353197247957e-05, + "loss": 0.572200357913971, + "step": 2599 + }, + { + "epoch": 0.9162995594713657, + "grad_norm": 1.574291214704138, + "learning_rate": 1.2201670142387587e-05, + "loss": 0.7142342925071716, + "step": 2600 + }, + { + "epoch": 0.9166519823788546, + "grad_norm": 1.367606009894927, + "learning_rate": 1.219598634012872e-05, + "loss": 0.9390528202056885, + "step": 2601 + }, + { + "epoch": 0.9170044052863436, + "grad_norm": 1.6870829349403977, + "learning_rate": 1.2190301792400832e-05, + "loss": 0.6897540092468262, + "step": 2602 + }, + { + "epoch": 0.9173568281938326, + "grad_norm": 1.5631074773710765, + "learning_rate": 1.2184616501133649e-05, + "loss": 0.7309582233428955, + "step": 2603 + }, + { + "epoch": 0.9177092511013216, + "grad_norm": 1.4956685909345118, + "learning_rate": 1.2178930468257154e-05, + "loss": 0.7692370414733887, + "step": 2604 + }, + { + "epoch": 0.9180616740088106, + "grad_norm": 1.6160577913139176, + "learning_rate": 1.2173243695701575e-05, + "loss": 0.7650456428527832, + "step": 2605 + }, + { + "epoch": 0.9184140969162996, + "grad_norm": 1.4419682356133905, + "learning_rate": 1.2167556185397396e-05, + "loss": 0.6000699996948242, + "step": 2606 + }, + { + 
"epoch": 0.9187665198237885, + "grad_norm": 1.368037173998054, + "learning_rate": 1.2161867939275344e-05, + "loss": 0.6227651834487915, + "step": 2607 + }, + { + "epoch": 0.9191189427312776, + "grad_norm": 1.3507337866227296, + "learning_rate": 1.2156178959266414e-05, + "loss": 0.6554160118103027, + "step": 2608 + }, + { + "epoch": 0.9194713656387665, + "grad_norm": 1.4986959017577084, + "learning_rate": 1.2150489247301826e-05, + "loss": 0.5360773801803589, + "step": 2609 + }, + { + "epoch": 0.9198237885462555, + "grad_norm": 1.3546990782009203, + "learning_rate": 1.2144798805313065e-05, + "loss": 0.7184062004089355, + "step": 2610 + }, + { + "epoch": 0.9201762114537445, + "grad_norm": 1.6293146255106934, + "learning_rate": 1.2139107635231857e-05, + "loss": 0.646910548210144, + "step": 2611 + }, + { + "epoch": 0.9205286343612334, + "grad_norm": 1.449047238736513, + "learning_rate": 1.2133415738990179e-05, + "loss": 0.7794413566589355, + "step": 2612 + }, + { + "epoch": 0.9208810572687225, + "grad_norm": 1.5173448374489182, + "learning_rate": 1.2127723118520254e-05, + "loss": 0.5904654860496521, + "step": 2613 + }, + { + "epoch": 0.9212334801762114, + "grad_norm": 1.6062827687776695, + "learning_rate": 1.2122029775754545e-05, + "loss": 0.5526635646820068, + "step": 2614 + }, + { + "epoch": 0.9215859030837005, + "grad_norm": 1.584080412995617, + "learning_rate": 1.2116335712625766e-05, + "loss": 0.6832528710365295, + "step": 2615 + }, + { + "epoch": 0.9219383259911894, + "grad_norm": 1.5962919739796952, + "learning_rate": 1.211064093106688e-05, + "loss": 0.5858304500579834, + "step": 2616 + }, + { + "epoch": 0.9222907488986785, + "grad_norm": 1.6542154949587857, + "learning_rate": 1.2104945433011079e-05, + "loss": 0.7383478879928589, + "step": 2617 + }, + { + "epoch": 0.9226431718061674, + "grad_norm": 1.4197774198085091, + "learning_rate": 1.2099249220391815e-05, + "loss": 0.6466768980026245, + "step": 2618 + }, + { + "epoch": 0.9229955947136564, + "grad_norm": 
1.6780588288371647, + "learning_rate": 1.209355229514277e-05, + "loss": 0.5681238174438477, + "step": 2619 + }, + { + "epoch": 0.9233480176211454, + "grad_norm": 1.4473210287022626, + "learning_rate": 1.2087854659197874e-05, + "loss": 0.5726606249809265, + "step": 2620 + }, + { + "epoch": 0.9237004405286343, + "grad_norm": 1.5671254030487451, + "learning_rate": 1.2082156314491298e-05, + "loss": 0.6643342971801758, + "step": 2621 + }, + { + "epoch": 0.9240528634361234, + "grad_norm": 1.6891696074210503, + "learning_rate": 1.2076457262957454e-05, + "loss": 0.5408967733383179, + "step": 2622 + }, + { + "epoch": 0.9244052863436123, + "grad_norm": 1.503887173232949, + "learning_rate": 1.207075750653099e-05, + "loss": 0.706169843673706, + "step": 2623 + }, + { + "epoch": 0.9247577092511013, + "grad_norm": 1.7934999117659478, + "learning_rate": 1.2065057047146797e-05, + "loss": 0.7973969578742981, + "step": 2624 + }, + { + "epoch": 0.9251101321585903, + "grad_norm": 1.4120942109312036, + "learning_rate": 1.2059355886740002e-05, + "loss": 0.6907010674476624, + "step": 2625 + }, + { + "epoch": 0.9254625550660793, + "grad_norm": 1.8378017160561377, + "learning_rate": 1.2053654027245977e-05, + "loss": 0.8174253702163696, + "step": 2626 + }, + { + "epoch": 0.9258149779735683, + "grad_norm": 1.8873519737119473, + "learning_rate": 1.204795147060032e-05, + "loss": 0.60319983959198, + "step": 2627 + }, + { + "epoch": 0.9261674008810573, + "grad_norm": 2.916318649806586, + "learning_rate": 1.204224821873887e-05, + "loss": 0.718228816986084, + "step": 2628 + }, + { + "epoch": 0.9265198237885462, + "grad_norm": 1.5801609410641386, + "learning_rate": 1.2036544273597708e-05, + "loss": 0.7385132312774658, + "step": 2629 + }, + { + "epoch": 0.9268722466960353, + "grad_norm": 1.5320403236251587, + "learning_rate": 1.203083963711315e-05, + "loss": 0.7700635194778442, + "step": 2630 + }, + { + "epoch": 0.9272246696035242, + "grad_norm": 1.4381703720368488, + "learning_rate": 
1.2025134311221732e-05, + "loss": 0.8767666816711426, + "step": 2631 + }, + { + "epoch": 0.9275770925110132, + "grad_norm": 1.4534975042510074, + "learning_rate": 1.2019428297860241e-05, + "loss": 0.6517986059188843, + "step": 2632 + }, + { + "epoch": 0.9279295154185022, + "grad_norm": 1.3295910752440807, + "learning_rate": 1.2013721598965688e-05, + "loss": 0.5967941284179688, + "step": 2633 + }, + { + "epoch": 0.9282819383259912, + "grad_norm": 2.085302745009741, + "learning_rate": 1.2008014216475327e-05, + "loss": 0.7480533123016357, + "step": 2634 + }, + { + "epoch": 0.9286343612334802, + "grad_norm": 1.415633444981562, + "learning_rate": 1.2002306152326626e-05, + "loss": 0.8020488023757935, + "step": 2635 + }, + { + "epoch": 0.9289867841409691, + "grad_norm": 1.235581839334599, + "learning_rate": 1.1996597408457302e-05, + "loss": 0.5535889863967896, + "step": 2636 + }, + { + "epoch": 0.9293392070484582, + "grad_norm": 1.5093780754929471, + "learning_rate": 1.1990887986805295e-05, + "loss": 0.6888864040374756, + "step": 2637 + }, + { + "epoch": 0.9296916299559471, + "grad_norm": 1.761723253773031, + "learning_rate": 1.1985177889308777e-05, + "loss": 0.7723515033721924, + "step": 2638 + }, + { + "epoch": 0.9300440528634362, + "grad_norm": 2.4386861549294476, + "learning_rate": 1.1979467117906143e-05, + "loss": 0.6929488182067871, + "step": 2639 + }, + { + "epoch": 0.9303964757709251, + "grad_norm": 1.7413716913523498, + "learning_rate": 1.1973755674536027e-05, + "loss": 0.7025216221809387, + "step": 2640 + }, + { + "epoch": 0.9307488986784141, + "grad_norm": 1.5278537581621425, + "learning_rate": 1.1968043561137287e-05, + "loss": 0.6618740558624268, + "step": 2641 + }, + { + "epoch": 0.9311013215859031, + "grad_norm": 1.3720349025623486, + "learning_rate": 1.1962330779649002e-05, + "loss": 0.5308352708816528, + "step": 2642 + }, + { + "epoch": 0.931453744493392, + "grad_norm": 1.6043152610659495, + "learning_rate": 1.1956617332010488e-05, + "loss": 
0.6559470891952515, + "step": 2643 + }, + { + "epoch": 0.9318061674008811, + "grad_norm": 1.5758989244918422, + "learning_rate": 1.1950903220161286e-05, + "loss": 0.6572221517562866, + "step": 2644 + }, + { + "epoch": 0.93215859030837, + "grad_norm": 1.7357943090474917, + "learning_rate": 1.194518844604115e-05, + "loss": 0.7854161262512207, + "step": 2645 + }, + { + "epoch": 0.932511013215859, + "grad_norm": 1.555855365183626, + "learning_rate": 1.1939473011590075e-05, + "loss": 0.6471760869026184, + "step": 2646 + }, + { + "epoch": 0.932863436123348, + "grad_norm": 1.5672890328663938, + "learning_rate": 1.1933756918748271e-05, + "loss": 0.6261184215545654, + "step": 2647 + }, + { + "epoch": 0.933215859030837, + "grad_norm": 1.425764950800843, + "learning_rate": 1.1928040169456176e-05, + "loss": 0.6876180171966553, + "step": 2648 + }, + { + "epoch": 0.933568281938326, + "grad_norm": 1.6203483271740744, + "learning_rate": 1.1922322765654446e-05, + "loss": 0.6782447099685669, + "step": 2649 + }, + { + "epoch": 0.933920704845815, + "grad_norm": 1.640471126849017, + "learning_rate": 1.1916604709283958e-05, + "loss": 0.6085894107818604, + "step": 2650 + }, + { + "epoch": 0.934273127753304, + "grad_norm": 1.6964969219798813, + "learning_rate": 1.1910886002285822e-05, + "loss": 0.6940577030181885, + "step": 2651 + }, + { + "epoch": 0.934625550660793, + "grad_norm": 1.4704189591593113, + "learning_rate": 1.1905166646601356e-05, + "loss": 0.8204144239425659, + "step": 2652 + }, + { + "epoch": 0.9349779735682819, + "grad_norm": 1.389489538033466, + "learning_rate": 1.1899446644172106e-05, + "loss": 0.6184309720993042, + "step": 2653 + }, + { + "epoch": 0.9353303964757709, + "grad_norm": 2.1507675107714306, + "learning_rate": 1.1893725996939831e-05, + "loss": 0.7499250173568726, + "step": 2654 + }, + { + "epoch": 0.9356828193832599, + "grad_norm": 1.739709417281562, + "learning_rate": 1.1888004706846519e-05, + "loss": 0.7021058797836304, + "step": 2655 + }, + { + "epoch": 
0.9360352422907489, + "grad_norm": 1.4311959050457856, + "learning_rate": 1.188228277583436e-05, + "loss": 0.6005666255950928, + "step": 2656 + }, + { + "epoch": 0.9363876651982379, + "grad_norm": 1.4910024814198868, + "learning_rate": 1.1876560205845782e-05, + "loss": 0.6572481393814087, + "step": 2657 + }, + { + "epoch": 0.9367400881057268, + "grad_norm": 1.5258435486694566, + "learning_rate": 1.187083699882341e-05, + "loss": 0.7402434349060059, + "step": 2658 + }, + { + "epoch": 0.9370925110132159, + "grad_norm": 1.4352893489445113, + "learning_rate": 1.1865113156710106e-05, + "loss": 0.6693596243858337, + "step": 2659 + }, + { + "epoch": 0.9374449339207048, + "grad_norm": 1.6704808140330663, + "learning_rate": 1.1859388681448925e-05, + "loss": 0.7708617448806763, + "step": 2660 + }, + { + "epoch": 0.9377973568281939, + "grad_norm": 1.4245143913781195, + "learning_rate": 1.1853663574983154e-05, + "loss": 0.5871701836585999, + "step": 2661 + }, + { + "epoch": 0.9381497797356828, + "grad_norm": 1.505716027406483, + "learning_rate": 1.1847937839256287e-05, + "loss": 0.6492994427680969, + "step": 2662 + }, + { + "epoch": 0.9385022026431719, + "grad_norm": 1.3908643684674444, + "learning_rate": 1.1842211476212038e-05, + "loss": 0.6803429126739502, + "step": 2663 + }, + { + "epoch": 0.9388546255506608, + "grad_norm": 1.5017846140199234, + "learning_rate": 1.1836484487794318e-05, + "loss": 0.5602244734764099, + "step": 2664 + }, + { + "epoch": 0.9392070484581497, + "grad_norm": 1.2797636855685697, + "learning_rate": 1.183075687594727e-05, + "loss": 0.6562157869338989, + "step": 2665 + }, + { + "epoch": 0.9395594713656388, + "grad_norm": 1.4855818018568143, + "learning_rate": 1.182502864261524e-05, + "loss": 0.71474289894104, + "step": 2666 + }, + { + "epoch": 0.9399118942731277, + "grad_norm": 1.5995143445420303, + "learning_rate": 1.1819299789742782e-05, + "loss": 0.7130062580108643, + "step": 2667 + }, + { + "epoch": 0.9402643171806168, + "grad_norm": 
1.645740195320987, + "learning_rate": 1.1813570319274663e-05, + "loss": 0.788813054561615, + "step": 2668 + }, + { + "epoch": 0.9406167400881057, + "grad_norm": 1.965041520497338, + "learning_rate": 1.1807840233155863e-05, + "loss": 0.6485022306442261, + "step": 2669 + }, + { + "epoch": 0.9409691629955947, + "grad_norm": 1.6399057690578631, + "learning_rate": 1.1802109533331562e-05, + "loss": 0.4491521418094635, + "step": 2670 + }, + { + "epoch": 0.9413215859030837, + "grad_norm": 1.6744760497066637, + "learning_rate": 1.1796378221747162e-05, + "loss": 0.6073683500289917, + "step": 2671 + }, + { + "epoch": 0.9416740088105727, + "grad_norm": 1.859395754773969, + "learning_rate": 1.179064630034826e-05, + "loss": 0.5942971706390381, + "step": 2672 + }, + { + "epoch": 0.9420264317180617, + "grad_norm": 1.4303169952284007, + "learning_rate": 1.1784913771080667e-05, + "loss": 0.7295013666152954, + "step": 2673 + }, + { + "epoch": 0.9423788546255507, + "grad_norm": 1.8192026049611665, + "learning_rate": 1.1779180635890394e-05, + "loss": 0.7347372770309448, + "step": 2674 + }, + { + "epoch": 0.9427312775330396, + "grad_norm": 1.5350977995485566, + "learning_rate": 1.1773446896723668e-05, + "loss": 0.5591942667961121, + "step": 2675 + }, + { + "epoch": 0.9430837004405286, + "grad_norm": 1.5036340589436215, + "learning_rate": 1.1767712555526911e-05, + "loss": 0.822568953037262, + "step": 2676 + }, + { + "epoch": 0.9434361233480176, + "grad_norm": 1.4619836017557306, + "learning_rate": 1.1761977614246757e-05, + "loss": 0.649920642375946, + "step": 2677 + }, + { + "epoch": 0.9437885462555066, + "grad_norm": 1.4884584586985279, + "learning_rate": 1.1756242074830036e-05, + "loss": 0.6298861503601074, + "step": 2678 + }, + { + "epoch": 0.9441409691629956, + "grad_norm": 1.6194483495779424, + "learning_rate": 1.1750505939223787e-05, + "loss": 0.81938636302948, + "step": 2679 + }, + { + "epoch": 0.9444933920704845, + "grad_norm": 1.4751430048371623, + "learning_rate": 
1.1744769209375248e-05, + "loss": 0.6627225875854492, + "step": 2680 + }, + { + "epoch": 0.9448458149779736, + "grad_norm": 1.310837287475738, + "learning_rate": 1.1739031887231864e-05, + "loss": 0.6563318371772766, + "step": 2681 + }, + { + "epoch": 0.9451982378854625, + "grad_norm": 1.3782616320804129, + "learning_rate": 1.1733293974741273e-05, + "loss": 0.5702694654464722, + "step": 2682 + }, + { + "epoch": 0.9455506607929516, + "grad_norm": 1.5543579440741437, + "learning_rate": 1.1727555473851321e-05, + "loss": 0.685553789138794, + "step": 2683 + }, + { + "epoch": 0.9459030837004405, + "grad_norm": 1.2085432227797441, + "learning_rate": 1.172181638651005e-05, + "loss": 0.6092622876167297, + "step": 2684 + }, + { + "epoch": 0.9462555066079296, + "grad_norm": 2.0946243925185013, + "learning_rate": 1.1716076714665701e-05, + "loss": 0.6650614738464355, + "step": 2685 + }, + { + "epoch": 0.9466079295154185, + "grad_norm": 1.6479809864443196, + "learning_rate": 1.171033646026671e-05, + "loss": 0.7665754556655884, + "step": 2686 + }, + { + "epoch": 0.9469603524229074, + "grad_norm": 1.3199886923676785, + "learning_rate": 1.1704595625261722e-05, + "loss": 0.6365277171134949, + "step": 2687 + }, + { + "epoch": 0.9473127753303965, + "grad_norm": 1.4825934002405374, + "learning_rate": 1.1698854211599565e-05, + "loss": 0.6622267961502075, + "step": 2688 + }, + { + "epoch": 0.9476651982378854, + "grad_norm": 1.4519347010464663, + "learning_rate": 1.1693112221229278e-05, + "loss": 0.6636145710945129, + "step": 2689 + }, + { + "epoch": 0.9480176211453745, + "grad_norm": 1.3381328445735352, + "learning_rate": 1.168736965610008e-05, + "loss": 0.6943212747573853, + "step": 2690 + }, + { + "epoch": 0.9483700440528634, + "grad_norm": 1.5439836232478343, + "learning_rate": 1.1681626518161397e-05, + "loss": 0.7479512691497803, + "step": 2691 + }, + { + "epoch": 0.9487224669603525, + "grad_norm": 1.5424571304173897, + "learning_rate": 1.1675882809362846e-05, + "loss": 
0.7227041721343994, + "step": 2692 + }, + { + "epoch": 0.9490748898678414, + "grad_norm": 1.3855049912904343, + "learning_rate": 1.1670138531654238e-05, + "loss": 0.7366166114807129, + "step": 2693 + }, + { + "epoch": 0.9494273127753304, + "grad_norm": 1.634945701470733, + "learning_rate": 1.1664393686985571e-05, + "loss": 0.8634493350982666, + "step": 2694 + }, + { + "epoch": 0.9497797356828194, + "grad_norm": 1.3102748532201536, + "learning_rate": 1.165864827730705e-05, + "loss": 0.5802862048149109, + "step": 2695 + }, + { + "epoch": 0.9501321585903084, + "grad_norm": 1.571840947668404, + "learning_rate": 1.1652902304569053e-05, + "loss": 0.5931085348129272, + "step": 2696 + }, + { + "epoch": 0.9504845814977974, + "grad_norm": 1.7175179856841813, + "learning_rate": 1.164715577072217e-05, + "loss": 0.7684508562088013, + "step": 2697 + }, + { + "epoch": 0.9508370044052863, + "grad_norm": 1.6094834386500196, + "learning_rate": 1.1641408677717158e-05, + "loss": 0.94246906042099, + "step": 2698 + }, + { + "epoch": 0.9511894273127753, + "grad_norm": 1.3999360216133725, + "learning_rate": 1.1635661027504985e-05, + "loss": 0.7072316408157349, + "step": 2699 + }, + { + "epoch": 0.9515418502202643, + "grad_norm": 1.5926279454886292, + "learning_rate": 1.16299128220368e-05, + "loss": 0.5872572064399719, + "step": 2700 + }, + { + "epoch": 0.9518942731277533, + "grad_norm": 1.4987885212929257, + "learning_rate": 1.1624164063263931e-05, + "loss": 0.6549060344696045, + "step": 2701 + }, + { + "epoch": 0.9522466960352423, + "grad_norm": 1.6773153304869155, + "learning_rate": 1.161841475313791e-05, + "loss": 0.7338137626647949, + "step": 2702 + }, + { + "epoch": 0.9525991189427313, + "grad_norm": 1.6523970676343225, + "learning_rate": 1.161266489361045e-05, + "loss": 0.6942911148071289, + "step": 2703 + }, + { + "epoch": 0.9529515418502202, + "grad_norm": 2.037450532351288, + "learning_rate": 1.1606914486633444e-05, + "loss": 0.674375057220459, + "step": 2704 + }, + { + "epoch": 
0.9533039647577093, + "grad_norm": 1.6450610385875453, + "learning_rate": 1.160116353415898e-05, + "loss": 0.6790377497673035, + "step": 2705 + }, + { + "epoch": 0.9536563876651982, + "grad_norm": 1.6724856793361191, + "learning_rate": 1.1595412038139326e-05, + "loss": 0.5902142524719238, + "step": 2706 + }, + { + "epoch": 0.9540088105726873, + "grad_norm": 1.4286047469499437, + "learning_rate": 1.1589660000526937e-05, + "loss": 0.7034019231796265, + "step": 2707 + }, + { + "epoch": 0.9543612334801762, + "grad_norm": 3.1062423334867106, + "learning_rate": 1.158390742327445e-05, + "loss": 0.6986846923828125, + "step": 2708 + }, + { + "epoch": 0.9547136563876651, + "grad_norm": 1.8367783325674814, + "learning_rate": 1.1578154308334683e-05, + "loss": 0.6972544193267822, + "step": 2709 + }, + { + "epoch": 0.9550660792951542, + "grad_norm": 1.3370474194561557, + "learning_rate": 1.1572400657660646e-05, + "loss": 0.6312702298164368, + "step": 2710 + }, + { + "epoch": 0.9554185022026431, + "grad_norm": 1.7161015062577845, + "learning_rate": 1.1566646473205518e-05, + "loss": 0.7584360241889954, + "step": 2711 + }, + { + "epoch": 0.9557709251101322, + "grad_norm": 1.256436023255263, + "learning_rate": 1.156089175692267e-05, + "loss": 0.700894296169281, + "step": 2712 + }, + { + "epoch": 0.9561233480176211, + "grad_norm": 1.3257581819044393, + "learning_rate": 1.1555136510765645e-05, + "loss": 0.5637902617454529, + "step": 2713 + }, + { + "epoch": 0.9564757709251102, + "grad_norm": 1.388319575976614, + "learning_rate": 1.1549380736688173e-05, + "loss": 0.4537314772605896, + "step": 2714 + }, + { + "epoch": 0.9568281938325991, + "grad_norm": 1.8324279373886256, + "learning_rate": 1.1543624436644161e-05, + "loss": 0.7880423069000244, + "step": 2715 + }, + { + "epoch": 0.9571806167400881, + "grad_norm": 1.6310441104063826, + "learning_rate": 1.1537867612587692e-05, + "loss": 0.7314344644546509, + "step": 2716 + }, + { + "epoch": 0.9575330396475771, + "grad_norm": 
1.7810937354544796, + "learning_rate": 1.1532110266473026e-05, + "loss": 0.9550024271011353, + "step": 2717 + }, + { + "epoch": 0.9578854625550661, + "grad_norm": 1.3474455317445524, + "learning_rate": 1.152635240025461e-05, + "loss": 0.6482470035552979, + "step": 2718 + }, + { + "epoch": 0.9582378854625551, + "grad_norm": 1.6637520992254753, + "learning_rate": 1.152059401588705e-05, + "loss": 0.6347365975379944, + "step": 2719 + }, + { + "epoch": 0.958590308370044, + "grad_norm": 1.469780222161662, + "learning_rate": 1.151483511532515e-05, + "loss": 0.7214993238449097, + "step": 2720 + }, + { + "epoch": 0.958942731277533, + "grad_norm": 1.4597118679681749, + "learning_rate": 1.1509075700523869e-05, + "loss": 0.6255312561988831, + "step": 2721 + }, + { + "epoch": 0.959295154185022, + "grad_norm": 1.4735593911126945, + "learning_rate": 1.1503315773438352e-05, + "loss": 0.6152437925338745, + "step": 2722 + }, + { + "epoch": 0.959647577092511, + "grad_norm": 1.8178378627357112, + "learning_rate": 1.1497555336023916e-05, + "loss": 0.6565401554107666, + "step": 2723 + }, + { + "epoch": 0.96, + "grad_norm": 1.5268947365741583, + "learning_rate": 1.1491794390236047e-05, + "loss": 0.796178936958313, + "step": 2724 + }, + { + "epoch": 0.960352422907489, + "grad_norm": 1.4289859748860345, + "learning_rate": 1.1486032938030409e-05, + "loss": 0.6243436336517334, + "step": 2725 + }, + { + "epoch": 0.960704845814978, + "grad_norm": 3.1702620206811036, + "learning_rate": 1.148027098136284e-05, + "loss": 0.6043159365653992, + "step": 2726 + }, + { + "epoch": 0.961057268722467, + "grad_norm": 2.2643023721896554, + "learning_rate": 1.1474508522189334e-05, + "loss": 0.7268002033233643, + "step": 2727 + }, + { + "epoch": 0.9614096916299559, + "grad_norm": 1.6105062692265093, + "learning_rate": 1.1468745562466076e-05, + "loss": 0.6156840324401855, + "step": 2728 + }, + { + "epoch": 0.961762114537445, + "grad_norm": 1.3602355982897767, + "learning_rate": 1.1462982104149409e-05, + 
"loss": 0.8415796756744385, + "step": 2729 + }, + { + "epoch": 0.9621145374449339, + "grad_norm": 1.7603646172978014, + "learning_rate": 1.145721814919585e-05, + "loss": 0.5983521342277527, + "step": 2730 + }, + { + "epoch": 0.962466960352423, + "grad_norm": 1.6358592349658665, + "learning_rate": 1.1451453699562077e-05, + "loss": 0.6144511699676514, + "step": 2731 + }, + { + "epoch": 0.9628193832599119, + "grad_norm": 1.66844617820458, + "learning_rate": 1.1445688757204942e-05, + "loss": 0.6449630260467529, + "step": 2732 + }, + { + "epoch": 0.9631718061674008, + "grad_norm": 1.5343236560799753, + "learning_rate": 1.1439923324081465e-05, + "loss": 0.7321716547012329, + "step": 2733 + }, + { + "epoch": 0.9635242290748899, + "grad_norm": 1.9877317345810759, + "learning_rate": 1.1434157402148838e-05, + "loss": 0.8354923129081726, + "step": 2734 + }, + { + "epoch": 0.9638766519823788, + "grad_norm": 1.3653549857555707, + "learning_rate": 1.14283909933644e-05, + "loss": 0.728820264339447, + "step": 2735 + }, + { + "epoch": 0.9642290748898679, + "grad_norm": 1.4013626479373464, + "learning_rate": 1.1422624099685675e-05, + "loss": 0.6683202981948853, + "step": 2736 + }, + { + "epoch": 0.9645814977973568, + "grad_norm": 1.6203635868462385, + "learning_rate": 1.141685672307034e-05, + "loss": 0.7159590125083923, + "step": 2737 + }, + { + "epoch": 0.9649339207048458, + "grad_norm": 1.9197883933040156, + "learning_rate": 1.1411088865476245e-05, + "loss": 0.8269981145858765, + "step": 2738 + }, + { + "epoch": 0.9652863436123348, + "grad_norm": 1.7561037821195844, + "learning_rate": 1.1405320528861393e-05, + "loss": 0.6993168592453003, + "step": 2739 + }, + { + "epoch": 0.9656387665198238, + "grad_norm": 1.4700171152077626, + "learning_rate": 1.1399551715183956e-05, + "loss": 0.6296184062957764, + "step": 2740 + }, + { + "epoch": 0.9659911894273128, + "grad_norm": 1.5505746175576802, + "learning_rate": 1.1393782426402267e-05, + "loss": 0.670283317565918, + "step": 2741 + }, + { 
+ "epoch": 0.9663436123348018, + "grad_norm": 1.6125051339337373, + "learning_rate": 1.1388012664474824e-05, + "loss": 0.9248946905136108, + "step": 2742 + }, + { + "epoch": 0.9666960352422908, + "grad_norm": 1.7027770081175677, + "learning_rate": 1.1382242431360272e-05, + "loss": 0.7965992093086243, + "step": 2743 + }, + { + "epoch": 0.9670484581497797, + "grad_norm": 1.6413263453773168, + "learning_rate": 1.1376471729017435e-05, + "loss": 0.632454514503479, + "step": 2744 + }, + { + "epoch": 0.9674008810572687, + "grad_norm": 1.4364322830343181, + "learning_rate": 1.1370700559405283e-05, + "loss": 0.6463649272918701, + "step": 2745 + }, + { + "epoch": 0.9677533039647577, + "grad_norm": 1.5890798975591325, + "learning_rate": 1.1364928924482952e-05, + "loss": 0.5864677429199219, + "step": 2746 + }, + { + "epoch": 0.9681057268722467, + "grad_norm": 1.5090045708209912, + "learning_rate": 1.1359156826209726e-05, + "loss": 0.6313967108726501, + "step": 2747 + }, + { + "epoch": 0.9684581497797357, + "grad_norm": 1.2634359711899723, + "learning_rate": 1.1353384266545056e-05, + "loss": 0.5736903548240662, + "step": 2748 + }, + { + "epoch": 0.9688105726872247, + "grad_norm": 1.3956693120918684, + "learning_rate": 1.1347611247448544e-05, + "loss": 0.672286868095398, + "step": 2749 + }, + { + "epoch": 0.9691629955947136, + "grad_norm": 1.7905269273993527, + "learning_rate": 1.1341837770879957e-05, + "loss": 0.7181379795074463, + "step": 2750 + }, + { + "epoch": 0.9695154185022027, + "grad_norm": 1.3192307426609728, + "learning_rate": 1.1336063838799204e-05, + "loss": 0.6160816550254822, + "step": 2751 + }, + { + "epoch": 0.9698678414096916, + "grad_norm": 1.3858752821091025, + "learning_rate": 1.1330289453166361e-05, + "loss": 0.737337589263916, + "step": 2752 + }, + { + "epoch": 0.9702202643171807, + "grad_norm": 1.4067461052680075, + "learning_rate": 1.1324514615941644e-05, + "loss": 0.6752150058746338, + "step": 2753 + }, + { + "epoch": 0.9705726872246696, + "grad_norm": 
1.502210352579975, + "learning_rate": 1.1318739329085438e-05, + "loss": 0.6917784214019775, + "step": 2754 + }, + { + "epoch": 0.9709251101321585, + "grad_norm": 1.873477988490531, + "learning_rate": 1.131296359455827e-05, + "loss": 0.7863353490829468, + "step": 2755 + }, + { + "epoch": 0.9712775330396476, + "grad_norm": 1.338648959960645, + "learning_rate": 1.1307187414320823e-05, + "loss": 0.6236519813537598, + "step": 2756 + }, + { + "epoch": 0.9716299559471365, + "grad_norm": 1.443196389025093, + "learning_rate": 1.130141079033393e-05, + "loss": 0.6957560181617737, + "step": 2757 + }, + { + "epoch": 0.9719823788546256, + "grad_norm": 1.6687230505642796, + "learning_rate": 1.1295633724558574e-05, + "loss": 0.6460270881652832, + "step": 2758 + }, + { + "epoch": 0.9723348017621145, + "grad_norm": 1.4575621917812085, + "learning_rate": 1.1289856218955892e-05, + "loss": 0.7352741956710815, + "step": 2759 + }, + { + "epoch": 0.9726872246696036, + "grad_norm": 1.7999835448567072, + "learning_rate": 1.1284078275487165e-05, + "loss": 0.6285911798477173, + "step": 2760 + }, + { + "epoch": 0.9730396475770925, + "grad_norm": 1.4280819376163427, + "learning_rate": 1.1278299896113823e-05, + "loss": 0.6577984094619751, + "step": 2761 + }, + { + "epoch": 0.9733920704845815, + "grad_norm": 1.4424142490511096, + "learning_rate": 1.1272521082797452e-05, + "loss": 0.6445770859718323, + "step": 2762 + }, + { + "epoch": 0.9737444933920705, + "grad_norm": 1.3911141072298185, + "learning_rate": 1.1266741837499773e-05, + "loss": 0.557687520980835, + "step": 2763 + }, + { + "epoch": 0.9740969162995595, + "grad_norm": 1.559776829553993, + "learning_rate": 1.1260962162182664e-05, + "loss": 0.6117650866508484, + "step": 2764 + }, + { + "epoch": 0.9744493392070485, + "grad_norm": 1.4751836492364416, + "learning_rate": 1.1255182058808143e-05, + "loss": 0.6498113870620728, + "step": 2765 + }, + { + "epoch": 0.9748017621145374, + "grad_norm": 1.9707928584824135, + "learning_rate": 
1.1249401529338375e-05, + "loss": 0.8738062381744385, + "step": 2766 + }, + { + "epoch": 0.9751541850220264, + "grad_norm": 1.6389865398372674, + "learning_rate": 1.1243620575735672e-05, + "loss": 0.551408052444458, + "step": 2767 + }, + { + "epoch": 0.9755066079295154, + "grad_norm": 1.645802380531443, + "learning_rate": 1.1237839199962488e-05, + "loss": 0.7197355031967163, + "step": 2768 + }, + { + "epoch": 0.9758590308370044, + "grad_norm": 1.5393826706252047, + "learning_rate": 1.1232057403981415e-05, + "loss": 0.5704015493392944, + "step": 2769 + }, + { + "epoch": 0.9762114537444934, + "grad_norm": 1.373872634740153, + "learning_rate": 1.1226275189755199e-05, + "loss": 0.603929877281189, + "step": 2770 + }, + { + "epoch": 0.9765638766519824, + "grad_norm": 1.731229349756288, + "learning_rate": 1.1220492559246719e-05, + "loss": 0.8652673363685608, + "step": 2771 + }, + { + "epoch": 0.9769162995594713, + "grad_norm": 1.5891679358388853, + "learning_rate": 1.1214709514418998e-05, + "loss": 0.6827684044837952, + "step": 2772 + }, + { + "epoch": 0.9772687224669604, + "grad_norm": 1.3323036683469254, + "learning_rate": 1.1208926057235197e-05, + "loss": 0.5584808588027954, + "step": 2773 + }, + { + "epoch": 0.9776211453744493, + "grad_norm": 1.5495557729443614, + "learning_rate": 1.1203142189658627e-05, + "loss": 0.7242820262908936, + "step": 2774 + }, + { + "epoch": 0.9779735682819384, + "grad_norm": 1.3489108616226997, + "learning_rate": 1.1197357913652725e-05, + "loss": 0.5299571752548218, + "step": 2775 + }, + { + "epoch": 0.9783259911894273, + "grad_norm": 1.8541326435971137, + "learning_rate": 1.1191573231181074e-05, + "loss": 0.69478440284729, + "step": 2776 + }, + { + "epoch": 0.9786784140969162, + "grad_norm": 1.540885425711554, + "learning_rate": 1.1185788144207394e-05, + "loss": 0.6997090578079224, + "step": 2777 + }, + { + "epoch": 0.9790308370044053, + "grad_norm": 1.422432956680528, + "learning_rate": 1.1180002654695543e-05, + "loss": 
0.6882679462432861, + "step": 2778 + }, + { + "epoch": 0.9793832599118942, + "grad_norm": 1.5811365233101125, + "learning_rate": 1.1174216764609514e-05, + "loss": 0.6434916257858276, + "step": 2779 + }, + { + "epoch": 0.9797356828193833, + "grad_norm": 1.5811226707061032, + "learning_rate": 1.1168430475913437e-05, + "loss": 0.6614376902580261, + "step": 2780 + }, + { + "epoch": 0.9800881057268722, + "grad_norm": 1.380437766979243, + "learning_rate": 1.1162643790571574e-05, + "loss": 0.6440471410751343, + "step": 2781 + }, + { + "epoch": 0.9804405286343613, + "grad_norm": 1.6997398594970703, + "learning_rate": 1.1156856710548327e-05, + "loss": 0.6493573188781738, + "step": 2782 + }, + { + "epoch": 0.9807929515418502, + "grad_norm": 1.5246321952125226, + "learning_rate": 1.1151069237808231e-05, + "loss": 0.660174548625946, + "step": 2783 + }, + { + "epoch": 0.9811453744493392, + "grad_norm": 1.7392611870715098, + "learning_rate": 1.1145281374315953e-05, + "loss": 0.8041812181472778, + "step": 2784 + }, + { + "epoch": 0.9814977973568282, + "grad_norm": 1.3479949919135392, + "learning_rate": 1.1139493122036289e-05, + "loss": 0.4758625030517578, + "step": 2785 + }, + { + "epoch": 0.9818502202643172, + "grad_norm": 1.6334305751982239, + "learning_rate": 1.113370448293417e-05, + "loss": 0.6482613682746887, + "step": 2786 + }, + { + "epoch": 0.9822026431718062, + "grad_norm": 1.475447708954463, + "learning_rate": 1.1127915458974665e-05, + "loss": 0.6911569237709045, + "step": 2787 + }, + { + "epoch": 0.9825550660792951, + "grad_norm": 1.362340888945518, + "learning_rate": 1.1122126052122963e-05, + "loss": 0.6851824522018433, + "step": 2788 + }, + { + "epoch": 0.9829074889867842, + "grad_norm": 1.5792587066367831, + "learning_rate": 1.111633626434439e-05, + "loss": 0.6405081748962402, + "step": 2789 + }, + { + "epoch": 0.9832599118942731, + "grad_norm": 1.5781550908818451, + "learning_rate": 1.1110546097604391e-05, + "loss": 0.7064476013183594, + "step": 2790 + }, + { + 
"epoch": 0.9836123348017621, + "grad_norm": 1.4647903320195184, + "learning_rate": 1.1104755553868559e-05, + "loss": 0.641350269317627, + "step": 2791 + }, + { + "epoch": 0.9839647577092511, + "grad_norm": 1.4142953897430577, + "learning_rate": 1.1098964635102597e-05, + "loss": 0.748977780342102, + "step": 2792 + }, + { + "epoch": 0.9843171806167401, + "grad_norm": 1.3989289975006294, + "learning_rate": 1.1093173343272342e-05, + "loss": 0.6033440828323364, + "step": 2793 + }, + { + "epoch": 0.984669603524229, + "grad_norm": 1.2877663440814373, + "learning_rate": 1.1087381680343754e-05, + "loss": 0.5684633255004883, + "step": 2794 + }, + { + "epoch": 0.9850220264317181, + "grad_norm": 1.5189384787980884, + "learning_rate": 1.1081589648282928e-05, + "loss": 0.7041289210319519, + "step": 2795 + }, + { + "epoch": 0.985374449339207, + "grad_norm": 1.5616342989862266, + "learning_rate": 1.1075797249056079e-05, + "loss": 0.7189786434173584, + "step": 2796 + }, + { + "epoch": 0.9857268722466961, + "grad_norm": 1.534620191791425, + "learning_rate": 1.1070004484629543e-05, + "loss": 0.5114344358444214, + "step": 2797 + }, + { + "epoch": 0.986079295154185, + "grad_norm": 1.6541092784437663, + "learning_rate": 1.1064211356969782e-05, + "loss": 0.5897136926651001, + "step": 2798 + }, + { + "epoch": 0.986431718061674, + "grad_norm": 1.5980123151797752, + "learning_rate": 1.1058417868043387e-05, + "loss": 0.8490760326385498, + "step": 2799 + }, + { + "epoch": 0.986784140969163, + "grad_norm": 1.5100542298165633, + "learning_rate": 1.1052624019817065e-05, + "loss": 0.6392524242401123, + "step": 2800 + }, + { + "epoch": 0.9871365638766519, + "grad_norm": 1.5630522519900902, + "learning_rate": 1.104682981425765e-05, + "loss": 0.7267303466796875, + "step": 2801 + }, + { + "epoch": 0.987488986784141, + "grad_norm": 1.5413815660334662, + "learning_rate": 1.1041035253332087e-05, + "loss": 0.6622469425201416, + "step": 2802 + }, + { + "epoch": 0.9878414096916299, + "grad_norm": 
1.4547931829788883, + "learning_rate": 1.1035240339007454e-05, + "loss": 0.643883466720581, + "step": 2803 + }, + { + "epoch": 0.988193832599119, + "grad_norm": 1.4919310534649226, + "learning_rate": 1.1029445073250945e-05, + "loss": 0.6281142234802246, + "step": 2804 + }, + { + "epoch": 0.9885462555066079, + "grad_norm": 1.606048707782168, + "learning_rate": 1.1023649458029873e-05, + "loss": 0.6356241703033447, + "step": 2805 + }, + { + "epoch": 0.988898678414097, + "grad_norm": 1.7018688321982895, + "learning_rate": 1.1017853495311664e-05, + "loss": 0.8118115663528442, + "step": 2806 + }, + { + "epoch": 0.9892511013215859, + "grad_norm": 1.4779776881835476, + "learning_rate": 1.1012057187063872e-05, + "loss": 0.7673395276069641, + "step": 2807 + }, + { + "epoch": 0.9896035242290749, + "grad_norm": 1.5158382122898324, + "learning_rate": 1.1006260535254159e-05, + "loss": 0.6617262959480286, + "step": 2808 + }, + { + "epoch": 0.9899559471365639, + "grad_norm": 1.7342419352159402, + "learning_rate": 1.1000463541850315e-05, + "loss": 0.537519097328186, + "step": 2809 + }, + { + "epoch": 0.9903083700440528, + "grad_norm": 1.8093297060046025, + "learning_rate": 1.0994666208820229e-05, + "loss": 0.6281024813652039, + "step": 2810 + }, + { + "epoch": 0.9906607929515419, + "grad_norm": 1.4111971416204439, + "learning_rate": 1.0988868538131922e-05, + "loss": 0.7189136743545532, + "step": 2811 + }, + { + "epoch": 0.9910132158590308, + "grad_norm": 1.3844162550962045, + "learning_rate": 1.098307053175352e-05, + "loss": 0.622093677520752, + "step": 2812 + }, + { + "epoch": 0.9913656387665198, + "grad_norm": 1.4032650881900075, + "learning_rate": 1.0977272191653272e-05, + "loss": 0.6774802207946777, + "step": 2813 + }, + { + "epoch": 0.9917180616740088, + "grad_norm": 1.490303383982121, + "learning_rate": 1.0971473519799523e-05, + "loss": 0.5999646186828613, + "step": 2814 + }, + { + "epoch": 0.9920704845814978, + "grad_norm": 1.3508886274303966, + "learning_rate": 
1.096567451816075e-05, + "loss": 0.6450619697570801, + "step": 2815 + }, + { + "epoch": 0.9924229074889868, + "grad_norm": 1.8693455627252262, + "learning_rate": 1.0959875188705529e-05, + "loss": 0.693134069442749, + "step": 2816 + }, + { + "epoch": 0.9927753303964758, + "grad_norm": 1.744167199385734, + "learning_rate": 1.0954075533402557e-05, + "loss": 0.8968616724014282, + "step": 2817 + }, + { + "epoch": 0.9931277533039647, + "grad_norm": 1.5750441805034816, + "learning_rate": 1.0948275554220632e-05, + "loss": 0.6114391088485718, + "step": 2818 + }, + { + "epoch": 0.9934801762114538, + "grad_norm": 1.3761860122661305, + "learning_rate": 1.0942475253128667e-05, + "loss": 0.7583796977996826, + "step": 2819 + }, + { + "epoch": 0.9938325991189427, + "grad_norm": 2.0494911253957735, + "learning_rate": 1.0936674632095683e-05, + "loss": 0.5683549046516418, + "step": 2820 + }, + { + "epoch": 0.9941850220264317, + "grad_norm": 1.4100630352107084, + "learning_rate": 1.0930873693090815e-05, + "loss": 0.5664689540863037, + "step": 2821 + }, + { + "epoch": 0.9945374449339207, + "grad_norm": 1.1859055454278844, + "learning_rate": 1.0925072438083296e-05, + "loss": 0.5799476504325867, + "step": 2822 + }, + { + "epoch": 0.9948898678414096, + "grad_norm": 1.4558284543811444, + "learning_rate": 1.0919270869042475e-05, + "loss": 0.6879112720489502, + "step": 2823 + }, + { + "epoch": 0.9952422907488987, + "grad_norm": 1.3673096151886848, + "learning_rate": 1.09134689879378e-05, + "loss": 0.6348927021026611, + "step": 2824 + }, + { + "epoch": 0.9955947136563876, + "grad_norm": 1.5301215006310536, + "learning_rate": 1.0907666796738839e-05, + "loss": 0.55754554271698, + "step": 2825 + }, + { + "epoch": 0.9959471365638767, + "grad_norm": 1.6611255848189581, + "learning_rate": 1.090186429741524e-05, + "loss": 0.6664899587631226, + "step": 2826 + }, + { + "epoch": 0.9962995594713656, + "grad_norm": 1.3580224067934683, + "learning_rate": 1.0896061491936782e-05, + "loss": 
0.6521929502487183, + "step": 2827 + }, + { + "epoch": 0.9966519823788547, + "grad_norm": 1.4217882734660863, + "learning_rate": 1.0890258382273333e-05, + "loss": 0.542471170425415, + "step": 2828 + }, + { + "epoch": 0.9970044052863436, + "grad_norm": 1.3242120868836005, + "learning_rate": 1.0884454970394871e-05, + "loss": 0.60117506980896, + "step": 2829 + }, + { + "epoch": 0.9973568281938326, + "grad_norm": 1.5563969946549858, + "learning_rate": 1.0878651258271471e-05, + "loss": 0.6783676147460938, + "step": 2830 + }, + { + "epoch": 0.9977092511013216, + "grad_norm": 1.4867095260992749, + "learning_rate": 1.0872847247873315e-05, + "loss": 0.7080766558647156, + "step": 2831 + }, + { + "epoch": 0.9980616740088105, + "grad_norm": 1.7595047000981443, + "learning_rate": 1.0867042941170677e-05, + "loss": 0.9228106141090393, + "step": 2832 + }, + { + "epoch": 0.9984140969162996, + "grad_norm": 1.749212162747955, + "learning_rate": 1.086123834013395e-05, + "loss": 0.7601282596588135, + "step": 2833 + }, + { + "epoch": 0.9987665198237885, + "grad_norm": 1.388473564306277, + "learning_rate": 1.0855433446733607e-05, + "loss": 0.7101393342018127, + "step": 2834 + }, + { + "epoch": 0.9991189427312775, + "grad_norm": 1.426665891638417, + "learning_rate": 1.084962826294023e-05, + "loss": 0.5006242394447327, + "step": 2835 + }, + { + "epoch": 0.9994713656387665, + "grad_norm": 1.6063601330711992, + "learning_rate": 1.08438227907245e-05, + "loss": 0.7270148992538452, + "step": 2836 + }, + { + "epoch": 0.9998237885462555, + "grad_norm": 1.5770914971205114, + "learning_rate": 1.0838017032057194e-05, + "loss": 0.7252628803253174, + "step": 2837 + }, + { + "epoch": 1.0, + "grad_norm": 2.9062070384731578, + "learning_rate": 1.0832210988909187e-05, + "loss": 0.4579252004623413, + "step": 2838 + }, + { + "epoch": 1.000352422907489, + "grad_norm": 1.410073366222354, + "learning_rate": 1.0826404663251446e-05, + "loss": 0.635676920413971, + "step": 2839 + }, + { + "epoch": 
1.0007048458149779, + "grad_norm": 1.5085425099131595, + "learning_rate": 1.0820598057055039e-05, + "loss": 0.6083015203475952, + "step": 2840 + }, + { + "epoch": 1.001057268722467, + "grad_norm": 1.2571881093552235, + "learning_rate": 1.0814791172291132e-05, + "loss": 0.5641704797744751, + "step": 2841 + }, + { + "epoch": 1.001409691629956, + "grad_norm": 1.448254627835315, + "learning_rate": 1.0808984010930981e-05, + "loss": 0.7668559551239014, + "step": 2842 + }, + { + "epoch": 1.001762114537445, + "grad_norm": 1.7836674103878665, + "learning_rate": 1.0803176574945933e-05, + "loss": 0.5205796957015991, + "step": 2843 + }, + { + "epoch": 1.0021145374449338, + "grad_norm": 1.2460568970106132, + "learning_rate": 1.0797368866307431e-05, + "loss": 0.6771252155303955, + "step": 2844 + }, + { + "epoch": 1.002466960352423, + "grad_norm": 1.3246167691239887, + "learning_rate": 1.0791560886987016e-05, + "loss": 0.6101677417755127, + "step": 2845 + }, + { + "epoch": 1.002819383259912, + "grad_norm": 1.683370422985012, + "learning_rate": 1.0785752638956315e-05, + "loss": 0.5651522874832153, + "step": 2846 + }, + { + "epoch": 1.0031718061674009, + "grad_norm": 1.3543139981801942, + "learning_rate": 1.0779944124187048e-05, + "loss": 0.6814571619033813, + "step": 2847 + }, + { + "epoch": 1.0035242290748898, + "grad_norm": 1.5579116379809095, + "learning_rate": 1.0774135344651023e-05, + "loss": 0.6786171197891235, + "step": 2848 + }, + { + "epoch": 1.003876651982379, + "grad_norm": 1.341282658364188, + "learning_rate": 1.0768326302320136e-05, + "loss": 0.5244907736778259, + "step": 2849 + }, + { + "epoch": 1.004229074889868, + "grad_norm": 1.5100504884551087, + "learning_rate": 1.0762516999166383e-05, + "loss": 0.6368712186813354, + "step": 2850 + }, + { + "epoch": 1.0045814977973568, + "grad_norm": 1.3929085404961679, + "learning_rate": 1.0756707437161841e-05, + "loss": 0.6389411687850952, + "step": 2851 + }, + { + "epoch": 1.0049339207048458, + "grad_norm": 1.796913818431425, 
+ "learning_rate": 1.0750897618278675e-05, + "loss": 0.6257550716400146, + "step": 2852 + }, + { + "epoch": 1.0052863436123347, + "grad_norm": 1.384078231158131, + "learning_rate": 1.0745087544489132e-05, + "loss": 0.49478042125701904, + "step": 2853 + }, + { + "epoch": 1.0056387665198239, + "grad_norm": 1.3713236142324383, + "learning_rate": 1.0739277217765558e-05, + "loss": 0.6350952386856079, + "step": 2854 + }, + { + "epoch": 1.0059911894273128, + "grad_norm": 1.4287669419061304, + "learning_rate": 1.0733466640080374e-05, + "loss": 0.6057480573654175, + "step": 2855 + }, + { + "epoch": 1.0063436123348017, + "grad_norm": 1.5646694084149986, + "learning_rate": 1.0727655813406094e-05, + "loss": 0.5545427799224854, + "step": 2856 + }, + { + "epoch": 1.0066960352422907, + "grad_norm": 1.371726691889951, + "learning_rate": 1.0721844739715311e-05, + "loss": 0.55484938621521, + "step": 2857 + }, + { + "epoch": 1.0070484581497798, + "grad_norm": 1.6325523903522516, + "learning_rate": 1.0716033420980703e-05, + "loss": 0.6889834403991699, + "step": 2858 + }, + { + "epoch": 1.0074008810572688, + "grad_norm": 1.928061303452338, + "learning_rate": 1.0710221859175031e-05, + "loss": 0.7259023189544678, + "step": 2859 + }, + { + "epoch": 1.0077533039647577, + "grad_norm": 1.7213820381224034, + "learning_rate": 1.0704410056271144e-05, + "loss": 0.6200032234191895, + "step": 2860 + }, + { + "epoch": 1.0081057268722466, + "grad_norm": 1.2488919699208767, + "learning_rate": 1.069859801424196e-05, + "loss": 0.5357909202575684, + "step": 2861 + }, + { + "epoch": 1.0084581497797356, + "grad_norm": 1.462725629247434, + "learning_rate": 1.0692785735060495e-05, + "loss": 0.8121966123580933, + "step": 2862 + }, + { + "epoch": 1.0088105726872247, + "grad_norm": 1.5047486906511685, + "learning_rate": 1.0686973220699834e-05, + "loss": 0.5698819160461426, + "step": 2863 + }, + { + "epoch": 1.0091629955947137, + "grad_norm": 1.3352019656375154, + "learning_rate": 1.0681160473133144e-05, + 
"loss": 0.6598206162452698, + "step": 2864 + }, + { + "epoch": 1.0095154185022026, + "grad_norm": 1.571854196128042, + "learning_rate": 1.0675347494333667e-05, + "loss": 0.7574363946914673, + "step": 2865 + }, + { + "epoch": 1.0098678414096915, + "grad_norm": 2.0265508752029007, + "learning_rate": 1.0669534286274737e-05, + "loss": 0.6749663949012756, + "step": 2866 + }, + { + "epoch": 1.0102202643171807, + "grad_norm": 1.5445692097493786, + "learning_rate": 1.0663720850929753e-05, + "loss": 0.5932409763336182, + "step": 2867 + }, + { + "epoch": 1.0105726872246696, + "grad_norm": 1.4883467064779885, + "learning_rate": 1.0657907190272197e-05, + "loss": 0.7070773839950562, + "step": 2868 + }, + { + "epoch": 1.0109251101321586, + "grad_norm": 1.6639794076635466, + "learning_rate": 1.0652093306275621e-05, + "loss": 0.531635582447052, + "step": 2869 + }, + { + "epoch": 1.0112775330396475, + "grad_norm": 1.5967103256398283, + "learning_rate": 1.0646279200913665e-05, + "loss": 0.5966447591781616, + "step": 2870 + }, + { + "epoch": 1.0116299559471367, + "grad_norm": 1.5047477869564347, + "learning_rate": 1.0640464876160033e-05, + "loss": 0.6308450698852539, + "step": 2871 + }, + { + "epoch": 1.0119823788546256, + "grad_norm": 1.6938927429813924, + "learning_rate": 1.0634650333988508e-05, + "loss": 0.6477035284042358, + "step": 2872 + }, + { + "epoch": 1.0123348017621145, + "grad_norm": 1.4725648899614407, + "learning_rate": 1.0628835576372942e-05, + "loss": 0.5856079459190369, + "step": 2873 + }, + { + "epoch": 1.0126872246696035, + "grad_norm": 1.6415031005435194, + "learning_rate": 1.062302060528727e-05, + "loss": 0.733691930770874, + "step": 2874 + }, + { + "epoch": 1.0130396475770924, + "grad_norm": 1.6528326658043055, + "learning_rate": 1.0617205422705495e-05, + "loss": 0.6020156145095825, + "step": 2875 + }, + { + "epoch": 1.0133920704845816, + "grad_norm": 1.5978613503890422, + "learning_rate": 1.0611390030601685e-05, + "loss": 0.4980982542037964, + "step": 2876 + }, 
+ { + "epoch": 1.0137444933920705, + "grad_norm": 1.5178573200522583, + "learning_rate": 1.0605574430949983e-05, + "loss": 0.6498349905014038, + "step": 2877 + }, + { + "epoch": 1.0140969162995594, + "grad_norm": 1.7318519084472541, + "learning_rate": 1.0599758625724612e-05, + "loss": 0.6456383466720581, + "step": 2878 + }, + { + "epoch": 1.0144493392070484, + "grad_norm": 1.7056738628689527, + "learning_rate": 1.059394261689985e-05, + "loss": 0.6047386527061462, + "step": 2879 + }, + { + "epoch": 1.0148017621145375, + "grad_norm": 1.6633316847391189, + "learning_rate": 1.0588126406450056e-05, + "loss": 0.641674816608429, + "step": 2880 + }, + { + "epoch": 1.0151541850220265, + "grad_norm": 1.549495353719679, + "learning_rate": 1.0582309996349648e-05, + "loss": 0.6157702207565308, + "step": 2881 + }, + { + "epoch": 1.0155066079295154, + "grad_norm": 1.614686141937513, + "learning_rate": 1.057649338857312e-05, + "loss": 0.6004809737205505, + "step": 2882 + }, + { + "epoch": 1.0158590308370044, + "grad_norm": 1.460588924951717, + "learning_rate": 1.0570676585095028e-05, + "loss": 0.5534430742263794, + "step": 2883 + }, + { + "epoch": 1.0162114537444933, + "grad_norm": 2.0058626486485367, + "learning_rate": 1.0564859587889997e-05, + "loss": 0.7781813144683838, + "step": 2884 + }, + { + "epoch": 1.0165638766519824, + "grad_norm": 1.9228872779765243, + "learning_rate": 1.0559042398932713e-05, + "loss": 0.6949760913848877, + "step": 2885 + }, + { + "epoch": 1.0169162995594714, + "grad_norm": 1.51396598780538, + "learning_rate": 1.0553225020197932e-05, + "loss": 0.5718453526496887, + "step": 2886 + }, + { + "epoch": 1.0172687224669603, + "grad_norm": 1.7835909963123882, + "learning_rate": 1.0547407453660471e-05, + "loss": 0.6689345836639404, + "step": 2887 + }, + { + "epoch": 1.0176211453744493, + "grad_norm": 1.5559332596209525, + "learning_rate": 1.0541589701295222e-05, + "loss": 0.6615442037582397, + "step": 2888 + }, + { + "epoch": 1.0179735682819384, + "grad_norm": 
1.4810070180145358, + "learning_rate": 1.0535771765077121e-05, + "loss": 0.6458337306976318, + "step": 2889 + }, + { + "epoch": 1.0183259911894273, + "grad_norm": 1.4770072284014752, + "learning_rate": 1.052995364698118e-05, + "loss": 0.5330519676208496, + "step": 2890 + }, + { + "epoch": 1.0186784140969163, + "grad_norm": 1.4780636522187705, + "learning_rate": 1.0524135348982467e-05, + "loss": 0.6219571232795715, + "step": 2891 + }, + { + "epoch": 1.0190308370044052, + "grad_norm": 1.4624191661889683, + "learning_rate": 1.0518316873056118e-05, + "loss": 0.6731684803962708, + "step": 2892 + }, + { + "epoch": 1.0193832599118944, + "grad_norm": 1.614741871357758, + "learning_rate": 1.0512498221177319e-05, + "loss": 0.6126813888549805, + "step": 2893 + }, + { + "epoch": 1.0197356828193833, + "grad_norm": 1.4895494518265573, + "learning_rate": 1.0506679395321325e-05, + "loss": 0.5796904563903809, + "step": 2894 + }, + { + "epoch": 1.0200881057268723, + "grad_norm": 1.5545739969005041, + "learning_rate": 1.050086039746344e-05, + "loss": 0.5765914916992188, + "step": 2895 + }, + { + "epoch": 1.0204405286343612, + "grad_norm": 1.3710954206781227, + "learning_rate": 1.0495041229579043e-05, + "loss": 0.4798969328403473, + "step": 2896 + }, + { + "epoch": 1.0207929515418501, + "grad_norm": 1.551476741605498, + "learning_rate": 1.0489221893643553e-05, + "loss": 0.673927366733551, + "step": 2897 + }, + { + "epoch": 1.0211453744493393, + "grad_norm": 1.6211129054938926, + "learning_rate": 1.0483402391632453e-05, + "loss": 0.5681431293487549, + "step": 2898 + }, + { + "epoch": 1.0214977973568282, + "grad_norm": 1.3128793329209902, + "learning_rate": 1.0477582725521287e-05, + "loss": 0.6156354546546936, + "step": 2899 + }, + { + "epoch": 1.0218502202643172, + "grad_norm": 1.4369078255379546, + "learning_rate": 1.0471762897285652e-05, + "loss": 0.6569045782089233, + "step": 2900 + }, + { + "epoch": 1.022202643171806, + "grad_norm": 1.4293089736412674, + "learning_rate": 
1.046594290890119e-05, + "loss": 0.6125048995018005, + "step": 2901 + }, + { + "epoch": 1.0225550660792952, + "grad_norm": 1.6465466140905431, + "learning_rate": 1.0460122762343614e-05, + "loss": 0.604046106338501, + "step": 2902 + }, + { + "epoch": 1.0229074889867842, + "grad_norm": 1.5461286198100506, + "learning_rate": 1.0454302459588677e-05, + "loss": 0.4569816589355469, + "step": 2903 + }, + { + "epoch": 1.0232599118942731, + "grad_norm": 1.6187784923192434, + "learning_rate": 1.0448482002612194e-05, + "loss": 0.5764607787132263, + "step": 2904 + }, + { + "epoch": 1.023612334801762, + "grad_norm": 1.503585291483294, + "learning_rate": 1.044266139339003e-05, + "loss": 0.5859626531600952, + "step": 2905 + }, + { + "epoch": 1.023964757709251, + "grad_norm": 1.6642769825669268, + "learning_rate": 1.04368406338981e-05, + "loss": 0.7326341271400452, + "step": 2906 + }, + { + "epoch": 1.0243171806167402, + "grad_norm": 1.613324765385094, + "learning_rate": 1.0431019726112366e-05, + "loss": 0.6355161070823669, + "step": 2907 + }, + { + "epoch": 1.024669603524229, + "grad_norm": 1.5833367942965741, + "learning_rate": 1.0425198672008851e-05, + "loss": 0.6990653872489929, + "step": 2908 + }, + { + "epoch": 1.025022026431718, + "grad_norm": 2.3098262824716542, + "learning_rate": 1.0419377473563621e-05, + "loss": 0.631952166557312, + "step": 2909 + }, + { + "epoch": 1.025374449339207, + "grad_norm": 1.4397039525414863, + "learning_rate": 1.041355613275279e-05, + "loss": 0.4872596561908722, + "step": 2910 + }, + { + "epoch": 1.0257268722466961, + "grad_norm": 1.5222931253330352, + "learning_rate": 1.0407734651552522e-05, + "loss": 0.5334043502807617, + "step": 2911 + }, + { + "epoch": 1.026079295154185, + "grad_norm": 1.5817730675020623, + "learning_rate": 1.0401913031939026e-05, + "loss": 0.5971134305000305, + "step": 2912 + }, + { + "epoch": 1.026431718061674, + "grad_norm": 1.7562208471394358, + "learning_rate": 1.0396091275888567e-05, + "loss": 0.6527851819992065, + 
"step": 2913 + }, + { + "epoch": 1.026784140969163, + "grad_norm": 1.5387477454353993, + "learning_rate": 1.0390269385377444e-05, + "loss": 0.4515818953514099, + "step": 2914 + }, + { + "epoch": 1.027136563876652, + "grad_norm": 1.4624804092376522, + "learning_rate": 1.0384447362382013e-05, + "loss": 0.530797004699707, + "step": 2915 + }, + { + "epoch": 1.027488986784141, + "grad_norm": 1.4915704465108583, + "learning_rate": 1.0378625208878666e-05, + "loss": 0.5477641224861145, + "step": 2916 + }, + { + "epoch": 1.02784140969163, + "grad_norm": 1.6025052451883606, + "learning_rate": 1.0372802926843843e-05, + "loss": 0.6390479207038879, + "step": 2917 + }, + { + "epoch": 1.028193832599119, + "grad_norm": 1.5706073153963707, + "learning_rate": 1.0366980518254028e-05, + "loss": 0.610755443572998, + "step": 2918 + }, + { + "epoch": 1.0285462555066078, + "grad_norm": 1.4805888577219812, + "learning_rate": 1.036115798508575e-05, + "loss": 0.5427766442298889, + "step": 2919 + }, + { + "epoch": 1.028898678414097, + "grad_norm": 1.4610582929917253, + "learning_rate": 1.0355335329315573e-05, + "loss": 0.621055006980896, + "step": 2920 + }, + { + "epoch": 1.029251101321586, + "grad_norm": 1.7760527372961, + "learning_rate": 1.0349512552920114e-05, + "loss": 0.6098253726959229, + "step": 2921 + }, + { + "epoch": 1.0296035242290749, + "grad_norm": 1.8967300437588117, + "learning_rate": 1.0343689657876017e-05, + "loss": 0.591664731502533, + "step": 2922 + }, + { + "epoch": 1.0299559471365638, + "grad_norm": 1.616730113059231, + "learning_rate": 1.033786664615998e-05, + "loss": 0.6531485915184021, + "step": 2923 + }, + { + "epoch": 1.030308370044053, + "grad_norm": 1.5937698715448299, + "learning_rate": 1.0332043519748727e-05, + "loss": 0.6933655738830566, + "step": 2924 + }, + { + "epoch": 1.030660792951542, + "grad_norm": 1.5987643686429562, + "learning_rate": 1.0326220280619036e-05, + "loss": 0.6512705087661743, + "step": 2925 + }, + { + "epoch": 1.0310132158590308, + 
"grad_norm": 1.829250792437923, + "learning_rate": 1.0320396930747712e-05, + "loss": 0.5671502947807312, + "step": 2926 + }, + { + "epoch": 1.0313656387665198, + "grad_norm": 1.6239123058071627, + "learning_rate": 1.0314573472111601e-05, + "loss": 0.6795192360877991, + "step": 2927 + }, + { + "epoch": 1.0317180616740087, + "grad_norm": 1.5985127083182307, + "learning_rate": 1.0308749906687585e-05, + "loss": 0.6357578039169312, + "step": 2928 + }, + { + "epoch": 1.0320704845814979, + "grad_norm": 1.6982196546251649, + "learning_rate": 1.0302926236452588e-05, + "loss": 0.7009944915771484, + "step": 2929 + }, + { + "epoch": 1.0324229074889868, + "grad_norm": 1.4806960711115318, + "learning_rate": 1.0297102463383557e-05, + "loss": 0.4685679078102112, + "step": 2930 + }, + { + "epoch": 1.0327753303964757, + "grad_norm": 1.5429925693746163, + "learning_rate": 1.0291278589457488e-05, + "loss": 0.6359078884124756, + "step": 2931 + }, + { + "epoch": 1.0331277533039647, + "grad_norm": 1.8631741910761805, + "learning_rate": 1.0285454616651398e-05, + "loss": 0.6606266498565674, + "step": 2932 + }, + { + "epoch": 1.0334801762114538, + "grad_norm": 1.7076039728900445, + "learning_rate": 1.0279630546942353e-05, + "loss": 0.5405932664871216, + "step": 2933 + }, + { + "epoch": 1.0338325991189428, + "grad_norm": 1.4934491606364382, + "learning_rate": 1.0273806382307443e-05, + "loss": 0.8072758316993713, + "step": 2934 + }, + { + "epoch": 1.0341850220264317, + "grad_norm": 1.5899951805886359, + "learning_rate": 1.0267982124723783e-05, + "loss": 0.6923058032989502, + "step": 2935 + }, + { + "epoch": 1.0345374449339206, + "grad_norm": 1.7156977270346485, + "learning_rate": 1.0262157776168533e-05, + "loss": 0.5577275156974792, + "step": 2936 + }, + { + "epoch": 1.0348898678414098, + "grad_norm": 1.6363417924911698, + "learning_rate": 1.0256333338618875e-05, + "loss": 0.6780786514282227, + "step": 2937 + }, + { + "epoch": 1.0352422907488987, + "grad_norm": 1.6093019454005904, + 
"learning_rate": 1.0250508814052029e-05, + "loss": 0.6966040134429932, + "step": 2938 + }, + { + "epoch": 1.0355947136563877, + "grad_norm": 1.4912092272159942, + "learning_rate": 1.0244684204445237e-05, + "loss": 0.5726339817047119, + "step": 2939 + }, + { + "epoch": 1.0359471365638766, + "grad_norm": 1.372791278777169, + "learning_rate": 1.0238859511775768e-05, + "loss": 0.64924156665802, + "step": 2940 + }, + { + "epoch": 1.0362995594713655, + "grad_norm": 1.5498611273448277, + "learning_rate": 1.0233034738020933e-05, + "loss": 0.49121707677841187, + "step": 2941 + }, + { + "epoch": 1.0366519823788547, + "grad_norm": 1.4698297870867278, + "learning_rate": 1.0227209885158053e-05, + "loss": 0.5505814552307129, + "step": 2942 + }, + { + "epoch": 1.0370044052863436, + "grad_norm": 1.658171020881214, + "learning_rate": 1.022138495516449e-05, + "loss": 0.7429872751235962, + "step": 2943 + }, + { + "epoch": 1.0373568281938326, + "grad_norm": 1.5946562373848934, + "learning_rate": 1.0215559950017624e-05, + "loss": 0.6492434740066528, + "step": 2944 + }, + { + "epoch": 1.0377092511013215, + "grad_norm": 1.5139165780476451, + "learning_rate": 1.0209734871694865e-05, + "loss": 0.5418736338615417, + "step": 2945 + }, + { + "epoch": 1.0380616740088107, + "grad_norm": 1.676058492453494, + "learning_rate": 1.0203909722173644e-05, + "loss": 0.6252620220184326, + "step": 2946 + }, + { + "epoch": 1.0384140969162996, + "grad_norm": 1.4699238771485563, + "learning_rate": 1.0198084503431416e-05, + "loss": 0.5124455690383911, + "step": 2947 + }, + { + "epoch": 1.0387665198237885, + "grad_norm": 1.4358343290990208, + "learning_rate": 1.0192259217445663e-05, + "loss": 0.5729688405990601, + "step": 2948 + }, + { + "epoch": 1.0391189427312775, + "grad_norm": 1.8222711908460536, + "learning_rate": 1.0186433866193893e-05, + "loss": 0.5891536474227905, + "step": 2949 + }, + { + "epoch": 1.0394713656387666, + "grad_norm": 1.7110443983801997, + "learning_rate": 1.0180608451653626e-05, + 
"loss": 0.774397075176239, + "step": 2950 + }, + { + "epoch": 1.0398237885462556, + "grad_norm": 1.4480826912481708, + "learning_rate": 1.0174782975802408e-05, + "loss": 0.5987098813056946, + "step": 2951 + }, + { + "epoch": 1.0401762114537445, + "grad_norm": 1.634577600554869, + "learning_rate": 1.016895744061781e-05, + "loss": 0.5334598422050476, + "step": 2952 + }, + { + "epoch": 1.0405286343612334, + "grad_norm": 1.7236175912347957, + "learning_rate": 1.0163131848077421e-05, + "loss": 0.5946340560913086, + "step": 2953 + }, + { + "epoch": 1.0408810572687224, + "grad_norm": 1.601606630295311, + "learning_rate": 1.0157306200158847e-05, + "loss": 0.5780941247940063, + "step": 2954 + }, + { + "epoch": 1.0412334801762115, + "grad_norm": 1.6785528445522104, + "learning_rate": 1.0151480498839712e-05, + "loss": 0.6348963975906372, + "step": 2955 + }, + { + "epoch": 1.0415859030837005, + "grad_norm": 1.717999985242494, + "learning_rate": 1.014565474609766e-05, + "loss": 0.6868102550506592, + "step": 2956 + }, + { + "epoch": 1.0419383259911894, + "grad_norm": 1.6612318546166622, + "learning_rate": 1.0139828943910358e-05, + "loss": 0.6507548689842224, + "step": 2957 + }, + { + "epoch": 1.0422907488986783, + "grad_norm": 1.7617270521903845, + "learning_rate": 1.0134003094255478e-05, + "loss": 0.6358312964439392, + "step": 2958 + }, + { + "epoch": 1.0426431718061675, + "grad_norm": 1.5725895362844704, + "learning_rate": 1.0128177199110723e-05, + "loss": 0.7530224919319153, + "step": 2959 + }, + { + "epoch": 1.0429955947136564, + "grad_norm": 1.5496338862557548, + "learning_rate": 1.012235126045379e-05, + "loss": 0.545819878578186, + "step": 2960 + }, + { + "epoch": 1.0433480176211454, + "grad_norm": 1.5828250584633938, + "learning_rate": 1.011652528026242e-05, + "loss": 0.6626788377761841, + "step": 2961 + }, + { + "epoch": 1.0437004405286343, + "grad_norm": 1.6913571400986156, + "learning_rate": 1.0110699260514336e-05, + "loss": 0.6407896280288696, + "step": 2962 + }, + { 
+ "epoch": 1.0440528634361232, + "grad_norm": 1.4558906354554821, + "learning_rate": 1.0104873203187307e-05, + "loss": 0.5633673667907715, + "step": 2963 + }, + { + "epoch": 1.0444052863436124, + "grad_norm": 1.6991226564822444, + "learning_rate": 1.0099047110259081e-05, + "loss": 0.5356892943382263, + "step": 2964 + }, + { + "epoch": 1.0447577092511013, + "grad_norm": 1.6571256461175092, + "learning_rate": 1.0093220983707448e-05, + "loss": 0.5527205467224121, + "step": 2965 + }, + { + "epoch": 1.0451101321585903, + "grad_norm": 1.5928434384321621, + "learning_rate": 1.008739482551019e-05, + "loss": 0.6148320436477661, + "step": 2966 + }, + { + "epoch": 1.0454625550660792, + "grad_norm": 1.8604930696261837, + "learning_rate": 1.0081568637645111e-05, + "loss": 0.5713976621627808, + "step": 2967 + }, + { + "epoch": 1.0458149779735684, + "grad_norm": 1.4811105317563769, + "learning_rate": 1.0075742422090015e-05, + "loss": 0.5836226940155029, + "step": 2968 + }, + { + "epoch": 1.0461674008810573, + "grad_norm": 1.829134506733255, + "learning_rate": 1.0069916180822727e-05, + "loss": 0.6452749371528625, + "step": 2969 + }, + { + "epoch": 1.0465198237885462, + "grad_norm": 1.507975881410604, + "learning_rate": 1.006408991582107e-05, + "loss": 0.5468501448631287, + "step": 2970 + }, + { + "epoch": 1.0468722466960352, + "grad_norm": 1.6217984014708016, + "learning_rate": 1.0058263629062883e-05, + "loss": 0.5195704698562622, + "step": 2971 + }, + { + "epoch": 1.0472246696035241, + "grad_norm": 1.603914403857505, + "learning_rate": 1.0052437322526003e-05, + "loss": 0.5144641995429993, + "step": 2972 + }, + { + "epoch": 1.0475770925110133, + "grad_norm": 1.767647834896278, + "learning_rate": 1.004661099818829e-05, + "loss": 0.7258927822113037, + "step": 2973 + }, + { + "epoch": 1.0479295154185022, + "grad_norm": 1.8920163745404244, + "learning_rate": 1.004078465802759e-05, + "loss": 0.6108053922653198, + "step": 2974 + }, + { + "epoch": 1.0482819383259911, + "grad_norm": 
1.5703096539855212, + "learning_rate": 1.0034958304021766e-05, + "loss": 0.612535834312439, + "step": 2975 + }, + { + "epoch": 1.04863436123348, + "grad_norm": 1.6902304674604145, + "learning_rate": 1.0029131938148686e-05, + "loss": 0.7272380590438843, + "step": 2976 + }, + { + "epoch": 1.0489867841409692, + "grad_norm": 1.4306480582223446, + "learning_rate": 1.0023305562386222e-05, + "loss": 0.4748264253139496, + "step": 2977 + }, + { + "epoch": 1.0493392070484582, + "grad_norm": 1.7625234188194432, + "learning_rate": 1.0017479178712245e-05, + "loss": 0.6686758399009705, + "step": 2978 + }, + { + "epoch": 1.0496916299559471, + "grad_norm": 1.6796969203533192, + "learning_rate": 1.0011652789104631e-05, + "loss": 0.5003838539123535, + "step": 2979 + }, + { + "epoch": 1.050044052863436, + "grad_norm": 1.7305572983583226, + "learning_rate": 1.0005826395541257e-05, + "loss": 0.6210055351257324, + "step": 2980 + }, + { + "epoch": 1.0503964757709252, + "grad_norm": 1.6943397299052507, + "learning_rate": 1e-05, + "loss": 0.6160269975662231, + "step": 2981 + }, + { + "epoch": 1.0507488986784141, + "grad_norm": 1.6249468093767248, + "learning_rate": 9.994173604458748e-06, + "loss": 0.6432052850723267, + "step": 2982 + }, + { + "epoch": 1.051101321585903, + "grad_norm": 1.6764234439374022, + "learning_rate": 9.988347210895372e-06, + "loss": 0.588628888130188, + "step": 2983 + }, + { + "epoch": 1.051453744493392, + "grad_norm": 1.5595740377523009, + "learning_rate": 9.982520821287758e-06, + "loss": 0.6694320440292358, + "step": 2984 + }, + { + "epoch": 1.051806167400881, + "grad_norm": 1.7276474901524372, + "learning_rate": 9.976694437613778e-06, + "loss": 0.8591301441192627, + "step": 2985 + }, + { + "epoch": 1.0521585903083701, + "grad_norm": 1.6697380234108412, + "learning_rate": 9.970868061851315e-06, + "loss": 0.6000436544418335, + "step": 2986 + }, + { + "epoch": 1.052511013215859, + "grad_norm": 1.5357275356358564, + "learning_rate": 9.965041695978239e-06, + "loss": 
0.624568521976471, + "step": 2987 + }, + { + "epoch": 1.052863436123348, + "grad_norm": 1.4223866897031825, + "learning_rate": 9.959215341972414e-06, + "loss": 0.6173535585403442, + "step": 2988 + }, + { + "epoch": 1.053215859030837, + "grad_norm": 1.7069399452687213, + "learning_rate": 9.953389001811716e-06, + "loss": 0.5991729497909546, + "step": 2989 + }, + { + "epoch": 1.053568281938326, + "grad_norm": 1.782972390393551, + "learning_rate": 9.947562677473999e-06, + "loss": 0.570953905582428, + "step": 2990 + }, + { + "epoch": 1.053920704845815, + "grad_norm": 1.7332305108715658, + "learning_rate": 9.941736370937119e-06, + "loss": 0.6079390048980713, + "step": 2991 + }, + { + "epoch": 1.054273127753304, + "grad_norm": 2.110617001097567, + "learning_rate": 9.935910084178934e-06, + "loss": 0.599539577960968, + "step": 2992 + }, + { + "epoch": 1.0546255506607929, + "grad_norm": 1.5854202353385896, + "learning_rate": 9.930083819177273e-06, + "loss": 0.6736180186271667, + "step": 2993 + }, + { + "epoch": 1.054977973568282, + "grad_norm": 1.6240153775210555, + "learning_rate": 9.924257577909987e-06, + "loss": 0.6953197717666626, + "step": 2994 + }, + { + "epoch": 1.055330396475771, + "grad_norm": 1.8737137053755175, + "learning_rate": 9.918431362354892e-06, + "loss": 0.6670099496841431, + "step": 2995 + }, + { + "epoch": 1.05568281938326, + "grad_norm": 1.844007753613641, + "learning_rate": 9.912605174489811e-06, + "loss": 0.5829994678497314, + "step": 2996 + }, + { + "epoch": 1.0560352422907489, + "grad_norm": 1.9198236703913207, + "learning_rate": 9.906779016292554e-06, + "loss": 0.5926212072372437, + "step": 2997 + }, + { + "epoch": 1.0563876651982378, + "grad_norm": 1.4868752944824364, + "learning_rate": 9.900952889740922e-06, + "loss": 0.6085237860679626, + "step": 2998 + }, + { + "epoch": 1.056740088105727, + "grad_norm": 1.8046049827658854, + "learning_rate": 9.895126796812698e-06, + "loss": 0.5348918437957764, + "step": 2999 + }, + { + "epoch": 
1.0570925110132159, + "grad_norm": 1.79509807280399, + "learning_rate": 9.889300739485666e-06, + "loss": 0.6325811743736267, + "step": 3000 + }, + { + "epoch": 1.0574449339207048, + "grad_norm": 1.6006099839795653, + "learning_rate": 9.883474719737582e-06, + "loss": 0.6262463927268982, + "step": 3001 + }, + { + "epoch": 1.0577973568281938, + "grad_norm": 1.5914788157951554, + "learning_rate": 9.877648739546213e-06, + "loss": 0.5863393545150757, + "step": 3002 + }, + { + "epoch": 1.058149779735683, + "grad_norm": 2.0254476885032924, + "learning_rate": 9.871822800889284e-06, + "loss": 0.6200219392776489, + "step": 3003 + }, + { + "epoch": 1.0585022026431719, + "grad_norm": 1.6216300774961065, + "learning_rate": 9.865996905744523e-06, + "loss": 0.6994227170944214, + "step": 3004 + }, + { + "epoch": 1.0588546255506608, + "grad_norm": 1.735404014120002, + "learning_rate": 9.860171056089646e-06, + "loss": 0.6458406448364258, + "step": 3005 + }, + { + "epoch": 1.0592070484581497, + "grad_norm": 1.6209915560634427, + "learning_rate": 9.854345253902342e-06, + "loss": 0.6814782619476318, + "step": 3006 + }, + { + "epoch": 1.0595594713656387, + "grad_norm": 1.455508358080935, + "learning_rate": 9.84851950116029e-06, + "loss": 0.521275520324707, + "step": 3007 + }, + { + "epoch": 1.0599118942731278, + "grad_norm": 1.486020788258086, + "learning_rate": 9.84269379984116e-06, + "loss": 0.5541207790374756, + "step": 3008 + }, + { + "epoch": 1.0602643171806168, + "grad_norm": 1.7060435970959642, + "learning_rate": 9.836868151922579e-06, + "loss": 0.578704833984375, + "step": 3009 + }, + { + "epoch": 1.0606167400881057, + "grad_norm": 1.5220368339292814, + "learning_rate": 9.831042559382193e-06, + "loss": 0.6280980706214905, + "step": 3010 + }, + { + "epoch": 1.0609691629955946, + "grad_norm": 1.8314917502019485, + "learning_rate": 9.825217024197595e-06, + "loss": 0.6059408783912659, + "step": 3011 + }, + { + "epoch": 1.0613215859030838, + "grad_norm": 1.6362891327789773, + 
"learning_rate": 9.819391548346377e-06, + "loss": 0.6375449299812317, + "step": 3012 + }, + { + "epoch": 1.0616740088105727, + "grad_norm": 2.503364134053993, + "learning_rate": 9.81356613380611e-06, + "loss": 0.5959592461585999, + "step": 3013 + }, + { + "epoch": 1.0620264317180617, + "grad_norm": 1.735073300438408, + "learning_rate": 9.807740782554337e-06, + "loss": 0.7636409401893616, + "step": 3014 + }, + { + "epoch": 1.0623788546255506, + "grad_norm": 2.2227407713805722, + "learning_rate": 9.801915496568586e-06, + "loss": 0.6136656999588013, + "step": 3015 + }, + { + "epoch": 1.0627312775330395, + "grad_norm": 1.7360474444382674, + "learning_rate": 9.796090277826361e-06, + "loss": 0.4659839868545532, + "step": 3016 + }, + { + "epoch": 1.0630837004405287, + "grad_norm": 1.699131973967987, + "learning_rate": 9.790265128305137e-06, + "loss": 0.6053155660629272, + "step": 3017 + }, + { + "epoch": 1.0634361233480176, + "grad_norm": 1.698457126583602, + "learning_rate": 9.78444004998238e-06, + "loss": 0.6885203123092651, + "step": 3018 + }, + { + "epoch": 1.0637885462555066, + "grad_norm": 1.5620062631250171, + "learning_rate": 9.778615044835513e-06, + "loss": 0.4985584616661072, + "step": 3019 + }, + { + "epoch": 1.0641409691629955, + "grad_norm": 1.699890122838272, + "learning_rate": 9.772790114841948e-06, + "loss": 0.5782307386398315, + "step": 3020 + }, + { + "epoch": 1.0644933920704847, + "grad_norm": 1.7427928970766464, + "learning_rate": 9.766965261979072e-06, + "loss": 0.5819451212882996, + "step": 3021 + }, + { + "epoch": 1.0648458149779736, + "grad_norm": 1.9531302264016444, + "learning_rate": 9.761140488224232e-06, + "loss": 0.7316779494285583, + "step": 3022 + }, + { + "epoch": 1.0651982378854625, + "grad_norm": 2.4211241065200633, + "learning_rate": 9.755315795554766e-06, + "loss": 0.5986718535423279, + "step": 3023 + }, + { + "epoch": 1.0655506607929515, + "grad_norm": 1.5565361520380023, + "learning_rate": 9.749491185947977e-06, + "loss": 
0.5052427053451538, + "step": 3024 + }, + { + "epoch": 1.0659030837004406, + "grad_norm": 1.658020296029534, + "learning_rate": 9.743666661381123e-06, + "loss": 0.7370901107788086, + "step": 3025 + }, + { + "epoch": 1.0662555066079296, + "grad_norm": 1.575987435195716, + "learning_rate": 9.73784222383147e-06, + "loss": 0.6423007249832153, + "step": 3026 + }, + { + "epoch": 1.0666079295154185, + "grad_norm": 1.94896820476588, + "learning_rate": 9.73201787527622e-06, + "loss": 0.5679126977920532, + "step": 3027 + }, + { + "epoch": 1.0669603524229074, + "grad_norm": 2.498602043471406, + "learning_rate": 9.72619361769256e-06, + "loss": 0.5890183448791504, + "step": 3028 + }, + { + "epoch": 1.0673127753303966, + "grad_norm": 1.7647674693242208, + "learning_rate": 9.720369453057648e-06, + "loss": 0.6772822141647339, + "step": 3029 + }, + { + "epoch": 1.0676651982378855, + "grad_norm": 2.109810086892336, + "learning_rate": 9.714545383348602e-06, + "loss": 0.8275488615036011, + "step": 3030 + }, + { + "epoch": 1.0680176211453745, + "grad_norm": 1.6620933678667917, + "learning_rate": 9.708721410542517e-06, + "loss": 0.5369541645050049, + "step": 3031 + }, + { + "epoch": 1.0683700440528634, + "grad_norm": 1.611800532750273, + "learning_rate": 9.70289753661645e-06, + "loss": 0.7173746824264526, + "step": 3032 + }, + { + "epoch": 1.0687224669603523, + "grad_norm": 1.7405771304623092, + "learning_rate": 9.697073763547415e-06, + "loss": 0.597034215927124, + "step": 3033 + }, + { + "epoch": 1.0690748898678415, + "grad_norm": 1.867958529307263, + "learning_rate": 9.691250093312419e-06, + "loss": 0.6680281162261963, + "step": 3034 + }, + { + "epoch": 1.0694273127753304, + "grad_norm": 1.4898600082698874, + "learning_rate": 9.6854265278884e-06, + "loss": 0.6155321002006531, + "step": 3035 + }, + { + "epoch": 1.0697797356828194, + "grad_norm": 2.4613840016445314, + "learning_rate": 9.67960306925229e-06, + "loss": 0.5945199728012085, + "step": 3036 + }, + { + "epoch": 
1.0701321585903083, + "grad_norm": 1.7063166475670735, + "learning_rate": 9.673779719380967e-06, + "loss": 0.6492328643798828, + "step": 3037 + }, + { + "epoch": 1.0704845814977975, + "grad_norm": 1.8638826733925389, + "learning_rate": 9.667956480251273e-06, + "loss": 0.6501325964927673, + "step": 3038 + }, + { + "epoch": 1.0708370044052864, + "grad_norm": 1.4216071761527918, + "learning_rate": 9.662133353840025e-06, + "loss": 0.5956053733825684, + "step": 3039 + }, + { + "epoch": 1.0711894273127753, + "grad_norm": 1.7546711372901296, + "learning_rate": 9.656310342123988e-06, + "loss": 0.5966510772705078, + "step": 3040 + }, + { + "epoch": 1.0715418502202643, + "grad_norm": 1.7715803220306194, + "learning_rate": 9.65048744707989e-06, + "loss": 0.7096615433692932, + "step": 3041 + }, + { + "epoch": 1.0718942731277532, + "grad_norm": 1.5279732385894715, + "learning_rate": 9.644664670684429e-06, + "loss": 0.6697839498519897, + "step": 3042 + }, + { + "epoch": 1.0722466960352424, + "grad_norm": 1.6318262899161158, + "learning_rate": 9.638842014914253e-06, + "loss": 0.6288081407546997, + "step": 3043 + }, + { + "epoch": 1.0725991189427313, + "grad_norm": 1.6830476156095877, + "learning_rate": 9.633019481745973e-06, + "loss": 0.5870436429977417, + "step": 3044 + }, + { + "epoch": 1.0729515418502202, + "grad_norm": 1.4073037692368846, + "learning_rate": 9.62719707315616e-06, + "loss": 0.5540846586227417, + "step": 3045 + }, + { + "epoch": 1.0733039647577092, + "grad_norm": 1.8276869267624827, + "learning_rate": 9.621374791121335e-06, + "loss": 0.6134544014930725, + "step": 3046 + }, + { + "epoch": 1.0736563876651983, + "grad_norm": 1.9310361455307938, + "learning_rate": 9.61555263761799e-06, + "loss": 0.5537046194076538, + "step": 3047 + }, + { + "epoch": 1.0740088105726873, + "grad_norm": 1.5553451953770387, + "learning_rate": 9.60973061462256e-06, + "loss": 0.6423748731613159, + "step": 3048 + }, + { + "epoch": 1.0743612334801762, + "grad_norm": 1.7219317421679232, + 
"learning_rate": 9.603908724111438e-06, + "loss": 0.575737714767456, + "step": 3049 + }, + { + "epoch": 1.0747136563876651, + "grad_norm": 1.7334347992355148, + "learning_rate": 9.598086968060976e-06, + "loss": 0.5326197147369385, + "step": 3050 + }, + { + "epoch": 1.075066079295154, + "grad_norm": 1.5560472770838902, + "learning_rate": 9.592265348447481e-06, + "loss": 0.6533973217010498, + "step": 3051 + }, + { + "epoch": 1.0754185022026432, + "grad_norm": 1.5101678591543142, + "learning_rate": 9.586443867247212e-06, + "loss": 0.5536586046218872, + "step": 3052 + }, + { + "epoch": 1.0757709251101322, + "grad_norm": 1.6611779528904365, + "learning_rate": 9.580622526436382e-06, + "loss": 0.6024892926216125, + "step": 3053 + }, + { + "epoch": 1.076123348017621, + "grad_norm": 1.5423440836231639, + "learning_rate": 9.574801327991148e-06, + "loss": 0.5070478320121765, + "step": 3054 + }, + { + "epoch": 1.07647577092511, + "grad_norm": 1.9040251147858696, + "learning_rate": 9.568980273887637e-06, + "loss": 0.6518458127975464, + "step": 3055 + }, + { + "epoch": 1.0768281938325992, + "grad_norm": 1.8761852451910037, + "learning_rate": 9.563159366101905e-06, + "loss": 0.6120346784591675, + "step": 3056 + }, + { + "epoch": 1.0771806167400881, + "grad_norm": 1.7428937123650154, + "learning_rate": 9.557338606609973e-06, + "loss": 0.6725353598594666, + "step": 3057 + }, + { + "epoch": 1.077533039647577, + "grad_norm": 1.5136863007311347, + "learning_rate": 9.551517997387809e-06, + "loss": 0.5311183333396912, + "step": 3058 + }, + { + "epoch": 1.077885462555066, + "grad_norm": 1.8000300040025692, + "learning_rate": 9.545697540411324e-06, + "loss": 0.5728713274002075, + "step": 3059 + }, + { + "epoch": 1.0782378854625552, + "grad_norm": 1.7991281029512354, + "learning_rate": 9.53987723765639e-06, + "loss": 0.5527676343917847, + "step": 3060 + }, + { + "epoch": 1.078590308370044, + "grad_norm": 1.9177712397501578, + "learning_rate": 9.534057091098813e-06, + "loss": 
0.7529809474945068, + "step": 3061 + }, + { + "epoch": 1.078942731277533, + "grad_norm": 1.6975104946869117, + "learning_rate": 9.528237102714352e-06, + "loss": 0.5485205054283142, + "step": 3062 + }, + { + "epoch": 1.079295154185022, + "grad_norm": 1.8773141561341242, + "learning_rate": 9.522417274478716e-06, + "loss": 0.785184383392334, + "step": 3063 + }, + { + "epoch": 1.079647577092511, + "grad_norm": 1.692195190429073, + "learning_rate": 9.516597608367547e-06, + "loss": 0.5645574331283569, + "step": 3064 + }, + { + "epoch": 1.08, + "grad_norm": 1.6815198266991151, + "learning_rate": 9.51077810635645e-06, + "loss": 0.5878466367721558, + "step": 3065 + }, + { + "epoch": 1.080352422907489, + "grad_norm": 1.7635464385467587, + "learning_rate": 9.504958770420962e-06, + "loss": 0.6610634922981262, + "step": 3066 + }, + { + "epoch": 1.080704845814978, + "grad_norm": 1.8113852263213976, + "learning_rate": 9.49913960253656e-06, + "loss": 0.5928626656532288, + "step": 3067 + }, + { + "epoch": 1.0810572687224669, + "grad_norm": 1.7322633216843277, + "learning_rate": 9.49332060467868e-06, + "loss": 0.7038083672523499, + "step": 3068 + }, + { + "epoch": 1.081409691629956, + "grad_norm": 1.3686406289588096, + "learning_rate": 9.487501778822685e-06, + "loss": 0.5966217517852783, + "step": 3069 + }, + { + "epoch": 1.081762114537445, + "grad_norm": 1.686172060324731, + "learning_rate": 9.481683126943884e-06, + "loss": 0.6594187021255493, + "step": 3070 + }, + { + "epoch": 1.082114537444934, + "grad_norm": 1.4709153501511232, + "learning_rate": 9.475864651017536e-06, + "loss": 0.450161874294281, + "step": 3071 + }, + { + "epoch": 1.0824669603524228, + "grad_norm": 1.9209170149530705, + "learning_rate": 9.470046353018821e-06, + "loss": 0.6459252834320068, + "step": 3072 + }, + { + "epoch": 1.082819383259912, + "grad_norm": 1.5818284678879686, + "learning_rate": 9.464228234922882e-06, + "loss": 0.6505793929100037, + "step": 3073 + }, + { + "epoch": 1.083171806167401, + 
"grad_norm": 1.5944722571395005, + "learning_rate": 9.458410298704781e-06, + "loss": 0.6480910778045654, + "step": 3074 + }, + { + "epoch": 1.0835242290748899, + "grad_norm": 1.530550500951046, + "learning_rate": 9.452592546339527e-06, + "loss": 0.6494983434677124, + "step": 3075 + }, + { + "epoch": 1.0838766519823788, + "grad_norm": 1.560525752678919, + "learning_rate": 9.44677497980207e-06, + "loss": 0.4710897207260132, + "step": 3076 + }, + { + "epoch": 1.0842290748898677, + "grad_norm": 1.5265540562186208, + "learning_rate": 9.440957601067294e-06, + "loss": 0.599402904510498, + "step": 3077 + }, + { + "epoch": 1.084581497797357, + "grad_norm": 1.9340764168188993, + "learning_rate": 9.435140412110006e-06, + "loss": 0.665642499923706, + "step": 3078 + }, + { + "epoch": 1.0849339207048458, + "grad_norm": 1.8868033009058576, + "learning_rate": 9.429323414904975e-06, + "loss": 0.5861828923225403, + "step": 3079 + }, + { + "epoch": 1.0852863436123348, + "grad_norm": 1.581789489047221, + "learning_rate": 9.42350661142688e-06, + "loss": 0.6115351915359497, + "step": 3080 + }, + { + "epoch": 1.0856387665198237, + "grad_norm": 1.6610293276945491, + "learning_rate": 9.417690003650353e-06, + "loss": 0.6627066135406494, + "step": 3081 + }, + { + "epoch": 1.0859911894273129, + "grad_norm": 1.5744692750190625, + "learning_rate": 9.411873593549947e-06, + "loss": 0.6155676245689392, + "step": 3082 + }, + { + "epoch": 1.0863436123348018, + "grad_norm": 1.59429166731528, + "learning_rate": 9.406057383100151e-06, + "loss": 0.5429089069366455, + "step": 3083 + }, + { + "epoch": 1.0866960352422907, + "grad_norm": 1.638763712553269, + "learning_rate": 9.400241374275391e-06, + "loss": 0.5416614413261414, + "step": 3084 + }, + { + "epoch": 1.0870484581497797, + "grad_norm": 1.5652840639245515, + "learning_rate": 9.394425569050018e-06, + "loss": 0.6708710193634033, + "step": 3085 + }, + { + "epoch": 1.0874008810572686, + "grad_norm": 1.6407899201706977, + "learning_rate": 
9.388609969398318e-06, + "loss": 0.588347315788269, + "step": 3086 + }, + { + "epoch": 1.0877533039647578, + "grad_norm": 1.6990356352816562, + "learning_rate": 9.38279457729451e-06, + "loss": 0.4999222755432129, + "step": 3087 + }, + { + "epoch": 1.0881057268722467, + "grad_norm": 1.5508462782114225, + "learning_rate": 9.37697939471273e-06, + "loss": 0.5400034189224243, + "step": 3088 + }, + { + "epoch": 1.0884581497797357, + "grad_norm": 1.6869985582255194, + "learning_rate": 9.37116442362706e-06, + "loss": 0.5583670139312744, + "step": 3089 + }, + { + "epoch": 1.0888105726872246, + "grad_norm": 2.063349590123988, + "learning_rate": 9.365349666011497e-06, + "loss": 0.6863820552825928, + "step": 3090 + }, + { + "epoch": 1.0891629955947137, + "grad_norm": 1.7395123823701124, + "learning_rate": 9.35953512383997e-06, + "loss": 0.6422115564346313, + "step": 3091 + }, + { + "epoch": 1.0895154185022027, + "grad_norm": 1.7254266288951046, + "learning_rate": 9.353720799086337e-06, + "loss": 0.7106888294219971, + "step": 3092 + }, + { + "epoch": 1.0898678414096916, + "grad_norm": 1.7765997338600088, + "learning_rate": 9.347906693724379e-06, + "loss": 0.6070472002029419, + "step": 3093 + }, + { + "epoch": 1.0902202643171806, + "grad_norm": 2.653468303504809, + "learning_rate": 9.342092809727807e-06, + "loss": 0.577377200126648, + "step": 3094 + }, + { + "epoch": 1.0905726872246695, + "grad_norm": 2.222722693331331, + "learning_rate": 9.336279149070252e-06, + "loss": 0.6249948740005493, + "step": 3095 + }, + { + "epoch": 1.0909251101321586, + "grad_norm": 1.7155188858933852, + "learning_rate": 9.330465713725265e-06, + "loss": 0.5515183210372925, + "step": 3096 + }, + { + "epoch": 1.0912775330396476, + "grad_norm": 1.866411497064146, + "learning_rate": 9.324652505666336e-06, + "loss": 0.6074613332748413, + "step": 3097 + }, + { + "epoch": 1.0916299559471365, + "grad_norm": 1.7632595046666684, + "learning_rate": 9.318839526866863e-06, + "loss": 0.6520178318023682, + "step": 
3098 + }, + { + "epoch": 1.0919823788546255, + "grad_norm": 1.4274715968201055, + "learning_rate": 9.31302677930017e-06, + "loss": 0.45863813161849976, + "step": 3099 + }, + { + "epoch": 1.0923348017621146, + "grad_norm": 1.6772052003130429, + "learning_rate": 9.307214264939508e-06, + "loss": 0.610805869102478, + "step": 3100 + }, + { + "epoch": 1.0926872246696036, + "grad_norm": 1.6545163632346178, + "learning_rate": 9.30140198575804e-06, + "loss": 0.5954282283782959, + "step": 3101 + }, + { + "epoch": 1.0930396475770925, + "grad_norm": 1.4805927694864789, + "learning_rate": 9.29558994372886e-06, + "loss": 0.6941400170326233, + "step": 3102 + }, + { + "epoch": 1.0933920704845814, + "grad_norm": 1.4236727289117346, + "learning_rate": 9.289778140824974e-06, + "loss": 0.6723533868789673, + "step": 3103 + }, + { + "epoch": 1.0937444933920706, + "grad_norm": 1.5690147341016918, + "learning_rate": 9.2839665790193e-06, + "loss": 0.49137037992477417, + "step": 3104 + }, + { + "epoch": 1.0940969162995595, + "grad_norm": 1.6112616837583658, + "learning_rate": 9.278155260284692e-06, + "loss": 0.5827045440673828, + "step": 3105 + }, + { + "epoch": 1.0944493392070485, + "grad_norm": 1.7496187485651187, + "learning_rate": 9.272344186593909e-06, + "loss": 0.6391462683677673, + "step": 3106 + }, + { + "epoch": 1.0948017621145374, + "grad_norm": 1.857839078789808, + "learning_rate": 9.266533359919628e-06, + "loss": 0.4994915723800659, + "step": 3107 + }, + { + "epoch": 1.0951541850220265, + "grad_norm": 1.7820549618718244, + "learning_rate": 9.260722782234445e-06, + "loss": 0.6480728387832642, + "step": 3108 + }, + { + "epoch": 1.0955066079295155, + "grad_norm": 1.9724258404436363, + "learning_rate": 9.25491245551087e-06, + "loss": 0.5734057426452637, + "step": 3109 + }, + { + "epoch": 1.0958590308370044, + "grad_norm": 1.5757198230236702, + "learning_rate": 9.249102381721328e-06, + "loss": 0.5650345087051392, + "step": 3110 + }, + { + "epoch": 1.0962114537444934, + "grad_norm": 
1.6196253415823336, + "learning_rate": 9.243292562838164e-06, + "loss": 0.6261975765228271, + "step": 3111 + }, + { + "epoch": 1.0965638766519823, + "grad_norm": 1.6283298345999566, + "learning_rate": 9.237483000833619e-06, + "loss": 0.730735182762146, + "step": 3112 + }, + { + "epoch": 1.0969162995594715, + "grad_norm": 1.614573149399901, + "learning_rate": 9.231673697679867e-06, + "loss": 0.6198948621749878, + "step": 3113 + }, + { + "epoch": 1.0972687224669604, + "grad_norm": 1.526191646446162, + "learning_rate": 9.225864655348982e-06, + "loss": 0.5302865505218506, + "step": 3114 + }, + { + "epoch": 1.0976211453744493, + "grad_norm": 1.6895671377093768, + "learning_rate": 9.220055875812955e-06, + "loss": 0.5995128154754639, + "step": 3115 + }, + { + "epoch": 1.0979735682819383, + "grad_norm": 1.5451580100020488, + "learning_rate": 9.214247361043687e-06, + "loss": 0.3801479935646057, + "step": 3116 + }, + { + "epoch": 1.0983259911894274, + "grad_norm": 1.7467243659333909, + "learning_rate": 9.208439113012984e-06, + "loss": 0.5617209076881409, + "step": 3117 + }, + { + "epoch": 1.0986784140969164, + "grad_norm": 2.3313501330545776, + "learning_rate": 9.202631133692572e-06, + "loss": 0.5233842134475708, + "step": 3118 + }, + { + "epoch": 1.0990308370044053, + "grad_norm": 1.5308784453968334, + "learning_rate": 9.196823425054073e-06, + "loss": 0.5300124883651733, + "step": 3119 + }, + { + "epoch": 1.0993832599118942, + "grad_norm": 1.6766914696070794, + "learning_rate": 9.191015989069024e-06, + "loss": 0.686185359954834, + "step": 3120 + }, + { + "epoch": 1.0997356828193832, + "grad_norm": 4.625699614895419, + "learning_rate": 9.18520882770887e-06, + "loss": 0.6043056845664978, + "step": 3121 + }, + { + "epoch": 1.1000881057268723, + "grad_norm": 1.4445640616396158, + "learning_rate": 9.179401942944961e-06, + "loss": 0.6299905776977539, + "step": 3122 + }, + { + "epoch": 1.1004405286343613, + "grad_norm": 1.639683344548818, + "learning_rate": 9.173595336748557e-06, 
+ "loss": 0.57872474193573, + "step": 3123 + }, + { + "epoch": 1.1007929515418502, + "grad_norm": 1.6533643796746975, + "learning_rate": 9.167789011090818e-06, + "loss": 0.5638746023178101, + "step": 3124 + }, + { + "epoch": 1.1011453744493391, + "grad_norm": 1.9780317067618627, + "learning_rate": 9.161982967942806e-06, + "loss": 0.6150490045547485, + "step": 3125 + }, + { + "epoch": 1.1014977973568283, + "grad_norm": 1.6035565827670604, + "learning_rate": 9.156177209275503e-06, + "loss": 0.547231912612915, + "step": 3126 + }, + { + "epoch": 1.1018502202643172, + "grad_norm": 1.753224578445511, + "learning_rate": 9.150371737059773e-06, + "loss": 0.6999325752258301, + "step": 3127 + }, + { + "epoch": 1.1022026431718062, + "grad_norm": 1.868897492269033, + "learning_rate": 9.144566553266396e-06, + "loss": 0.7175568342208862, + "step": 3128 + }, + { + "epoch": 1.102555066079295, + "grad_norm": 1.6615553040601516, + "learning_rate": 9.138761659866054e-06, + "loss": 0.7308273911476135, + "step": 3129 + }, + { + "epoch": 1.102907488986784, + "grad_norm": 1.6216416819643327, + "learning_rate": 9.132957058829323e-06, + "loss": 0.5951930284500122, + "step": 3130 + }, + { + "epoch": 1.1032599118942732, + "grad_norm": 1.8459198222998503, + "learning_rate": 9.127152752126688e-06, + "loss": 0.5684988498687744, + "step": 3131 + }, + { + "epoch": 1.1036123348017621, + "grad_norm": 1.6778026851292638, + "learning_rate": 9.121348741728532e-06, + "loss": 0.6490764617919922, + "step": 3132 + }, + { + "epoch": 1.103964757709251, + "grad_norm": 1.9759558630482505, + "learning_rate": 9.115545029605129e-06, + "loss": 0.7795257568359375, + "step": 3133 + }, + { + "epoch": 1.10431718061674, + "grad_norm": 1.677150279034534, + "learning_rate": 9.10974161772667e-06, + "loss": 0.5443774461746216, + "step": 3134 + }, + { + "epoch": 1.1046696035242292, + "grad_norm": 1.4979331299176493, + "learning_rate": 9.103938508063223e-06, + "loss": 0.48989373445510864, + "step": 3135 + }, + { + "epoch": 
1.105022026431718, + "grad_norm": 1.7384756252454785, + "learning_rate": 9.098135702584762e-06, + "loss": 0.5628808736801147, + "step": 3136 + }, + { + "epoch": 1.105374449339207, + "grad_norm": 1.7853238397751252, + "learning_rate": 9.092333203261168e-06, + "loss": 0.6549321413040161, + "step": 3137 + }, + { + "epoch": 1.105726872246696, + "grad_norm": 1.6854667721006384, + "learning_rate": 9.0865310120622e-06, + "loss": 0.7353606224060059, + "step": 3138 + }, + { + "epoch": 1.106079295154185, + "grad_norm": 1.4467352618974103, + "learning_rate": 9.080729130957528e-06, + "loss": 0.650668203830719, + "step": 3139 + }, + { + "epoch": 1.106431718061674, + "grad_norm": 1.4313841589857448, + "learning_rate": 9.07492756191671e-06, + "loss": 0.5618860721588135, + "step": 3140 + }, + { + "epoch": 1.106784140969163, + "grad_norm": 1.6263891772619556, + "learning_rate": 9.069126306909187e-06, + "loss": 0.5532773733139038, + "step": 3141 + }, + { + "epoch": 1.107136563876652, + "grad_norm": 1.5761547934103723, + "learning_rate": 9.06332536790432e-06, + "loss": 0.6240289211273193, + "step": 3142 + }, + { + "epoch": 1.1074889867841409, + "grad_norm": 1.6326282131144043, + "learning_rate": 9.057524746871335e-06, + "loss": 0.5952814221382141, + "step": 3143 + }, + { + "epoch": 1.10784140969163, + "grad_norm": 1.7063742447281478, + "learning_rate": 9.051724445779373e-06, + "loss": 0.6011646389961243, + "step": 3144 + }, + { + "epoch": 1.108193832599119, + "grad_norm": 1.54385403751274, + "learning_rate": 9.045924466597448e-06, + "loss": 0.6964641213417053, + "step": 3145 + }, + { + "epoch": 1.108546255506608, + "grad_norm": 1.9798851390043897, + "learning_rate": 9.040124811294473e-06, + "loss": 0.6821622848510742, + "step": 3146 + }, + { + "epoch": 1.1088986784140968, + "grad_norm": 1.569676973352834, + "learning_rate": 9.034325481839253e-06, + "loss": 0.5045080184936523, + "step": 3147 + }, + { + "epoch": 1.109251101321586, + "grad_norm": 1.608921739397865, + "learning_rate": 
9.028526480200482e-06, + "loss": 0.5709735155105591, + "step": 3148 + }, + { + "epoch": 1.109603524229075, + "grad_norm": 1.6331449251948336, + "learning_rate": 9.022727808346731e-06, + "loss": 0.5882325172424316, + "step": 3149 + }, + { + "epoch": 1.1099559471365639, + "grad_norm": 1.6560869042500304, + "learning_rate": 9.016929468246482e-06, + "loss": 0.627426266670227, + "step": 3150 + }, + { + "epoch": 1.1103083700440528, + "grad_norm": 1.5720686051365462, + "learning_rate": 9.011131461868078e-06, + "loss": 0.42419761419296265, + "step": 3151 + }, + { + "epoch": 1.110660792951542, + "grad_norm": 1.487398401726564, + "learning_rate": 9.005333791179775e-06, + "loss": 0.5261023044586182, + "step": 3152 + }, + { + "epoch": 1.111013215859031, + "grad_norm": 1.853640852117203, + "learning_rate": 8.999536458149692e-06, + "loss": 0.6654448509216309, + "step": 3153 + }, + { + "epoch": 1.1113656387665198, + "grad_norm": 1.8252144061899127, + "learning_rate": 8.993739464745843e-06, + "loss": 0.5939514636993408, + "step": 3154 + }, + { + "epoch": 1.1117180616740088, + "grad_norm": 2.120048901517583, + "learning_rate": 8.987942812936133e-06, + "loss": 0.6381959319114685, + "step": 3155 + }, + { + "epoch": 1.1120704845814977, + "grad_norm": 1.5708485505419778, + "learning_rate": 8.982146504688343e-06, + "loss": 0.5474847555160522, + "step": 3156 + }, + { + "epoch": 1.1124229074889869, + "grad_norm": 1.9617265332983251, + "learning_rate": 8.97635054197013e-06, + "loss": 0.6306884288787842, + "step": 3157 + }, + { + "epoch": 1.1127753303964758, + "grad_norm": 1.6582794196349533, + "learning_rate": 8.97055492674906e-06, + "loss": 0.5988807678222656, + "step": 3158 + }, + { + "epoch": 1.1131277533039647, + "grad_norm": 1.4627681911625667, + "learning_rate": 8.964759660992547e-06, + "loss": 0.6316757202148438, + "step": 3159 + }, + { + "epoch": 1.1134801762114537, + "grad_norm": 2.1475966254528265, + "learning_rate": 8.958964746667917e-06, + "loss": 0.6031370162963867, + "step": 
3160 + }, + { + "epoch": 1.1138325991189428, + "grad_norm": 1.631780585948097, + "learning_rate": 8.953170185742357e-06, + "loss": 0.6334977149963379, + "step": 3161 + }, + { + "epoch": 1.1141850220264318, + "grad_norm": 1.7666867258825858, + "learning_rate": 8.947375980182937e-06, + "loss": 0.49237731099128723, + "step": 3162 + }, + { + "epoch": 1.1145374449339207, + "grad_norm": 1.8113939325794732, + "learning_rate": 8.941582131956615e-06, + "loss": 0.7349523305892944, + "step": 3163 + }, + { + "epoch": 1.1148898678414096, + "grad_norm": 1.9764498599764084, + "learning_rate": 8.935788643030218e-06, + "loss": 0.5048422813415527, + "step": 3164 + }, + { + "epoch": 1.1152422907488986, + "grad_norm": 1.90381850621639, + "learning_rate": 8.92999551537046e-06, + "loss": 0.6217244267463684, + "step": 3165 + }, + { + "epoch": 1.1155947136563877, + "grad_norm": 1.6579628905821213, + "learning_rate": 8.924202750943926e-06, + "loss": 0.4949147701263428, + "step": 3166 + }, + { + "epoch": 1.1159471365638767, + "grad_norm": 1.8665150826118222, + "learning_rate": 8.918410351717074e-06, + "loss": 0.5975630283355713, + "step": 3167 + }, + { + "epoch": 1.1162995594713656, + "grad_norm": 1.8627553919144322, + "learning_rate": 8.91261831965625e-06, + "loss": 0.7546026110649109, + "step": 3168 + }, + { + "epoch": 1.1166519823788545, + "grad_norm": 1.8785066059323416, + "learning_rate": 8.906826656727665e-06, + "loss": 0.6238037347793579, + "step": 3169 + }, + { + "epoch": 1.1170044052863437, + "grad_norm": 1.7775910427875068, + "learning_rate": 8.901035364897407e-06, + "loss": 0.617587685585022, + "step": 3170 + }, + { + "epoch": 1.1173568281938326, + "grad_norm": 1.6345696523196545, + "learning_rate": 8.895244446131445e-06, + "loss": 0.4834432005882263, + "step": 3171 + }, + { + "epoch": 1.1177092511013216, + "grad_norm": 1.8061061322305951, + "learning_rate": 8.889453902395608e-06, + "loss": 0.614972710609436, + "step": 3172 + }, + { + "epoch": 1.1180616740088105, + "grad_norm": 
2.26536947887869, + "learning_rate": 8.883663735655612e-06, + "loss": 0.6468379497528076, + "step": 3173 + }, + { + "epoch": 1.1184140969162994, + "grad_norm": 1.8154030785363677, + "learning_rate": 8.877873947877042e-06, + "loss": 0.6372466683387756, + "step": 3174 + }, + { + "epoch": 1.1187665198237886, + "grad_norm": 1.8831907584481906, + "learning_rate": 8.872084541025336e-06, + "loss": 0.6295863389968872, + "step": 3175 + }, + { + "epoch": 1.1191189427312775, + "grad_norm": 1.7211075291863254, + "learning_rate": 8.866295517065831e-06, + "loss": 0.6109524369239807, + "step": 3176 + }, + { + "epoch": 1.1194713656387665, + "grad_norm": 1.6861537948886334, + "learning_rate": 8.860506877963715e-06, + "loss": 0.6724812388420105, + "step": 3177 + }, + { + "epoch": 1.1198237885462554, + "grad_norm": 1.4091706259139964, + "learning_rate": 8.854718625684049e-06, + "loss": 0.6612162590026855, + "step": 3178 + }, + { + "epoch": 1.1201762114537446, + "grad_norm": 1.6332443405139663, + "learning_rate": 8.84893076219177e-06, + "loss": 0.6209636926651001, + "step": 3179 + }, + { + "epoch": 1.1205286343612335, + "grad_norm": 1.7567347030111673, + "learning_rate": 8.843143289451673e-06, + "loss": 0.8548281192779541, + "step": 3180 + }, + { + "epoch": 1.1208810572687224, + "grad_norm": 1.742397796953756, + "learning_rate": 8.837356209428428e-06, + "loss": 0.4621508717536926, + "step": 3181 + }, + { + "epoch": 1.1212334801762114, + "grad_norm": 1.8553184481302196, + "learning_rate": 8.831569524086568e-06, + "loss": 0.5065817832946777, + "step": 3182 + }, + { + "epoch": 1.1215859030837005, + "grad_norm": 1.5532313157641433, + "learning_rate": 8.825783235390488e-06, + "loss": 0.5467691421508789, + "step": 3183 + }, + { + "epoch": 1.1219383259911895, + "grad_norm": 1.3786030341795126, + "learning_rate": 8.81999734530446e-06, + "loss": 0.4938517212867737, + "step": 3184 + }, + { + "epoch": 1.1222907488986784, + "grad_norm": 1.4972934746199023, + "learning_rate": 
8.814211855792609e-06, + "loss": 0.6125702857971191, + "step": 3185 + }, + { + "epoch": 1.1226431718061674, + "grad_norm": 1.427476145591487, + "learning_rate": 8.80842676881893e-06, + "loss": 0.5272841453552246, + "step": 3186 + }, + { + "epoch": 1.1229955947136563, + "grad_norm": 1.8463623605620603, + "learning_rate": 8.802642086347278e-06, + "loss": 0.5595715045928955, + "step": 3187 + }, + { + "epoch": 1.1233480176211454, + "grad_norm": 1.7533827268189746, + "learning_rate": 8.796857810341375e-06, + "loss": 0.7178677916526794, + "step": 3188 + }, + { + "epoch": 1.1237004405286344, + "grad_norm": 2.166791630557212, + "learning_rate": 8.791073942764806e-06, + "loss": 0.6000991463661194, + "step": 3189 + }, + { + "epoch": 1.1240528634361233, + "grad_norm": 1.7926160729471858, + "learning_rate": 8.785290485581008e-06, + "loss": 0.537361741065979, + "step": 3190 + }, + { + "epoch": 1.1244052863436123, + "grad_norm": 1.7666842188914018, + "learning_rate": 8.779507440753286e-06, + "loss": 0.7135556936264038, + "step": 3191 + }, + { + "epoch": 1.1247577092511014, + "grad_norm": 1.7053825384185084, + "learning_rate": 8.773724810244805e-06, + "loss": 0.501063346862793, + "step": 3192 + }, + { + "epoch": 1.1251101321585903, + "grad_norm": 1.679109568038749, + "learning_rate": 8.767942596018587e-06, + "loss": 0.6885302662849426, + "step": 3193 + }, + { + "epoch": 1.1254625550660793, + "grad_norm": 1.321748305255468, + "learning_rate": 8.762160800037516e-06, + "loss": 0.5902360081672668, + "step": 3194 + }, + { + "epoch": 1.1258149779735682, + "grad_norm": 1.687654327550192, + "learning_rate": 8.75637942426433e-06, + "loss": 0.6308953762054443, + "step": 3195 + }, + { + "epoch": 1.1261674008810574, + "grad_norm": 1.8380657710321036, + "learning_rate": 8.750598470661625e-06, + "loss": 0.5710124969482422, + "step": 3196 + }, + { + "epoch": 1.1265198237885463, + "grad_norm": 1.76295044659038, + "learning_rate": 8.744817941191862e-06, + "loss": 0.6110632419586182, + "step": 
3197 + }, + { + "epoch": 1.1268722466960353, + "grad_norm": 1.7274451742305768, + "learning_rate": 8.73903783781734e-06, + "loss": 0.5274624824523926, + "step": 3198 + }, + { + "epoch": 1.1272246696035242, + "grad_norm": 1.549070468504263, + "learning_rate": 8.733258162500228e-06, + "loss": 0.6144713163375854, + "step": 3199 + }, + { + "epoch": 1.1275770925110131, + "grad_norm": 1.8001185698886477, + "learning_rate": 8.727478917202551e-06, + "loss": 0.6404621005058289, + "step": 3200 + }, + { + "epoch": 1.1279295154185023, + "grad_norm": 1.602548541775438, + "learning_rate": 8.721700103886177e-06, + "loss": 0.5693025588989258, + "step": 3201 + }, + { + "epoch": 1.1282819383259912, + "grad_norm": 1.6563446017851289, + "learning_rate": 8.715921724512838e-06, + "loss": 0.5631159543991089, + "step": 3202 + }, + { + "epoch": 1.1286343612334802, + "grad_norm": 1.5785191171510689, + "learning_rate": 8.710143781044113e-06, + "loss": 0.648078441619873, + "step": 3203 + }, + { + "epoch": 1.128986784140969, + "grad_norm": 2.0721270642934666, + "learning_rate": 8.704366275441426e-06, + "loss": 0.6858379244804382, + "step": 3204 + }, + { + "epoch": 1.1293392070484582, + "grad_norm": 1.8203927475030908, + "learning_rate": 8.698589209666074e-06, + "loss": 0.7244000434875488, + "step": 3205 + }, + { + "epoch": 1.1296916299559472, + "grad_norm": 1.7775130777760553, + "learning_rate": 8.692812585679182e-06, + "loss": 0.5918365716934204, + "step": 3206 + }, + { + "epoch": 1.1300440528634361, + "grad_norm": 1.8950041670387165, + "learning_rate": 8.687036405441733e-06, + "loss": 0.6893443465232849, + "step": 3207 + }, + { + "epoch": 1.130396475770925, + "grad_norm": 1.6934464725865028, + "learning_rate": 8.681260670914564e-06, + "loss": 0.729834794998169, + "step": 3208 + }, + { + "epoch": 1.130748898678414, + "grad_norm": 1.9278305082183818, + "learning_rate": 8.675485384058356e-06, + "loss": 0.6525821685791016, + "step": 3209 + }, + { + "epoch": 1.1311013215859032, + "grad_norm": 
1.7892045210081244, + "learning_rate": 8.669710546833642e-06, + "loss": 0.6799874305725098, + "step": 3210 + }, + { + "epoch": 1.131453744493392, + "grad_norm": 1.6216385781826248, + "learning_rate": 8.6639361612008e-06, + "loss": 0.5614932775497437, + "step": 3211 + }, + { + "epoch": 1.131806167400881, + "grad_norm": 1.6912315117870094, + "learning_rate": 8.658162229120045e-06, + "loss": 0.5975101590156555, + "step": 3212 + }, + { + "epoch": 1.13215859030837, + "grad_norm": 1.7352702737909875, + "learning_rate": 8.652388752551458e-06, + "loss": 0.5367887020111084, + "step": 3213 + }, + { + "epoch": 1.1325110132158591, + "grad_norm": 1.360358935584503, + "learning_rate": 8.646615733454949e-06, + "loss": 0.4451865553855896, + "step": 3214 + }, + { + "epoch": 1.132863436123348, + "grad_norm": 1.8983821913108012, + "learning_rate": 8.64084317379028e-06, + "loss": 0.6482576131820679, + "step": 3215 + }, + { + "epoch": 1.133215859030837, + "grad_norm": 1.5858394578763535, + "learning_rate": 8.635071075517053e-06, + "loss": 0.5890318155288696, + "step": 3216 + }, + { + "epoch": 1.133568281938326, + "grad_norm": 1.6567929917802857, + "learning_rate": 8.629299440594719e-06, + "loss": 0.554576575756073, + "step": 3217 + }, + { + "epoch": 1.1339207048458149, + "grad_norm": 1.6966150183280715, + "learning_rate": 8.623528270982567e-06, + "loss": 0.5987116694450378, + "step": 3218 + }, + { + "epoch": 1.134273127753304, + "grad_norm": 1.8696533969224407, + "learning_rate": 8.617757568639731e-06, + "loss": 0.49857625365257263, + "step": 3219 + }, + { + "epoch": 1.134625550660793, + "grad_norm": 1.6960564098429034, + "learning_rate": 8.61198733552518e-06, + "loss": 0.6116641759872437, + "step": 3220 + }, + { + "epoch": 1.134977973568282, + "grad_norm": 1.6619215502907394, + "learning_rate": 8.606217573597738e-06, + "loss": 0.4346674978733063, + "step": 3221 + }, + { + "epoch": 1.1353303964757708, + "grad_norm": 1.6058889875943096, + "learning_rate": 8.600448284816046e-06, + 
"loss": 0.6973283290863037, + "step": 3222 + }, + { + "epoch": 1.13568281938326, + "grad_norm": 1.547791232560021, + "learning_rate": 8.594679471138613e-06, + "loss": 0.5457896590232849, + "step": 3223 + }, + { + "epoch": 1.136035242290749, + "grad_norm": 1.6457593373386994, + "learning_rate": 8.58891113452376e-06, + "loss": 0.4520479440689087, + "step": 3224 + }, + { + "epoch": 1.1363876651982379, + "grad_norm": 1.6501706928794149, + "learning_rate": 8.58314327692966e-06, + "loss": 0.6169587969779968, + "step": 3225 + }, + { + "epoch": 1.1367400881057268, + "grad_norm": 1.729795732302939, + "learning_rate": 8.577375900314327e-06, + "loss": 0.6398670673370361, + "step": 3226 + }, + { + "epoch": 1.1370925110132157, + "grad_norm": 1.6846614829900397, + "learning_rate": 8.571609006635604e-06, + "loss": 0.5772207975387573, + "step": 3227 + }, + { + "epoch": 1.137444933920705, + "grad_norm": 1.5622430074284195, + "learning_rate": 8.565842597851165e-06, + "loss": 0.5561503171920776, + "step": 3228 + }, + { + "epoch": 1.1377973568281938, + "grad_norm": 1.644881271079104, + "learning_rate": 8.560076675918537e-06, + "loss": 0.4702373743057251, + "step": 3229 + }, + { + "epoch": 1.1381497797356828, + "grad_norm": 1.778044829497574, + "learning_rate": 8.554311242795061e-06, + "loss": 0.5967564582824707, + "step": 3230 + }, + { + "epoch": 1.138502202643172, + "grad_norm": 1.782270527802186, + "learning_rate": 8.548546300437928e-06, + "loss": 0.4749453663825989, + "step": 3231 + }, + { + "epoch": 1.1388546255506609, + "grad_norm": 2.2009062727733046, + "learning_rate": 8.542781850804155e-06, + "loss": 0.6939869523048401, + "step": 3232 + }, + { + "epoch": 1.1392070484581498, + "grad_norm": 1.4327701228186707, + "learning_rate": 8.537017895850593e-06, + "loss": 0.5618892908096313, + "step": 3233 + }, + { + "epoch": 1.1395594713656387, + "grad_norm": 1.6784618730938181, + "learning_rate": 8.531254437533925e-06, + "loss": 0.6627654433250427, + "step": 3234 + }, + { + "epoch": 
1.1399118942731277, + "grad_norm": 1.770712809653697, + "learning_rate": 8.525491477810671e-06, + "loss": 0.6365151405334473, + "step": 3235 + }, + { + "epoch": 1.1402643171806168, + "grad_norm": 1.6623213186798471, + "learning_rate": 8.519729018637164e-06, + "loss": 0.5207303762435913, + "step": 3236 + }, + { + "epoch": 1.1406167400881058, + "grad_norm": 1.8240600257881658, + "learning_rate": 8.513967061969594e-06, + "loss": 0.7469059228897095, + "step": 3237 + }, + { + "epoch": 1.1409691629955947, + "grad_norm": 1.7786802310337648, + "learning_rate": 8.508205609763955e-06, + "loss": 0.5778630971908569, + "step": 3238 + }, + { + "epoch": 1.1413215859030836, + "grad_norm": 1.756406665695002, + "learning_rate": 8.502444663976089e-06, + "loss": 0.5447480082511902, + "step": 3239 + }, + { + "epoch": 1.1416740088105728, + "grad_norm": 1.628690443424602, + "learning_rate": 8.496684226561653e-06, + "loss": 0.6002986431121826, + "step": 3240 + }, + { + "epoch": 1.1420264317180617, + "grad_norm": 1.7257255594282812, + "learning_rate": 8.490924299476133e-06, + "loss": 0.7627072930335999, + "step": 3241 + }, + { + "epoch": 1.1423788546255507, + "grad_norm": 1.725113553289998, + "learning_rate": 8.485164884674854e-06, + "loss": 0.6406078338623047, + "step": 3242 + }, + { + "epoch": 1.1427312775330396, + "grad_norm": 2.110533369358698, + "learning_rate": 8.479405984112949e-06, + "loss": 0.47047436237335205, + "step": 3243 + }, + { + "epoch": 1.1430837004405285, + "grad_norm": 2.0564519486525903, + "learning_rate": 8.473647599745393e-06, + "loss": 0.6702529191970825, + "step": 3244 + }, + { + "epoch": 1.1434361233480177, + "grad_norm": 2.1168699536348488, + "learning_rate": 8.467889733526977e-06, + "loss": 0.6570258140563965, + "step": 3245 + }, + { + "epoch": 1.1437885462555066, + "grad_norm": 11.021488641985083, + "learning_rate": 8.462132387412312e-06, + "loss": 0.6248423457145691, + "step": 3246 + }, + { + "epoch": 1.1441409691629956, + "grad_norm": 1.6339128666105858, + 
"learning_rate": 8.456375563355842e-06, + "loss": 0.7377427816390991, + "step": 3247 + }, + { + "epoch": 1.1444933920704845, + "grad_norm": 1.8159484011485405, + "learning_rate": 8.45061926331183e-06, + "loss": 0.6469020843505859, + "step": 3248 + }, + { + "epoch": 1.1448458149779737, + "grad_norm": 1.81461416151687, + "learning_rate": 8.444863489234356e-06, + "loss": 0.6417430639266968, + "step": 3249 + }, + { + "epoch": 1.1451982378854626, + "grad_norm": 1.7715952211280361, + "learning_rate": 8.439108243077335e-06, + "loss": 0.5447275638580322, + "step": 3250 + }, + { + "epoch": 1.1455506607929515, + "grad_norm": 1.8341737914542349, + "learning_rate": 8.433353526794484e-06, + "loss": 0.6621315479278564, + "step": 3251 + }, + { + "epoch": 1.1459030837004405, + "grad_norm": 1.850872292820976, + "learning_rate": 8.42759934233936e-06, + "loss": 0.5660392045974731, + "step": 3252 + }, + { + "epoch": 1.1462555066079294, + "grad_norm": 1.695638018183687, + "learning_rate": 8.42184569166532e-06, + "loss": 0.43074172735214233, + "step": 3253 + }, + { + "epoch": 1.1466079295154186, + "grad_norm": 1.6152519611154568, + "learning_rate": 8.416092576725554e-06, + "loss": 0.5863226056098938, + "step": 3254 + }, + { + "epoch": 1.1469603524229075, + "grad_norm": 1.8724827582882198, + "learning_rate": 8.410339999473067e-06, + "loss": 0.6003422737121582, + "step": 3255 + }, + { + "epoch": 1.1473127753303964, + "grad_norm": 1.806876842860533, + "learning_rate": 8.404587961860678e-06, + "loss": 0.6109241247177124, + "step": 3256 + }, + { + "epoch": 1.1476651982378854, + "grad_norm": 1.7768687099142642, + "learning_rate": 8.398836465841021e-06, + "loss": 0.5749140977859497, + "step": 3257 + }, + { + "epoch": 1.1480176211453745, + "grad_norm": 1.762377433704451, + "learning_rate": 8.393085513366557e-06, + "loss": 0.6920739412307739, + "step": 3258 + }, + { + "epoch": 1.1483700440528635, + "grad_norm": 1.903311052790267, + "learning_rate": 8.38733510638955e-06, + "loss": 
0.6632573008537292, + "step": 3259 + }, + { + "epoch": 1.1487224669603524, + "grad_norm": 1.925929272799836, + "learning_rate": 8.381585246862091e-06, + "loss": 0.6396503448486328, + "step": 3260 + }, + { + "epoch": 1.1490748898678413, + "grad_norm": 4.327872701462553, + "learning_rate": 8.375835936736072e-06, + "loss": 0.5975937843322754, + "step": 3261 + }, + { + "epoch": 1.1494273127753303, + "grad_norm": 1.9097739370767552, + "learning_rate": 8.370087177963204e-06, + "loss": 0.6297920346260071, + "step": 3262 + }, + { + "epoch": 1.1497797356828194, + "grad_norm": 1.6773858737351708, + "learning_rate": 8.364338972495016e-06, + "loss": 0.7004375457763672, + "step": 3263 + }, + { + "epoch": 1.1501321585903084, + "grad_norm": 1.9905333664754346, + "learning_rate": 8.358591322282845e-06, + "loss": 0.5850871801376343, + "step": 3264 + }, + { + "epoch": 1.1504845814977973, + "grad_norm": 1.6216139435027066, + "learning_rate": 8.352844229277834e-06, + "loss": 0.493900865316391, + "step": 3265 + }, + { + "epoch": 1.1508370044052865, + "grad_norm": 1.8994324319983171, + "learning_rate": 8.34709769543095e-06, + "loss": 0.573354959487915, + "step": 3266 + }, + { + "epoch": 1.1511894273127754, + "grad_norm": 2.1672972359364175, + "learning_rate": 8.341351722692951e-06, + "loss": 0.7154442667961121, + "step": 3267 + }, + { + "epoch": 1.1515418502202643, + "grad_norm": 1.705511845117997, + "learning_rate": 8.335606313014432e-06, + "loss": 0.5429074764251709, + "step": 3268 + }, + { + "epoch": 1.1518942731277533, + "grad_norm": 1.8606068751906144, + "learning_rate": 8.329861468345768e-06, + "loss": 0.6938891410827637, + "step": 3269 + }, + { + "epoch": 1.1522466960352422, + "grad_norm": 5.765839224937511, + "learning_rate": 8.324117190637157e-06, + "loss": 0.7114205360412598, + "step": 3270 + }, + { + "epoch": 1.1525991189427314, + "grad_norm": 1.761532917196708, + "learning_rate": 8.318373481838605e-06, + "loss": 0.5353071093559265, + "step": 3271 + }, + { + "epoch": 
1.1529515418502203, + "grad_norm": 1.931038515640054, + "learning_rate": 8.312630343899921e-06, + "loss": 0.7838516235351562, + "step": 3272 + }, + { + "epoch": 1.1533039647577092, + "grad_norm": 2.013028743927059, + "learning_rate": 8.306887778770724e-06, + "loss": 0.630479633808136, + "step": 3273 + }, + { + "epoch": 1.1536563876651982, + "grad_norm": 1.908388737326531, + "learning_rate": 8.301145788400438e-06, + "loss": 0.6568116545677185, + "step": 3274 + }, + { + "epoch": 1.1540088105726873, + "grad_norm": 1.4673620532583986, + "learning_rate": 8.295404374738278e-06, + "loss": 0.5410804748535156, + "step": 3275 + }, + { + "epoch": 1.1543612334801763, + "grad_norm": 2.0887831204496017, + "learning_rate": 8.289663539733292e-06, + "loss": 0.6699862480163574, + "step": 3276 + }, + { + "epoch": 1.1547136563876652, + "grad_norm": 2.146352543425904, + "learning_rate": 8.283923285334304e-06, + "loss": 0.6828576326370239, + "step": 3277 + }, + { + "epoch": 1.1550660792951541, + "grad_norm": 1.6441665475307043, + "learning_rate": 8.278183613489951e-06, + "loss": 0.5569214820861816, + "step": 3278 + }, + { + "epoch": 1.155418502202643, + "grad_norm": 1.5736783771881073, + "learning_rate": 8.27244452614868e-06, + "loss": 0.6276477575302124, + "step": 3279 + }, + { + "epoch": 1.1557709251101322, + "grad_norm": 1.639795393267647, + "learning_rate": 8.266706025258727e-06, + "loss": 0.5752792954444885, + "step": 3280 + }, + { + "epoch": 1.1561233480176212, + "grad_norm": 1.8007170708068962, + "learning_rate": 8.260968112768137e-06, + "loss": 0.6149388551712036, + "step": 3281 + }, + { + "epoch": 1.1564757709251101, + "grad_norm": 1.8241425629966381, + "learning_rate": 8.255230790624755e-06, + "loss": 0.6399196982383728, + "step": 3282 + }, + { + "epoch": 1.156828193832599, + "grad_norm": 1.8065599712551461, + "learning_rate": 8.249494060776215e-06, + "loss": 0.6927458047866821, + "step": 3283 + }, + { + "epoch": 1.1571806167400882, + "grad_norm": 1.5535864037785454, + 
"learning_rate": 8.243757925169968e-06, + "loss": 0.5843946933746338, + "step": 3284 + }, + { + "epoch": 1.1575330396475771, + "grad_norm": 1.7771012211418213, + "learning_rate": 8.238022385753248e-06, + "loss": 0.6469332575798035, + "step": 3285 + }, + { + "epoch": 1.157885462555066, + "grad_norm": 1.5500454202505596, + "learning_rate": 8.23228744447309e-06, + "loss": 0.572630763053894, + "step": 3286 + }, + { + "epoch": 1.158237885462555, + "grad_norm": 1.7219264264044976, + "learning_rate": 8.226553103276335e-06, + "loss": 0.6872239112854004, + "step": 3287 + }, + { + "epoch": 1.158590308370044, + "grad_norm": 1.7206454172461807, + "learning_rate": 8.220819364109607e-06, + "loss": 0.5116995573043823, + "step": 3288 + }, + { + "epoch": 1.1589427312775331, + "grad_norm": 1.7411708693012447, + "learning_rate": 8.215086228919336e-06, + "loss": 0.6179347038269043, + "step": 3289 + }, + { + "epoch": 1.159295154185022, + "grad_norm": 1.5098995111565061, + "learning_rate": 8.209353699651745e-06, + "loss": 0.573688805103302, + "step": 3290 + }, + { + "epoch": 1.159647577092511, + "grad_norm": 1.6209084651188936, + "learning_rate": 8.20362177825284e-06, + "loss": 0.6622583866119385, + "step": 3291 + }, + { + "epoch": 1.16, + "grad_norm": 1.9043199482736668, + "learning_rate": 8.197890466668441e-06, + "loss": 0.4945096969604492, + "step": 3292 + }, + { + "epoch": 1.160352422907489, + "grad_norm": 1.9477760218669748, + "learning_rate": 8.19215976684414e-06, + "loss": 0.5657082796096802, + "step": 3293 + }, + { + "epoch": 1.160704845814978, + "grad_norm": 1.7301213281073105, + "learning_rate": 8.186429680725339e-06, + "loss": 0.5684623122215271, + "step": 3294 + }, + { + "epoch": 1.161057268722467, + "grad_norm": 1.7781389678625354, + "learning_rate": 8.180700210257223e-06, + "loss": 0.567638635635376, + "step": 3295 + }, + { + "epoch": 1.1614096916299559, + "grad_norm": 1.8973989975016394, + "learning_rate": 8.174971357384762e-06, + "loss": 0.7182992696762085, + "step": 
3296 + }, + { + "epoch": 1.1617621145374448, + "grad_norm": 1.7202032555937063, + "learning_rate": 8.169243124052731e-06, + "loss": 0.7188737392425537, + "step": 3297 + }, + { + "epoch": 1.162114537444934, + "grad_norm": 1.632750713102644, + "learning_rate": 8.163515512205687e-06, + "loss": 0.5532418489456177, + "step": 3298 + }, + { + "epoch": 1.162466960352423, + "grad_norm": 2.2725291479645136, + "learning_rate": 8.157788523787967e-06, + "loss": 0.7167447209358215, + "step": 3299 + }, + { + "epoch": 1.1628193832599119, + "grad_norm": 1.8053860419209504, + "learning_rate": 8.152062160743716e-06, + "loss": 0.633411169052124, + "step": 3300 + }, + { + "epoch": 1.1631718061674008, + "grad_norm": 1.8006555184567121, + "learning_rate": 8.146336425016849e-06, + "loss": 0.6686321496963501, + "step": 3301 + }, + { + "epoch": 1.16352422907489, + "grad_norm": 1.884331587638867, + "learning_rate": 8.140611318551078e-06, + "loss": 0.608701765537262, + "step": 3302 + }, + { + "epoch": 1.1638766519823789, + "grad_norm": 1.6532674404979102, + "learning_rate": 8.1348868432899e-06, + "loss": 0.5607466101646423, + "step": 3303 + }, + { + "epoch": 1.1642290748898678, + "grad_norm": 1.9224536271892947, + "learning_rate": 8.12916300117659e-06, + "loss": 0.6397457122802734, + "step": 3304 + }, + { + "epoch": 1.1645814977973568, + "grad_norm": 1.9075190910370474, + "learning_rate": 8.123439794154223e-06, + "loss": 0.6681507229804993, + "step": 3305 + }, + { + "epoch": 1.1649339207048457, + "grad_norm": 1.7601065273352539, + "learning_rate": 8.117717224165645e-06, + "loss": 0.5549972057342529, + "step": 3306 + }, + { + "epoch": 1.1652863436123349, + "grad_norm": 1.9981914923817063, + "learning_rate": 8.111995293153486e-06, + "loss": 0.7519058585166931, + "step": 3307 + }, + { + "epoch": 1.1656387665198238, + "grad_norm": 1.8817978978557874, + "learning_rate": 8.106274003060172e-06, + "loss": 0.7100121378898621, + "step": 3308 + }, + { + "epoch": 1.1659911894273127, + "grad_norm": 
2.081586750876693, + "learning_rate": 8.100553355827897e-06, + "loss": 0.6297321319580078, + "step": 3309 + }, + { + "epoch": 1.1663436123348019, + "grad_norm": 2.2854313216105635, + "learning_rate": 8.094833353398645e-06, + "loss": 0.6875895857810974, + "step": 3310 + }, + { + "epoch": 1.1666960352422908, + "grad_norm": 1.7297215389141958, + "learning_rate": 8.08911399771418e-06, + "loss": 0.5369099974632263, + "step": 3311 + }, + { + "epoch": 1.1670484581497798, + "grad_norm": 1.7209622601094259, + "learning_rate": 8.083395290716042e-06, + "loss": 0.5598124265670776, + "step": 3312 + }, + { + "epoch": 1.1674008810572687, + "grad_norm": 1.6153396072397332, + "learning_rate": 8.077677234345557e-06, + "loss": 0.6438342332839966, + "step": 3313 + }, + { + "epoch": 1.1677533039647576, + "grad_norm": 1.649767256033485, + "learning_rate": 8.07195983054383e-06, + "loss": 0.5558618307113647, + "step": 3314 + }, + { + "epoch": 1.1681057268722468, + "grad_norm": 1.744681713922102, + "learning_rate": 8.06624308125173e-06, + "loss": 0.5729602575302124, + "step": 3315 + }, + { + "epoch": 1.1684581497797357, + "grad_norm": 2.294706401477936, + "learning_rate": 8.060526988409929e-06, + "loss": 0.5094903707504272, + "step": 3316 + }, + { + "epoch": 1.1688105726872247, + "grad_norm": 1.6352779890455922, + "learning_rate": 8.054811553958853e-06, + "loss": 0.6605818867683411, + "step": 3317 + }, + { + "epoch": 1.1691629955947136, + "grad_norm": 2.240048633930669, + "learning_rate": 8.04909677983872e-06, + "loss": 0.7929576635360718, + "step": 3318 + }, + { + "epoch": 1.1695154185022028, + "grad_norm": 1.7445241989865017, + "learning_rate": 8.043382667989514e-06, + "loss": 0.5915192365646362, + "step": 3319 + }, + { + "epoch": 1.1698678414096917, + "grad_norm": 1.6537456786938194, + "learning_rate": 8.037669220351e-06, + "loss": 0.5923853516578674, + "step": 3320 + }, + { + "epoch": 1.1702202643171806, + "grad_norm": 1.7692219343864357, + "learning_rate": 8.031956438862718e-06, + 
"loss": 0.7034223079681396, + "step": 3321 + }, + { + "epoch": 1.1705726872246696, + "grad_norm": 1.699093684077835, + "learning_rate": 8.026244325463975e-06, + "loss": 0.6093307733535767, + "step": 3322 + }, + { + "epoch": 1.1709251101321585, + "grad_norm": 1.820021264359909, + "learning_rate": 8.020532882093862e-06, + "loss": 0.5709424614906311, + "step": 3323 + }, + { + "epoch": 1.1712775330396477, + "grad_norm": 1.6327248259933085, + "learning_rate": 8.01482211069123e-06, + "loss": 0.5242069959640503, + "step": 3324 + }, + { + "epoch": 1.1716299559471366, + "grad_norm": 1.8755413800206977, + "learning_rate": 8.009112013194707e-06, + "loss": 0.5869580507278442, + "step": 3325 + }, + { + "epoch": 1.1719823788546255, + "grad_norm": 1.927667149386539, + "learning_rate": 8.0034025915427e-06, + "loss": 0.7281460762023926, + "step": 3326 + }, + { + "epoch": 1.1723348017621145, + "grad_norm": 1.8020991914636244, + "learning_rate": 7.997693847673378e-06, + "loss": 0.6877723336219788, + "step": 3327 + }, + { + "epoch": 1.1726872246696036, + "grad_norm": 1.4739994768631006, + "learning_rate": 7.991985783524676e-06, + "loss": 0.6045002937316895, + "step": 3328 + }, + { + "epoch": 1.1730396475770926, + "grad_norm": 1.7637996531853402, + "learning_rate": 7.986278401034315e-06, + "loss": 0.5698690414428711, + "step": 3329 + }, + { + "epoch": 1.1733920704845815, + "grad_norm": 1.879664532548966, + "learning_rate": 7.980571702139759e-06, + "loss": 0.6802438497543335, + "step": 3330 + }, + { + "epoch": 1.1737444933920704, + "grad_norm": 1.9432824884843154, + "learning_rate": 7.974865688778271e-06, + "loss": 0.5840654373168945, + "step": 3331 + }, + { + "epoch": 1.1740969162995594, + "grad_norm": 1.7557288678447098, + "learning_rate": 7.969160362886855e-06, + "loss": 0.5203073024749756, + "step": 3332 + }, + { + "epoch": 1.1744493392070485, + "grad_norm": 1.5188701776399616, + "learning_rate": 7.963455726402292e-06, + "loss": 0.4558306932449341, + "step": 3333 + }, + { + "epoch": 
1.1748017621145375, + "grad_norm": 1.8464169088081481, + "learning_rate": 7.957751781261132e-06, + "loss": 0.6200483441352844, + "step": 3334 + }, + { + "epoch": 1.1751541850220264, + "grad_norm": 1.4009839443781218, + "learning_rate": 7.952048529399686e-06, + "loss": 0.559386670589447, + "step": 3335 + }, + { + "epoch": 1.1755066079295153, + "grad_norm": 1.5776847118393618, + "learning_rate": 7.946345972754026e-06, + "loss": 0.5521356463432312, + "step": 3336 + }, + { + "epoch": 1.1758590308370045, + "grad_norm": 1.6725655120909741, + "learning_rate": 7.940644113260001e-06, + "loss": 0.6235495805740356, + "step": 3337 + }, + { + "epoch": 1.1762114537444934, + "grad_norm": 1.6364629990686756, + "learning_rate": 7.934942952853203e-06, + "loss": 0.5196648836135864, + "step": 3338 + }, + { + "epoch": 1.1765638766519824, + "grad_norm": 1.658819201732712, + "learning_rate": 7.929242493469013e-06, + "loss": 0.5959422588348389, + "step": 3339 + }, + { + "epoch": 1.1769162995594713, + "grad_norm": 1.8867606277211662, + "learning_rate": 7.923542737042549e-06, + "loss": 0.5400167107582092, + "step": 3340 + }, + { + "epoch": 1.1772687224669602, + "grad_norm": 1.8686352871929341, + "learning_rate": 7.917843685508702e-06, + "loss": 0.688996434211731, + "step": 3341 + }, + { + "epoch": 1.1776211453744494, + "grad_norm": 1.844624213320976, + "learning_rate": 7.912145340802127e-06, + "loss": 0.623216450214386, + "step": 3342 + }, + { + "epoch": 1.1779735682819383, + "grad_norm": 1.7951119497780943, + "learning_rate": 7.906447704857233e-06, + "loss": 0.587382435798645, + "step": 3343 + }, + { + "epoch": 1.1783259911894273, + "grad_norm": 1.4508698182802122, + "learning_rate": 7.900750779608187e-06, + "loss": 0.6033053398132324, + "step": 3344 + }, + { + "epoch": 1.1786784140969162, + "grad_norm": 1.5026274052311877, + "learning_rate": 7.895054566988924e-06, + "loss": 0.557671308517456, + "step": 3345 + }, + { + "epoch": 1.1790308370044054, + "grad_norm": 1.6193785911353318, + 
"learning_rate": 7.889359068933122e-06, + "loss": 0.4550681710243225, + "step": 3346 + }, + { + "epoch": 1.1793832599118943, + "grad_norm": 1.7532225132073032, + "learning_rate": 7.883664287374235e-06, + "loss": 0.6417531967163086, + "step": 3347 + }, + { + "epoch": 1.1797356828193832, + "grad_norm": 2.046641045277204, + "learning_rate": 7.877970224245458e-06, + "loss": 0.703549861907959, + "step": 3348 + }, + { + "epoch": 1.1800881057268722, + "grad_norm": 1.9966595548369739, + "learning_rate": 7.87227688147975e-06, + "loss": 0.7438976764678955, + "step": 3349 + }, + { + "epoch": 1.1804405286343613, + "grad_norm": 1.9757665254478705, + "learning_rate": 7.866584261009823e-06, + "loss": 0.5563932657241821, + "step": 3350 + }, + { + "epoch": 1.1807929515418503, + "grad_norm": 1.9705828017858218, + "learning_rate": 7.860892364768145e-06, + "loss": 0.6332740783691406, + "step": 3351 + }, + { + "epoch": 1.1811453744493392, + "grad_norm": 1.6800252042998722, + "learning_rate": 7.855201194686938e-06, + "loss": 0.5207923650741577, + "step": 3352 + }, + { + "epoch": 1.1814977973568281, + "grad_norm": 1.704285155728578, + "learning_rate": 7.849510752698179e-06, + "loss": 0.5930209755897522, + "step": 3353 + }, + { + "epoch": 1.1818502202643173, + "grad_norm": 1.9626347095192314, + "learning_rate": 7.843821040733588e-06, + "loss": 0.6207472085952759, + "step": 3354 + }, + { + "epoch": 1.1822026431718062, + "grad_norm": 1.631891920380694, + "learning_rate": 7.838132060724657e-06, + "loss": 0.5487867593765259, + "step": 3355 + }, + { + "epoch": 1.1825550660792952, + "grad_norm": 1.719446635213068, + "learning_rate": 7.83244381460261e-06, + "loss": 0.5457941889762878, + "step": 3356 + }, + { + "epoch": 1.182907488986784, + "grad_norm": 4.79087339281713, + "learning_rate": 7.826756304298428e-06, + "loss": 0.5203769207000732, + "step": 3357 + }, + { + "epoch": 1.183259911894273, + "grad_norm": 2.2130523974851006, + "learning_rate": 7.821069531742848e-06, + "loss": 
0.7241770029067993, + "step": 3358 + }, + { + "epoch": 1.1836123348017622, + "grad_norm": 1.872241533824603, + "learning_rate": 7.815383498866351e-06, + "loss": 0.5085904598236084, + "step": 3359 + }, + { + "epoch": 1.1839647577092511, + "grad_norm": 1.7457024495825946, + "learning_rate": 7.80969820759917e-06, + "loss": 0.6219276785850525, + "step": 3360 + }, + { + "epoch": 1.18431718061674, + "grad_norm": 1.657619548935653, + "learning_rate": 7.804013659871286e-06, + "loss": 0.5621576309204102, + "step": 3361 + }, + { + "epoch": 1.184669603524229, + "grad_norm": 2.006942738555184, + "learning_rate": 7.798329857612415e-06, + "loss": 0.6862529516220093, + "step": 3362 + }, + { + "epoch": 1.1850220264317182, + "grad_norm": 1.6254700608957282, + "learning_rate": 7.792646802752045e-06, + "loss": 0.5536706447601318, + "step": 3363 + }, + { + "epoch": 1.185374449339207, + "grad_norm": 1.8365676060407183, + "learning_rate": 7.786964497219389e-06, + "loss": 0.7158493995666504, + "step": 3364 + }, + { + "epoch": 1.185726872246696, + "grad_norm": 1.5882377854785632, + "learning_rate": 7.781282942943411e-06, + "loss": 0.6510338187217712, + "step": 3365 + }, + { + "epoch": 1.186079295154185, + "grad_norm": 1.6887309758558333, + "learning_rate": 7.775602141852827e-06, + "loss": 0.4999651312828064, + "step": 3366 + }, + { + "epoch": 1.186431718061674, + "grad_norm": 1.7482854003458987, + "learning_rate": 7.769922095876088e-06, + "loss": 0.566371738910675, + "step": 3367 + }, + { + "epoch": 1.186784140969163, + "grad_norm": 1.8523910267151578, + "learning_rate": 7.764242806941396e-06, + "loss": 0.6424880623817444, + "step": 3368 + }, + { + "epoch": 1.187136563876652, + "grad_norm": 1.7770666290685069, + "learning_rate": 7.758564276976696e-06, + "loss": 0.6731792688369751, + "step": 3369 + }, + { + "epoch": 1.187488986784141, + "grad_norm": 1.8284341736993877, + "learning_rate": 7.752886507909661e-06, + "loss": 0.7350698113441467, + "step": 3370 + }, + { + "epoch": 
1.1878414096916299, + "grad_norm": 1.6211597569244138, + "learning_rate": 7.747209501667729e-06, + "loss": 0.49212586879730225, + "step": 3371 + }, + { + "epoch": 1.188193832599119, + "grad_norm": 1.8399284999038652, + "learning_rate": 7.741533260178058e-06, + "loss": 0.46775591373443604, + "step": 3372 + }, + { + "epoch": 1.188546255506608, + "grad_norm": 1.9173381710912725, + "learning_rate": 7.73585778536756e-06, + "loss": 0.7006367444992065, + "step": 3373 + }, + { + "epoch": 1.188898678414097, + "grad_norm": 1.9011259462553447, + "learning_rate": 7.730183079162882e-06, + "loss": 0.6403789520263672, + "step": 3374 + }, + { + "epoch": 1.1892511013215858, + "grad_norm": 1.7192698764020407, + "learning_rate": 7.724509143490409e-06, + "loss": 0.5788881778717041, + "step": 3375 + }, + { + "epoch": 1.1896035242290748, + "grad_norm": 1.8160886708158774, + "learning_rate": 7.718835980276265e-06, + "loss": 0.5216118693351746, + "step": 3376 + }, + { + "epoch": 1.189955947136564, + "grad_norm": 1.8022868379388808, + "learning_rate": 7.713163591446318e-06, + "loss": 0.5951248407363892, + "step": 3377 + }, + { + "epoch": 1.1903083700440529, + "grad_norm": 1.7460515067285554, + "learning_rate": 7.707491978926157e-06, + "loss": 0.4975050687789917, + "step": 3378 + }, + { + "epoch": 1.1906607929515418, + "grad_norm": 1.770763460120106, + "learning_rate": 7.701821144641127e-06, + "loss": 0.6019243001937866, + "step": 3379 + }, + { + "epoch": 1.1910132158590307, + "grad_norm": 1.7832166509700509, + "learning_rate": 7.696151090516292e-06, + "loss": 0.6395450830459595, + "step": 3380 + }, + { + "epoch": 1.19136563876652, + "grad_norm": 1.6347986183513594, + "learning_rate": 7.690481818476468e-06, + "loss": 0.579787015914917, + "step": 3381 + }, + { + "epoch": 1.1917180616740088, + "grad_norm": 1.4744637046036069, + "learning_rate": 7.684813330446191e-06, + "loss": 0.5136005878448486, + "step": 3382 + }, + { + "epoch": 1.1920704845814978, + "grad_norm": 1.7266158280823927, + 
"learning_rate": 7.679145628349734e-06, + "loss": 0.6639782190322876, + "step": 3383 + }, + { + "epoch": 1.1924229074889867, + "grad_norm": 1.8900727159770023, + "learning_rate": 7.673478714111111e-06, + "loss": 0.5575984716415405, + "step": 3384 + }, + { + "epoch": 1.1927753303964757, + "grad_norm": 2.0885094289190658, + "learning_rate": 7.667812589654062e-06, + "loss": 0.6456045508384705, + "step": 3385 + }, + { + "epoch": 1.1931277533039648, + "grad_norm": 1.9286041654650978, + "learning_rate": 7.662147256902055e-06, + "loss": 0.6936196088790894, + "step": 3386 + }, + { + "epoch": 1.1934801762114537, + "grad_norm": 1.758654368664718, + "learning_rate": 7.656482717778299e-06, + "loss": 0.5490384697914124, + "step": 3387 + }, + { + "epoch": 1.1938325991189427, + "grad_norm": 1.9621511017976598, + "learning_rate": 7.650818974205727e-06, + "loss": 0.6973621845245361, + "step": 3388 + }, + { + "epoch": 1.1941850220264318, + "grad_norm": 1.835769632858156, + "learning_rate": 7.645156028107005e-06, + "loss": 0.7471047639846802, + "step": 3389 + }, + { + "epoch": 1.1945374449339208, + "grad_norm": 1.7902415027725214, + "learning_rate": 7.639493881404526e-06, + "loss": 0.6205108165740967, + "step": 3390 + }, + { + "epoch": 1.1948898678414097, + "grad_norm": 1.6920866725907067, + "learning_rate": 7.63383253602041e-06, + "loss": 0.747038722038269, + "step": 3391 + }, + { + "epoch": 1.1952422907488987, + "grad_norm": 1.5771320255200836, + "learning_rate": 7.628171993876514e-06, + "loss": 0.5185794830322266, + "step": 3392 + }, + { + "epoch": 1.1955947136563876, + "grad_norm": 1.6878325344643712, + "learning_rate": 7.6225122568944124e-06, + "loss": 0.6059385538101196, + "step": 3393 + }, + { + "epoch": 1.1959471365638767, + "grad_norm": 1.6275144870635614, + "learning_rate": 7.6168533269954045e-06, + "loss": 0.5154507160186768, + "step": 3394 + }, + { + "epoch": 1.1962995594713657, + "grad_norm": 1.8584269669132367, + "learning_rate": 7.611195206100529e-06, + "loss": 
0.684306263923645, + "step": 3395 + }, + { + "epoch": 1.1966519823788546, + "grad_norm": 1.60676147024925, + "learning_rate": 7.605537896130537e-06, + "loss": 0.5637205839157104, + "step": 3396 + }, + { + "epoch": 1.1970044052863436, + "grad_norm": 2.099988274984523, + "learning_rate": 7.599881399005913e-06, + "loss": 0.700809121131897, + "step": 3397 + }, + { + "epoch": 1.1973568281938327, + "grad_norm": 1.8285381374549698, + "learning_rate": 7.594225716646859e-06, + "loss": 0.45139041543006897, + "step": 3398 + }, + { + "epoch": 1.1977092511013216, + "grad_norm": 1.9616153744225684, + "learning_rate": 7.588570850973301e-06, + "loss": 0.6623016595840454, + "step": 3399 + }, + { + "epoch": 1.1980616740088106, + "grad_norm": 1.5510325285611402, + "learning_rate": 7.582916803904899e-06, + "loss": 0.47430598735809326, + "step": 3400 + }, + { + "epoch": 1.1984140969162995, + "grad_norm": 1.7180906175268718, + "learning_rate": 7.57726357736101e-06, + "loss": 0.7190637588500977, + "step": 3401 + }, + { + "epoch": 1.1987665198237885, + "grad_norm": 1.4703339836450204, + "learning_rate": 7.571611173260747e-06, + "loss": 0.552079439163208, + "step": 3402 + }, + { + "epoch": 1.1991189427312776, + "grad_norm": 1.665813020849203, + "learning_rate": 7.565959593522914e-06, + "loss": 0.5499744415283203, + "step": 3403 + }, + { + "epoch": 1.1994713656387666, + "grad_norm": 1.6507149154277247, + "learning_rate": 7.560308840066046e-06, + "loss": 0.6013774871826172, + "step": 3404 + }, + { + "epoch": 1.1998237885462555, + "grad_norm": 1.5847999964914972, + "learning_rate": 7.554658914808404e-06, + "loss": 0.5489538908004761, + "step": 3405 + }, + { + "epoch": 1.2001762114537444, + "grad_norm": 1.72263968265959, + "learning_rate": 7.549009819667956e-06, + "loss": 0.6124382615089417, + "step": 3406 + }, + { + "epoch": 1.2005286343612336, + "grad_norm": 2.1073738195754594, + "learning_rate": 7.543361556562397e-06, + "loss": 0.6895862817764282, + "step": 3407 + }, + { + "epoch": 
1.2008810572687225, + "grad_norm": 2.063900978481081, + "learning_rate": 7.537714127409139e-06, + "loss": 0.6632197499275208, + "step": 3408 + }, + { + "epoch": 1.2012334801762115, + "grad_norm": 1.6352648722318401, + "learning_rate": 7.5320675341253e-06, + "loss": 0.5940145254135132, + "step": 3409 + }, + { + "epoch": 1.2015859030837004, + "grad_norm": 1.884013328310988, + "learning_rate": 7.526421778627735e-06, + "loss": 0.646323561668396, + "step": 3410 + }, + { + "epoch": 1.2019383259911893, + "grad_norm": 1.7070941231545174, + "learning_rate": 7.520776862832993e-06, + "loss": 0.6173659563064575, + "step": 3411 + }, + { + "epoch": 1.2022907488986785, + "grad_norm": 1.8582208465763577, + "learning_rate": 7.515132788657347e-06, + "loss": 0.574191689491272, + "step": 3412 + }, + { + "epoch": 1.2026431718061674, + "grad_norm": 1.9220370982111243, + "learning_rate": 7.50948955801679e-06, + "loss": 0.6243089437484741, + "step": 3413 + }, + { + "epoch": 1.2029955947136564, + "grad_norm": 1.7949632694678572, + "learning_rate": 7.503847172827022e-06, + "loss": 0.692270040512085, + "step": 3414 + }, + { + "epoch": 1.2033480176211453, + "grad_norm": 1.6803082040464332, + "learning_rate": 7.498205635003451e-06, + "loss": 0.5929970145225525, + "step": 3415 + }, + { + "epoch": 1.2037004405286345, + "grad_norm": 1.6077232593078599, + "learning_rate": 7.4925649464612126e-06, + "loss": 0.5479272603988647, + "step": 3416 + }, + { + "epoch": 1.2040528634361234, + "grad_norm": 1.5415384890909907, + "learning_rate": 7.486925109115135e-06, + "loss": 0.5923635363578796, + "step": 3417 + }, + { + "epoch": 1.2044052863436123, + "grad_norm": 1.7506756122488851, + "learning_rate": 7.48128612487978e-06, + "loss": 0.6530192494392395, + "step": 3418 + }, + { + "epoch": 1.2047577092511013, + "grad_norm": 1.533550542452438, + "learning_rate": 7.475647995669397e-06, + "loss": 0.5104716420173645, + "step": 3419 + }, + { + "epoch": 1.2051101321585902, + "grad_norm": 1.8415327152950194, + 
"learning_rate": 7.470010723397958e-06, + "loss": 0.6526790261268616, + "step": 3420 + }, + { + "epoch": 1.2054625550660794, + "grad_norm": 1.746747219195987, + "learning_rate": 7.464374309979143e-06, + "loss": 0.5985254645347595, + "step": 3421 + }, + { + "epoch": 1.2058149779735683, + "grad_norm": 1.9679342498420438, + "learning_rate": 7.458738757326336e-06, + "loss": 0.6575271487236023, + "step": 3422 + }, + { + "epoch": 1.2061674008810572, + "grad_norm": 1.7353179250025277, + "learning_rate": 7.453104067352637e-06, + "loss": 0.5906708836555481, + "step": 3423 + }, + { + "epoch": 1.2065198237885462, + "grad_norm": 1.7518769855954601, + "learning_rate": 7.4474702419708465e-06, + "loss": 0.7992517352104187, + "step": 3424 + }, + { + "epoch": 1.2068722466960353, + "grad_norm": 1.7067520122752557, + "learning_rate": 7.4418372830934645e-06, + "loss": 0.5935543179512024, + "step": 3425 + }, + { + "epoch": 1.2072246696035243, + "grad_norm": 1.877304862966978, + "learning_rate": 7.436205192632719e-06, + "loss": 0.7166613340377808, + "step": 3426 + }, + { + "epoch": 1.2075770925110132, + "grad_norm": 1.7575954983917004, + "learning_rate": 7.430573972500519e-06, + "loss": 0.5254578590393066, + "step": 3427 + }, + { + "epoch": 1.2079295154185021, + "grad_norm": 1.7449214411247376, + "learning_rate": 7.42494362460849e-06, + "loss": 0.6586379408836365, + "step": 3428 + }, + { + "epoch": 1.208281938325991, + "grad_norm": 1.7864206478373184, + "learning_rate": 7.419314150867964e-06, + "loss": 0.6960606575012207, + "step": 3429 + }, + { + "epoch": 1.2086343612334802, + "grad_norm": 1.7557785377406303, + "learning_rate": 7.413685553189969e-06, + "loss": 0.6107728481292725, + "step": 3430 + }, + { + "epoch": 1.2089867841409692, + "grad_norm": 1.624755754090177, + "learning_rate": 7.408057833485241e-06, + "loss": 0.6446499824523926, + "step": 3431 + }, + { + "epoch": 1.209339207048458, + "grad_norm": 1.9153166988080477, + "learning_rate": 7.402430993664216e-06, + "loss": 
0.7070472240447998, + "step": 3432 + }, + { + "epoch": 1.2096916299559473, + "grad_norm": 2.004011228140917, + "learning_rate": 7.396805035637023e-06, + "loss": 0.5919365882873535, + "step": 3433 + }, + { + "epoch": 1.2100440528634362, + "grad_norm": 1.7861550041093852, + "learning_rate": 7.391179961313512e-06, + "loss": 0.5975243449211121, + "step": 3434 + }, + { + "epoch": 1.2103964757709251, + "grad_norm": 1.6863010997131964, + "learning_rate": 7.385555772603212e-06, + "loss": 0.5772840976715088, + "step": 3435 + }, + { + "epoch": 1.210748898678414, + "grad_norm": 1.8451401620227157, + "learning_rate": 7.379932471415362e-06, + "loss": 0.7335072755813599, + "step": 3436 + }, + { + "epoch": 1.211101321585903, + "grad_norm": 2.0255796426124877, + "learning_rate": 7.3743100596589e-06, + "loss": 0.6214553713798523, + "step": 3437 + }, + { + "epoch": 1.2114537444933922, + "grad_norm": 1.8204785128516552, + "learning_rate": 7.368688539242457e-06, + "loss": 0.6515316963195801, + "step": 3438 + }, + { + "epoch": 1.211806167400881, + "grad_norm": 1.778475729690813, + "learning_rate": 7.3630679120743665e-06, + "loss": 0.6479551196098328, + "step": 3439 + }, + { + "epoch": 1.21215859030837, + "grad_norm": 1.8992442060407408, + "learning_rate": 7.357448180062657e-06, + "loss": 0.6195069551467896, + "step": 3440 + }, + { + "epoch": 1.212511013215859, + "grad_norm": 1.8044588174946172, + "learning_rate": 7.351829345115047e-06, + "loss": 0.5939193964004517, + "step": 3441 + }, + { + "epoch": 1.2128634361233481, + "grad_norm": 1.7404213735338998, + "learning_rate": 7.346211409138964e-06, + "loss": 0.6346434354782104, + "step": 3442 + }, + { + "epoch": 1.213215859030837, + "grad_norm": 1.7854241859310716, + "learning_rate": 7.340594374041516e-06, + "loss": 0.5924171209335327, + "step": 3443 + }, + { + "epoch": 1.213568281938326, + "grad_norm": 1.4550427635518266, + "learning_rate": 7.334978241729514e-06, + "loss": 0.48560285568237305, + "step": 3444 + }, + { + "epoch": 
1.213920704845815, + "grad_norm": 2.0456790867838865, + "learning_rate": 7.329363014109463e-06, + "loss": 0.643998384475708, + "step": 3445 + }, + { + "epoch": 1.2142731277533039, + "grad_norm": 1.9340204732587762, + "learning_rate": 7.323748693087551e-06, + "loss": 0.6041159629821777, + "step": 3446 + }, + { + "epoch": 1.214625550660793, + "grad_norm": 1.991943883280592, + "learning_rate": 7.318135280569674e-06, + "loss": 0.7143498659133911, + "step": 3447 + }, + { + "epoch": 1.214977973568282, + "grad_norm": 1.910490525820005, + "learning_rate": 7.312522778461409e-06, + "loss": 0.5821564197540283, + "step": 3448 + }, + { + "epoch": 1.215330396475771, + "grad_norm": 1.9609409525419488, + "learning_rate": 7.3069111886680166e-06, + "loss": 0.5786745548248291, + "step": 3449 + }, + { + "epoch": 1.2156828193832598, + "grad_norm": 1.7004659993753848, + "learning_rate": 7.3013005130944666e-06, + "loss": 0.6740534901618958, + "step": 3450 + }, + { + "epoch": 1.216035242290749, + "grad_norm": 1.9264837774532027, + "learning_rate": 7.2956907536454045e-06, + "loss": 0.6353983879089355, + "step": 3451 + }, + { + "epoch": 1.216387665198238, + "grad_norm": 1.6467978200520468, + "learning_rate": 7.290081912225172e-06, + "loss": 0.6890027523040771, + "step": 3452 + }, + { + "epoch": 1.2167400881057269, + "grad_norm": 2.194089687314607, + "learning_rate": 7.284473990737795e-06, + "loss": 0.6485118269920349, + "step": 3453 + }, + { + "epoch": 1.2170925110132158, + "grad_norm": 1.8020323615419078, + "learning_rate": 7.2788669910869845e-06, + "loss": 0.5364162921905518, + "step": 3454 + }, + { + "epoch": 1.2174449339207047, + "grad_norm": 1.8770204171846867, + "learning_rate": 7.27326091517615e-06, + "loss": 0.6625754833221436, + "step": 3455 + }, + { + "epoch": 1.217797356828194, + "grad_norm": 1.9138778572255513, + "learning_rate": 7.267655764908374e-06, + "loss": 0.7090050578117371, + "step": 3456 + }, + { + "epoch": 1.2181497797356828, + "grad_norm": 1.7151154871040917, + 
"learning_rate": 7.26205154218643e-06, + "loss": 0.6556301116943359, + "step": 3457 + }, + { + "epoch": 1.2185022026431718, + "grad_norm": 2.12213118759585, + "learning_rate": 7.2564482489127815e-06, + "loss": 0.7998625636100769, + "step": 3458 + }, + { + "epoch": 1.2188546255506607, + "grad_norm": 1.8721449700246833, + "learning_rate": 7.250845886989568e-06, + "loss": 0.6336952447891235, + "step": 3459 + }, + { + "epoch": 1.2192070484581499, + "grad_norm": 1.7786932342182031, + "learning_rate": 7.245244458318621e-06, + "loss": 0.5072300434112549, + "step": 3460 + }, + { + "epoch": 1.2195594713656388, + "grad_norm": 1.9350920817100896, + "learning_rate": 7.23964396480145e-06, + "loss": 0.6297830939292908, + "step": 3461 + }, + { + "epoch": 1.2199118942731277, + "grad_norm": 1.7384183002767206, + "learning_rate": 7.234044408339243e-06, + "loss": 0.5560386180877686, + "step": 3462 + }, + { + "epoch": 1.2202643171806167, + "grad_norm": 1.7834281461054429, + "learning_rate": 7.228445790832885e-06, + "loss": 0.5180274844169617, + "step": 3463 + }, + { + "epoch": 1.2206167400881056, + "grad_norm": 1.5903839847735544, + "learning_rate": 7.222848114182926e-06, + "loss": 0.4870688319206238, + "step": 3464 + }, + { + "epoch": 1.2209691629955948, + "grad_norm": 1.5913924611315027, + "learning_rate": 7.217251380289602e-06, + "loss": 0.46914681792259216, + "step": 3465 + }, + { + "epoch": 1.2213215859030837, + "grad_norm": 1.6510218664086935, + "learning_rate": 7.211655591052833e-06, + "loss": 0.5980997085571289, + "step": 3466 + }, + { + "epoch": 1.2216740088105726, + "grad_norm": 2.0761228855668468, + "learning_rate": 7.206060748372212e-06, + "loss": 0.5982732772827148, + "step": 3467 + }, + { + "epoch": 1.2220264317180616, + "grad_norm": 1.5384750193393883, + "learning_rate": 7.200466854147019e-06, + "loss": 0.612629771232605, + "step": 3468 + }, + { + "epoch": 1.2223788546255507, + "grad_norm": 1.6776022561511, + "learning_rate": 7.194873910276205e-06, + "loss": 
0.606558084487915, + "step": 3469 + }, + { + "epoch": 1.2227312775330397, + "grad_norm": 2.093853594654106, + "learning_rate": 7.189281918658396e-06, + "loss": 0.7133803367614746, + "step": 3470 + }, + { + "epoch": 1.2230837004405286, + "grad_norm": 1.737492396211302, + "learning_rate": 7.183690881191908e-06, + "loss": 0.5640908479690552, + "step": 3471 + }, + { + "epoch": 1.2234361233480175, + "grad_norm": 1.9131350962270206, + "learning_rate": 7.178100799774717e-06, + "loss": 0.6376210451126099, + "step": 3472 + }, + { + "epoch": 1.2237885462555067, + "grad_norm": 1.7418892302924867, + "learning_rate": 7.172511676304481e-06, + "loss": 0.6207184791564941, + "step": 3473 + }, + { + "epoch": 1.2241409691629956, + "grad_norm": 2.0136397077316133, + "learning_rate": 7.166923512678538e-06, + "loss": 0.47848421335220337, + "step": 3474 + }, + { + "epoch": 1.2244933920704846, + "grad_norm": 1.89946756738985, + "learning_rate": 7.161336310793894e-06, + "loss": 0.6052829027175903, + "step": 3475 + }, + { + "epoch": 1.2248458149779735, + "grad_norm": 1.968672987503914, + "learning_rate": 7.155750072547229e-06, + "loss": 0.6050940155982971, + "step": 3476 + }, + { + "epoch": 1.2251982378854627, + "grad_norm": 2.566995671782078, + "learning_rate": 7.150164799834902e-06, + "loss": 0.6121659278869629, + "step": 3477 + }, + { + "epoch": 1.2255506607929516, + "grad_norm": 1.9679344001124786, + "learning_rate": 7.144580494552929e-06, + "loss": 0.6886739730834961, + "step": 3478 + }, + { + "epoch": 1.2259030837004405, + "grad_norm": 1.5760234299307694, + "learning_rate": 7.13899715859702e-06, + "loss": 0.5001103281974792, + "step": 3479 + }, + { + "epoch": 1.2262555066079295, + "grad_norm": 2.1260048612910216, + "learning_rate": 7.133414793862532e-06, + "loss": 0.5948734283447266, + "step": 3480 + }, + { + "epoch": 1.2266079295154184, + "grad_norm": 2.593831579740968, + "learning_rate": 7.127833402244515e-06, + "loss": 0.6179298162460327, + "step": 3481 + }, + { + "epoch": 
1.2269603524229076, + "grad_norm": 1.6926296837265904, + "learning_rate": 7.122252985637672e-06, + "loss": 0.5543676614761353, + "step": 3482 + }, + { + "epoch": 1.2273127753303965, + "grad_norm": 1.6008632106545562, + "learning_rate": 7.116673545936379e-06, + "loss": 0.6279658079147339, + "step": 3483 + }, + { + "epoch": 1.2276651982378854, + "grad_norm": 1.5383086530060461, + "learning_rate": 7.111095085034687e-06, + "loss": 0.6692230701446533, + "step": 3484 + }, + { + "epoch": 1.2280176211453744, + "grad_norm": 1.7218507243355061, + "learning_rate": 7.1055176048263085e-06, + "loss": 0.6124502420425415, + "step": 3485 + }, + { + "epoch": 1.2283700440528635, + "grad_norm": 2.0325469007846007, + "learning_rate": 7.09994110720462e-06, + "loss": 0.6241810321807861, + "step": 3486 + }, + { + "epoch": 1.2287224669603525, + "grad_norm": 1.7620353767255947, + "learning_rate": 7.094365594062675e-06, + "loss": 0.6556589603424072, + "step": 3487 + }, + { + "epoch": 1.2290748898678414, + "grad_norm": 1.660185756567605, + "learning_rate": 7.0887910672931815e-06, + "loss": 0.480433851480484, + "step": 3488 + }, + { + "epoch": 1.2294273127753303, + "grad_norm": 1.7666817554476708, + "learning_rate": 7.083217528788524e-06, + "loss": 0.6198803782463074, + "step": 3489 + }, + { + "epoch": 1.2297797356828193, + "grad_norm": 1.7945939958355666, + "learning_rate": 7.077644980440741e-06, + "loss": 0.6368751525878906, + "step": 3490 + }, + { + "epoch": 1.2301321585903084, + "grad_norm": 1.904999974616483, + "learning_rate": 7.072073424141538e-06, + "loss": 0.5992522239685059, + "step": 3491 + }, + { + "epoch": 1.2304845814977974, + "grad_norm": 1.6441410368294835, + "learning_rate": 7.066502861782289e-06, + "loss": 0.5917885303497314, + "step": 3492 + }, + { + "epoch": 1.2308370044052863, + "grad_norm": 1.9090985571817867, + "learning_rate": 7.060933295254027e-06, + "loss": 0.615925669670105, + "step": 3493 + }, + { + "epoch": 1.2311894273127753, + "grad_norm": 1.5510149338562214, + 
"learning_rate": 7.055364726447437e-06, + "loss": 0.4408820867538452, + "step": 3494 + }, + { + "epoch": 1.2315418502202644, + "grad_norm": 1.706805010144051, + "learning_rate": 7.049797157252889e-06, + "loss": 0.4918386936187744, + "step": 3495 + }, + { + "epoch": 1.2318942731277533, + "grad_norm": 2.0047166519470965, + "learning_rate": 7.0442305895603844e-06, + "loss": 0.6964970827102661, + "step": 3496 + }, + { + "epoch": 1.2322466960352423, + "grad_norm": 1.993882373770559, + "learning_rate": 7.038665025259615e-06, + "loss": 0.5269606113433838, + "step": 3497 + }, + { + "epoch": 1.2325991189427312, + "grad_norm": 1.7338430673292662, + "learning_rate": 7.033100466239908e-06, + "loss": 0.6146842241287231, + "step": 3498 + }, + { + "epoch": 1.2329515418502202, + "grad_norm": 1.8958783101408965, + "learning_rate": 7.027536914390257e-06, + "loss": 0.7163739800453186, + "step": 3499 + }, + { + "epoch": 1.2333039647577093, + "grad_norm": 1.5575657818438158, + "learning_rate": 7.021974371599318e-06, + "loss": 0.5851477980613708, + "step": 3500 + }, + { + "epoch": 1.2336563876651983, + "grad_norm": 1.3831914970718109, + "learning_rate": 7.0164128397554e-06, + "loss": 0.585768461227417, + "step": 3501 + }, + { + "epoch": 1.2340088105726872, + "grad_norm": 1.651121323438745, + "learning_rate": 7.0108523207464706e-06, + "loss": 0.5467718839645386, + "step": 3502 + }, + { + "epoch": 1.2343612334801761, + "grad_norm": 1.8179588757324485, + "learning_rate": 7.0052928164601564e-06, + "loss": 0.638299822807312, + "step": 3503 + }, + { + "epoch": 1.2347136563876653, + "grad_norm": 1.8158584952636452, + "learning_rate": 6.9997343287837275e-06, + "loss": 0.6737650036811829, + "step": 3504 + }, + { + "epoch": 1.2350660792951542, + "grad_norm": 1.7619528960945736, + "learning_rate": 6.9941768596041224e-06, + "loss": 0.6659837961196899, + "step": 3505 + }, + { + "epoch": 1.2354185022026432, + "grad_norm": 1.9059656133131788, + "learning_rate": 6.988620410807932e-06, + "loss": 
0.6731020212173462, + "step": 3506 + }, + { + "epoch": 1.235770925110132, + "grad_norm": 1.8111638058637756, + "learning_rate": 6.983064984281389e-06, + "loss": 0.6236598491668701, + "step": 3507 + }, + { + "epoch": 1.236123348017621, + "grad_norm": 1.8485171900570894, + "learning_rate": 6.9775105819103985e-06, + "loss": 0.6233193874359131, + "step": 3508 + }, + { + "epoch": 1.2364757709251102, + "grad_norm": 1.7456936175280036, + "learning_rate": 6.971957205580497e-06, + "loss": 0.5914918184280396, + "step": 3509 + }, + { + "epoch": 1.2368281938325991, + "grad_norm": 2.069060854376664, + "learning_rate": 6.966404857176893e-06, + "loss": 0.6576484441757202, + "step": 3510 + }, + { + "epoch": 1.237180616740088, + "grad_norm": 1.6371442891988068, + "learning_rate": 6.960853538584431e-06, + "loss": 0.5609208941459656, + "step": 3511 + }, + { + "epoch": 1.2375330396475772, + "grad_norm": 1.8336206343046235, + "learning_rate": 6.955303251687609e-06, + "loss": 0.6405455470085144, + "step": 3512 + }, + { + "epoch": 1.2378854625550662, + "grad_norm": 1.6981959386126726, + "learning_rate": 6.949753998370579e-06, + "loss": 0.5621844530105591, + "step": 3513 + }, + { + "epoch": 1.238237885462555, + "grad_norm": 1.6040361718583698, + "learning_rate": 6.944205780517138e-06, + "loss": 0.5674207210540771, + "step": 3514 + }, + { + "epoch": 1.238590308370044, + "grad_norm": 1.8089615708578142, + "learning_rate": 6.938658600010734e-06, + "loss": 0.6744752526283264, + "step": 3515 + }, + { + "epoch": 1.238942731277533, + "grad_norm": 1.851260674535246, + "learning_rate": 6.9331124587344655e-06, + "loss": 0.537495493888855, + "step": 3516 + }, + { + "epoch": 1.2392951541850221, + "grad_norm": 1.7599394880527937, + "learning_rate": 6.92756735857107e-06, + "loss": 0.8405104875564575, + "step": 3517 + }, + { + "epoch": 1.239647577092511, + "grad_norm": 1.7838209985249966, + "learning_rate": 6.92202330140294e-06, + "loss": 0.6751723885536194, + "step": 3518 + }, + { + "epoch": 1.24, + 
"grad_norm": 1.8012761946666955, + "learning_rate": 6.9164802891121105e-06, + "loss": 0.5763178467750549, + "step": 3519 + }, + { + "epoch": 1.240352422907489, + "grad_norm": 1.7859481797599979, + "learning_rate": 6.910938323580256e-06, + "loss": 0.7713793516159058, + "step": 3520 + }, + { + "epoch": 1.240704845814978, + "grad_norm": 2.0598557028652356, + "learning_rate": 6.90539740668871e-06, + "loss": 0.6354435682296753, + "step": 3521 + }, + { + "epoch": 1.241057268722467, + "grad_norm": 1.6780280463346202, + "learning_rate": 6.899857540318434e-06, + "loss": 0.5121721625328064, + "step": 3522 + }, + { + "epoch": 1.241409691629956, + "grad_norm": 1.8470903920827393, + "learning_rate": 6.894318726350042e-06, + "loss": 0.586428165435791, + "step": 3523 + }, + { + "epoch": 1.241762114537445, + "grad_norm": 1.690234288859414, + "learning_rate": 6.888780966663792e-06, + "loss": 0.4868311285972595, + "step": 3524 + }, + { + "epoch": 1.2421145374449338, + "grad_norm": 1.7688170320163026, + "learning_rate": 6.883244263139578e-06, + "loss": 0.7057775259017944, + "step": 3525 + }, + { + "epoch": 1.242466960352423, + "grad_norm": 1.630207980484645, + "learning_rate": 6.877708617656942e-06, + "loss": 0.4993360638618469, + "step": 3526 + }, + { + "epoch": 1.242819383259912, + "grad_norm": 1.7093781024880734, + "learning_rate": 6.872174032095061e-06, + "loss": 0.6096793413162231, + "step": 3527 + }, + { + "epoch": 1.2431718061674009, + "grad_norm": 1.7005141830755592, + "learning_rate": 6.866640508332751e-06, + "loss": 0.584385871887207, + "step": 3528 + }, + { + "epoch": 1.2435242290748898, + "grad_norm": 1.6033098221924098, + "learning_rate": 6.861108048248477e-06, + "loss": 0.5857449173927307, + "step": 3529 + }, + { + "epoch": 1.243876651982379, + "grad_norm": 1.6447411339873705, + "learning_rate": 6.855576653720333e-06, + "loss": 0.4337875247001648, + "step": 3530 + }, + { + "epoch": 1.244229074889868, + "grad_norm": 1.924557656954366, + "learning_rate": 
6.850046326626058e-06, + "loss": 0.6949163675308228, + "step": 3531 + }, + { + "epoch": 1.2445814977973568, + "grad_norm": 2.029468434582643, + "learning_rate": 6.844517068843025e-06, + "loss": 0.5876098871231079, + "step": 3532 + }, + { + "epoch": 1.2449339207048458, + "grad_norm": 2.0143379278356153, + "learning_rate": 6.838988882248243e-06, + "loss": 0.5460488796234131, + "step": 3533 + }, + { + "epoch": 1.2452863436123347, + "grad_norm": 2.284896657447092, + "learning_rate": 6.833461768718365e-06, + "loss": 0.6500875949859619, + "step": 3534 + }, + { + "epoch": 1.2456387665198239, + "grad_norm": 1.9702281980181484, + "learning_rate": 6.82793573012967e-06, + "loss": 0.6504626274108887, + "step": 3535 + }, + { + "epoch": 1.2459911894273128, + "grad_norm": 1.8635901517060365, + "learning_rate": 6.822410768358072e-06, + "loss": 0.6881722211837769, + "step": 3536 + }, + { + "epoch": 1.2463436123348017, + "grad_norm": 1.7111090644899583, + "learning_rate": 6.816886885279132e-06, + "loss": 0.6747599840164185, + "step": 3537 + }, + { + "epoch": 1.2466960352422907, + "grad_norm": 2.61809094535544, + "learning_rate": 6.811364082768028e-06, + "loss": 0.5987570285797119, + "step": 3538 + }, + { + "epoch": 1.2470484581497798, + "grad_norm": 1.8641726073707956, + "learning_rate": 6.8058423626995885e-06, + "loss": 0.6614603996276855, + "step": 3539 + }, + { + "epoch": 1.2474008810572688, + "grad_norm": 1.5529990518062367, + "learning_rate": 6.80032172694826e-06, + "loss": 0.542367696762085, + "step": 3540 + }, + { + "epoch": 1.2477533039647577, + "grad_norm": 1.7771584963866378, + "learning_rate": 6.7948021773881235e-06, + "loss": 0.6200593709945679, + "step": 3541 + }, + { + "epoch": 1.2481057268722466, + "grad_norm": 1.896811225090905, + "learning_rate": 6.789283715892905e-06, + "loss": 0.6425306797027588, + "step": 3542 + }, + { + "epoch": 1.2484581497797356, + "grad_norm": 1.4798584901842344, + "learning_rate": 6.78376634433594e-06, + "loss": 0.5277592539787292, + "step": 
3543 + }, + { + "epoch": 1.2488105726872247, + "grad_norm": 1.8357663435279958, + "learning_rate": 6.778250064590206e-06, + "loss": 0.6120523810386658, + "step": 3544 + }, + { + "epoch": 1.2491629955947137, + "grad_norm": 2.0042129559914654, + "learning_rate": 6.772734878528313e-06, + "loss": 0.538428544998169, + "step": 3545 + }, + { + "epoch": 1.2495154185022026, + "grad_norm": 1.7456851140249008, + "learning_rate": 6.76722078802249e-06, + "loss": 0.6439732909202576, + "step": 3546 + }, + { + "epoch": 1.2498678414096915, + "grad_norm": 1.5580174742798336, + "learning_rate": 6.761707794944605e-06, + "loss": 0.5951697826385498, + "step": 3547 + }, + { + "epoch": 1.2502202643171807, + "grad_norm": 1.5461650468928614, + "learning_rate": 6.7561959011661456e-06, + "loss": 0.5548606514930725, + "step": 3548 + }, + { + "epoch": 1.2505726872246696, + "grad_norm": 1.936721806656616, + "learning_rate": 6.750685108558221e-06, + "loss": 0.4768974781036377, + "step": 3549 + }, + { + "epoch": 1.2509251101321586, + "grad_norm": 1.6130866640641843, + "learning_rate": 6.745175418991585e-06, + "loss": 0.6629552245140076, + "step": 3550 + }, + { + "epoch": 1.2512775330396475, + "grad_norm": 1.8826604922139925, + "learning_rate": 6.739666834336599e-06, + "loss": 0.6550329923629761, + "step": 3551 + }, + { + "epoch": 1.2516299559471364, + "grad_norm": 1.7091222991512534, + "learning_rate": 6.734159356463254e-06, + "loss": 0.37340015172958374, + "step": 3552 + }, + { + "epoch": 1.2519823788546256, + "grad_norm": 2.0454082069330424, + "learning_rate": 6.728652987241175e-06, + "loss": 0.6343201398849487, + "step": 3553 + }, + { + "epoch": 1.2523348017621145, + "grad_norm": 1.8938201811077042, + "learning_rate": 6.723147728539596e-06, + "loss": 0.7555221319198608, + "step": 3554 + }, + { + "epoch": 1.2526872246696035, + "grad_norm": 1.7356069524639768, + "learning_rate": 6.717643582227384e-06, + "loss": 0.5944523215293884, + "step": 3555 + }, + { + "epoch": 1.2530396475770926, + 
"grad_norm": 1.627279375354834, + "learning_rate": 6.71214055017303e-06, + "loss": 0.5686212778091431, + "step": 3556 + }, + { + "epoch": 1.2533920704845816, + "grad_norm": 1.697482530075543, + "learning_rate": 6.706638634244629e-06, + "loss": 0.6401857137680054, + "step": 3557 + }, + { + "epoch": 1.2537444933920705, + "grad_norm": 1.5933991655989903, + "learning_rate": 6.701137836309926e-06, + "loss": 0.4571516513824463, + "step": 3558 + }, + { + "epoch": 1.2540969162995594, + "grad_norm": 1.7606001647916119, + "learning_rate": 6.695638158236255e-06, + "loss": 0.5857570171356201, + "step": 3559 + }, + { + "epoch": 1.2544493392070484, + "grad_norm": 1.7187772621235449, + "learning_rate": 6.690139601890601e-06, + "loss": 0.6981472969055176, + "step": 3560 + }, + { + "epoch": 1.2548017621145373, + "grad_norm": 1.685629147285753, + "learning_rate": 6.684642169139544e-06, + "loss": 0.5120254755020142, + "step": 3561 + }, + { + "epoch": 1.2551541850220265, + "grad_norm": 2.043587366608814, + "learning_rate": 6.67914586184929e-06, + "loss": 0.6975923776626587, + "step": 3562 + }, + { + "epoch": 1.2555066079295154, + "grad_norm": 2.1694224742588233, + "learning_rate": 6.673650681885668e-06, + "loss": 0.5825072526931763, + "step": 3563 + }, + { + "epoch": 1.2558590308370043, + "grad_norm": 1.9388578444875513, + "learning_rate": 6.668156631114124e-06, + "loss": 0.5701749324798584, + "step": 3564 + }, + { + "epoch": 1.2562114537444935, + "grad_norm": 1.6715281124187895, + "learning_rate": 6.662663711399705e-06, + "loss": 0.5230482220649719, + "step": 3565 + }, + { + "epoch": 1.2565638766519824, + "grad_norm": 1.7540798103539514, + "learning_rate": 6.657171924607102e-06, + "loss": 0.6680361032485962, + "step": 3566 + }, + { + "epoch": 1.2569162995594714, + "grad_norm": 1.7792330481880054, + "learning_rate": 6.651681272600592e-06, + "loss": 0.6745159029960632, + "step": 3567 + }, + { + "epoch": 1.2572687224669603, + "grad_norm": 1.5777367956881352, + "learning_rate": 
6.646191757244089e-06, + "loss": 0.587162971496582, + "step": 3568 + }, + { + "epoch": 1.2576211453744492, + "grad_norm": 2.0091715660610183, + "learning_rate": 6.640703380401111e-06, + "loss": 0.6170785427093506, + "step": 3569 + }, + { + "epoch": 1.2579735682819384, + "grad_norm": 1.8496931248102404, + "learning_rate": 6.6352161439347875e-06, + "loss": 0.4955494999885559, + "step": 3570 + }, + { + "epoch": 1.2583259911894273, + "grad_norm": 1.8039519732213443, + "learning_rate": 6.62973004970787e-06, + "loss": 0.7183424234390259, + "step": 3571 + }, + { + "epoch": 1.2586784140969163, + "grad_norm": 1.6920151696252388, + "learning_rate": 6.624245099582713e-06, + "loss": 0.6266030669212341, + "step": 3572 + }, + { + "epoch": 1.2590308370044052, + "grad_norm": 1.8260182971737482, + "learning_rate": 6.6187612954212845e-06, + "loss": 0.5234469175338745, + "step": 3573 + }, + { + "epoch": 1.2593832599118944, + "grad_norm": 2.0762206956902234, + "learning_rate": 6.6132786390851725e-06, + "loss": 0.7066231966018677, + "step": 3574 + }, + { + "epoch": 1.2597356828193833, + "grad_norm": 1.8486791061565373, + "learning_rate": 6.60779713243556e-06, + "loss": 0.622086226940155, + "step": 3575 + }, + { + "epoch": 1.2600881057268722, + "grad_norm": 2.003110770323092, + "learning_rate": 6.6023167773332554e-06, + "loss": 0.6607370376586914, + "step": 3576 + }, + { + "epoch": 1.2604405286343612, + "grad_norm": 1.9512971078148649, + "learning_rate": 6.596837575638663e-06, + "loss": 0.6846165657043457, + "step": 3577 + }, + { + "epoch": 1.2607929515418501, + "grad_norm": 2.1137757907106574, + "learning_rate": 6.5913595292118024e-06, + "loss": 0.6329103708267212, + "step": 3578 + }, + { + "epoch": 1.2611453744493393, + "grad_norm": 1.7067433363159659, + "learning_rate": 6.585882639912302e-06, + "loss": 0.7942261695861816, + "step": 3579 + }, + { + "epoch": 1.2614977973568282, + "grad_norm": 1.923592126322299, + "learning_rate": 6.580406909599393e-06, + "loss": 0.5446548461914062, + 
"step": 3580 + }, + { + "epoch": 1.2618502202643171, + "grad_norm": 2.584270827853736, + "learning_rate": 6.574932340131917e-06, + "loss": 0.581193208694458, + "step": 3581 + }, + { + "epoch": 1.2622026431718063, + "grad_norm": 1.789761494779322, + "learning_rate": 6.569458933368323e-06, + "loss": 0.6099729537963867, + "step": 3582 + }, + { + "epoch": 1.2625550660792952, + "grad_norm": 1.7689292642576144, + "learning_rate": 6.563986691166655e-06, + "loss": 0.45215970277786255, + "step": 3583 + }, + { + "epoch": 1.2629074889867842, + "grad_norm": 1.9037008934232844, + "learning_rate": 6.558515615384573e-06, + "loss": 0.6674731969833374, + "step": 3584 + }, + { + "epoch": 1.2632599118942731, + "grad_norm": 1.4782940862298068, + "learning_rate": 6.553045707879338e-06, + "loss": 0.4951098561286926, + "step": 3585 + }, + { + "epoch": 1.263612334801762, + "grad_norm": 1.7852149202748289, + "learning_rate": 6.54757697050781e-06, + "loss": 0.5853816270828247, + "step": 3586 + }, + { + "epoch": 1.263964757709251, + "grad_norm": 1.5907197274079232, + "learning_rate": 6.5421094051264575e-06, + "loss": 0.5236951112747192, + "step": 3587 + }, + { + "epoch": 1.2643171806167401, + "grad_norm": 1.733068587169355, + "learning_rate": 6.536643013591347e-06, + "loss": 0.5717612504959106, + "step": 3588 + }, + { + "epoch": 1.264669603524229, + "grad_norm": 2.033496211612474, + "learning_rate": 6.531177797758155e-06, + "loss": 0.6144098043441772, + "step": 3589 + }, + { + "epoch": 1.265022026431718, + "grad_norm": 1.6355266077439052, + "learning_rate": 6.525713759482144e-06, + "loss": 0.5634705424308777, + "step": 3590 + }, + { + "epoch": 1.2653744493392072, + "grad_norm": 1.7147225194337798, + "learning_rate": 6.520250900618186e-06, + "loss": 0.582956075668335, + "step": 3591 + }, + { + "epoch": 1.265726872246696, + "grad_norm": 1.843768096592032, + "learning_rate": 6.514789223020754e-06, + "loss": 0.7649297714233398, + "step": 3592 + }, + { + "epoch": 1.266079295154185, + "grad_norm": 
1.6261733555902604, + "learning_rate": 6.509328728543918e-06, + "loss": 0.6035098433494568, + "step": 3593 + }, + { + "epoch": 1.266431718061674, + "grad_norm": 1.8493319579504743, + "learning_rate": 6.503869419041344e-06, + "loss": 0.6405705809593201, + "step": 3594 + }, + { + "epoch": 1.266784140969163, + "grad_norm": 2.26304309310324, + "learning_rate": 6.498411296366299e-06, + "loss": 0.674353301525116, + "step": 3595 + }, + { + "epoch": 1.2671365638766519, + "grad_norm": 1.7621656180677492, + "learning_rate": 6.492954362371644e-06, + "loss": 0.6018465757369995, + "step": 3596 + }, + { + "epoch": 1.267488986784141, + "grad_norm": 2.127137234030612, + "learning_rate": 6.487498618909845e-06, + "loss": 0.6491270065307617, + "step": 3597 + }, + { + "epoch": 1.26784140969163, + "grad_norm": 1.6636292273445474, + "learning_rate": 6.4820440678329474e-06, + "loss": 0.5126988887786865, + "step": 3598 + }, + { + "epoch": 1.2681938325991189, + "grad_norm": 1.7884980833676332, + "learning_rate": 6.476590710992605e-06, + "loss": 0.5931694507598877, + "step": 3599 + }, + { + "epoch": 1.268546255506608, + "grad_norm": 1.9386898901162777, + "learning_rate": 6.471138550240066e-06, + "loss": 0.5455423593521118, + "step": 3600 + }, + { + "epoch": 1.268898678414097, + "grad_norm": 1.6361281925349132, + "learning_rate": 6.465687587426166e-06, + "loss": 0.4870053231716156, + "step": 3601 + }, + { + "epoch": 1.269251101321586, + "grad_norm": 1.9069149245463006, + "learning_rate": 6.460237824401337e-06, + "loss": 0.6434903144836426, + "step": 3602 + }, + { + "epoch": 1.2696035242290749, + "grad_norm": 1.676899060774639, + "learning_rate": 6.454789263015609e-06, + "loss": 0.6256476640701294, + "step": 3603 + }, + { + "epoch": 1.2699559471365638, + "grad_norm": 1.8004511475353204, + "learning_rate": 6.449341905118589e-06, + "loss": 0.6304135322570801, + "step": 3604 + }, + { + "epoch": 1.2703083700440527, + "grad_norm": 1.9009929525157667, + "learning_rate": 6.443895752559498e-06, + 
"loss": 0.5315194725990295, + "step": 3605 + }, + { + "epoch": 1.2706607929515419, + "grad_norm": 1.4321615697348329, + "learning_rate": 6.438450807187127e-06, + "loss": 0.5232852697372437, + "step": 3606 + }, + { + "epoch": 1.2710132158590308, + "grad_norm": 1.6584356511216338, + "learning_rate": 6.433007070849863e-06, + "loss": 0.4462543725967407, + "step": 3607 + }, + { + "epoch": 1.2713656387665198, + "grad_norm": 1.6730765460300174, + "learning_rate": 6.4275645453956945e-06, + "loss": 0.6347709894180298, + "step": 3608 + }, + { + "epoch": 1.271718061674009, + "grad_norm": 1.625329738549371, + "learning_rate": 6.422123232672182e-06, + "loss": 0.5277259349822998, + "step": 3609 + }, + { + "epoch": 1.2720704845814979, + "grad_norm": 1.7954090025098361, + "learning_rate": 6.416683134526486e-06, + "loss": 0.6297650933265686, + "step": 3610 + }, + { + "epoch": 1.2724229074889868, + "grad_norm": 1.7743916636003476, + "learning_rate": 6.411244252805351e-06, + "loss": 0.503609836101532, + "step": 3611 + }, + { + "epoch": 1.2727753303964757, + "grad_norm": 1.7300375262211753, + "learning_rate": 6.405806589355099e-06, + "loss": 0.6026735305786133, + "step": 3612 + }, + { + "epoch": 1.2731277533039647, + "grad_norm": 1.543883502597784, + "learning_rate": 6.400370146021662e-06, + "loss": 0.4918368458747864, + "step": 3613 + }, + { + "epoch": 1.2734801762114538, + "grad_norm": 2.125830682883153, + "learning_rate": 6.394934924650532e-06, + "loss": 0.6215550899505615, + "step": 3614 + }, + { + "epoch": 1.2738325991189428, + "grad_norm": 2.1843858701221563, + "learning_rate": 6.389500927086801e-06, + "loss": 0.6979820728302002, + "step": 3615 + }, + { + "epoch": 1.2741850220264317, + "grad_norm": 1.9168565956279218, + "learning_rate": 6.384068155175143e-06, + "loss": 0.5661836266517639, + "step": 3616 + }, + { + "epoch": 1.2745374449339206, + "grad_norm": 2.2497484972303896, + "learning_rate": 6.378636610759812e-06, + "loss": 0.699792742729187, + "step": 3617 + }, + { + 
"epoch": 1.2748898678414098, + "grad_norm": 2.1298001613626765, + "learning_rate": 6.373206295684653e-06, + "loss": 0.6418631076812744, + "step": 3618 + }, + { + "epoch": 1.2752422907488987, + "grad_norm": 1.639324838954067, + "learning_rate": 6.3677772117930895e-06, + "loss": 0.4975489675998688, + "step": 3619 + }, + { + "epoch": 1.2755947136563877, + "grad_norm": 1.6787243090627195, + "learning_rate": 6.362349360928117e-06, + "loss": 0.5621567964553833, + "step": 3620 + }, + { + "epoch": 1.2759471365638766, + "grad_norm": 1.9441609125211634, + "learning_rate": 6.356922744932335e-06, + "loss": 0.538573682308197, + "step": 3621 + }, + { + "epoch": 1.2762995594713655, + "grad_norm": 1.8099521315485383, + "learning_rate": 6.351497365647903e-06, + "loss": 0.5726763010025024, + "step": 3622 + }, + { + "epoch": 1.2766519823788547, + "grad_norm": 1.509968688666824, + "learning_rate": 6.346073224916565e-06, + "loss": 0.5911343097686768, + "step": 3623 + }, + { + "epoch": 1.2770044052863436, + "grad_norm": 1.8960352229890238, + "learning_rate": 6.340650324579658e-06, + "loss": 0.6181383728981018, + "step": 3624 + }, + { + "epoch": 1.2773568281938326, + "grad_norm": 1.8065087463718459, + "learning_rate": 6.3352286664780785e-06, + "loss": 0.5941140651702881, + "step": 3625 + }, + { + "epoch": 1.2777092511013217, + "grad_norm": 1.980034412220703, + "learning_rate": 6.329808252452316e-06, + "loss": 0.7604472637176514, + "step": 3626 + }, + { + "epoch": 1.2780616740088107, + "grad_norm": 1.7265138262893938, + "learning_rate": 6.324389084342435e-06, + "loss": 0.6063867211341858, + "step": 3627 + }, + { + "epoch": 1.2784140969162996, + "grad_norm": 1.8844241099487, + "learning_rate": 6.3189711639880644e-06, + "loss": 0.7202302813529968, + "step": 3628 + }, + { + "epoch": 1.2787665198237885, + "grad_norm": 1.7295127580755116, + "learning_rate": 6.313554493228431e-06, + "loss": 0.5934856534004211, + "step": 3629 + }, + { + "epoch": 1.2791189427312775, + "grad_norm": 
1.7905829637835577, + "learning_rate": 6.3081390739023175e-06, + "loss": 0.6403088569641113, + "step": 3630 + }, + { + "epoch": 1.2794713656387664, + "grad_norm": 1.9400757232043577, + "learning_rate": 6.302724907848096e-06, + "loss": 0.6679831743240356, + "step": 3631 + }, + { + "epoch": 1.2798237885462556, + "grad_norm": 1.9107919043768602, + "learning_rate": 6.297311996903703e-06, + "loss": 0.6914902329444885, + "step": 3632 + }, + { + "epoch": 1.2801762114537445, + "grad_norm": 1.4865016000129294, + "learning_rate": 6.2919003429066535e-06, + "loss": 0.5391600131988525, + "step": 3633 + }, + { + "epoch": 1.2805286343612334, + "grad_norm": 1.7774288854868727, + "learning_rate": 6.286489947694041e-06, + "loss": 0.5740962028503418, + "step": 3634 + }, + { + "epoch": 1.2808810572687226, + "grad_norm": 1.9144175178404335, + "learning_rate": 6.281080813102523e-06, + "loss": 0.6497045159339905, + "step": 3635 + }, + { + "epoch": 1.2812334801762115, + "grad_norm": 1.6649274023798961, + "learning_rate": 6.275672940968326e-06, + "loss": 0.5481048226356506, + "step": 3636 + }, + { + "epoch": 1.2815859030837005, + "grad_norm": 1.6547388155087517, + "learning_rate": 6.270266333127266e-06, + "loss": 0.5412508249282837, + "step": 3637 + }, + { + "epoch": 1.2819383259911894, + "grad_norm": 1.8289845737684471, + "learning_rate": 6.264860991414709e-06, + "loss": 0.5055446624755859, + "step": 3638 + }, + { + "epoch": 1.2822907488986783, + "grad_norm": 1.9772143213144648, + "learning_rate": 6.259456917665605e-06, + "loss": 0.6073929071426392, + "step": 3639 + }, + { + "epoch": 1.2826431718061673, + "grad_norm": 1.6297327309789957, + "learning_rate": 6.254054113714467e-06, + "loss": 0.5277928113937378, + "step": 3640 + }, + { + "epoch": 1.2829955947136564, + "grad_norm": 1.7440990717646376, + "learning_rate": 6.248652581395378e-06, + "loss": 0.5106299519538879, + "step": 3641 + }, + { + "epoch": 1.2833480176211454, + "grad_norm": 1.612143250274434, + "learning_rate": 
6.243252322541993e-06, + "loss": 0.485049843788147, + "step": 3642 + }, + { + "epoch": 1.2837004405286343, + "grad_norm": 2.0115453178937894, + "learning_rate": 6.237853338987532e-06, + "loss": 0.5899066925048828, + "step": 3643 + }, + { + "epoch": 1.2840528634361235, + "grad_norm": 1.6956228425038977, + "learning_rate": 6.2324556325647745e-06, + "loss": 0.5761981010437012, + "step": 3644 + }, + { + "epoch": 1.2844052863436124, + "grad_norm": 1.732932337254408, + "learning_rate": 6.227059205106085e-06, + "loss": 0.6288208961486816, + "step": 3645 + }, + { + "epoch": 1.2847577092511013, + "grad_norm": 1.7671756166643349, + "learning_rate": 6.2216640584433726e-06, + "loss": 0.6122645139694214, + "step": 3646 + }, + { + "epoch": 1.2851101321585903, + "grad_norm": 1.8312838317562172, + "learning_rate": 6.2162701944081295e-06, + "loss": 0.5838489532470703, + "step": 3647 + }, + { + "epoch": 1.2854625550660792, + "grad_norm": 1.5533740438356287, + "learning_rate": 6.2108776148314005e-06, + "loss": 0.6020689606666565, + "step": 3648 + }, + { + "epoch": 1.2858149779735684, + "grad_norm": 1.9453055966993607, + "learning_rate": 6.205486321543798e-06, + "loss": 0.5852698683738708, + "step": 3649 + }, + { + "epoch": 1.2861674008810573, + "grad_norm": 1.7649785944212673, + "learning_rate": 6.2000963163755015e-06, + "loss": 0.560903012752533, + "step": 3650 + }, + { + "epoch": 1.2865198237885462, + "grad_norm": 2.053972717306982, + "learning_rate": 6.194707601156249e-06, + "loss": 0.7750356197357178, + "step": 3651 + }, + { + "epoch": 1.2868722466960352, + "grad_norm": 1.7842589241914402, + "learning_rate": 6.189320177715338e-06, + "loss": 0.5503605604171753, + "step": 3652 + }, + { + "epoch": 1.2872246696035243, + "grad_norm": 1.8162609150425584, + "learning_rate": 6.183934047881636e-06, + "loss": 0.6910672187805176, + "step": 3653 + }, + { + "epoch": 1.2875770925110133, + "grad_norm": 1.6952370527492193, + "learning_rate": 6.1785492134835626e-06, + "loss": 0.7773069739341736, 
+ "step": 3654 + }, + { + "epoch": 1.2879295154185022, + "grad_norm": 1.7765631560225321, + "learning_rate": 6.173165676349103e-06, + "loss": 0.6777454018592834, + "step": 3655 + }, + { + "epoch": 1.2882819383259911, + "grad_norm": 1.6097825614884171, + "learning_rate": 6.167783438305803e-06, + "loss": 0.6103118658065796, + "step": 3656 + }, + { + "epoch": 1.28863436123348, + "grad_norm": 2.4016366240266454, + "learning_rate": 6.1624025011807595e-06, + "loss": 0.593717634677887, + "step": 3657 + }, + { + "epoch": 1.2889867841409692, + "grad_norm": 1.700445284940488, + "learning_rate": 6.1570228668006395e-06, + "loss": 0.5822824835777283, + "step": 3658 + }, + { + "epoch": 1.2893392070484582, + "grad_norm": 1.7095957018221146, + "learning_rate": 6.151644536991656e-06, + "loss": 0.5180603861808777, + "step": 3659 + }, + { + "epoch": 1.289691629955947, + "grad_norm": 1.799926440179644, + "learning_rate": 6.14626751357959e-06, + "loss": 0.6283069849014282, + "step": 3660 + }, + { + "epoch": 1.290044052863436, + "grad_norm": 2.2706339647511613, + "learning_rate": 6.14089179838977e-06, + "loss": 0.7590633630752563, + "step": 3661 + }, + { + "epoch": 1.2903964757709252, + "grad_norm": 1.4238309589699358, + "learning_rate": 6.135517393247081e-06, + "loss": 0.6044079661369324, + "step": 3662 + }, + { + "epoch": 1.2907488986784141, + "grad_norm": 2.078820338247561, + "learning_rate": 6.130144299975973e-06, + "loss": 0.603421688079834, + "step": 3663 + }, + { + "epoch": 1.291101321585903, + "grad_norm": 1.9398452395479244, + "learning_rate": 6.1247725204004395e-06, + "loss": 0.577094554901123, + "step": 3664 + }, + { + "epoch": 1.291453744493392, + "grad_norm": 1.7780187513951604, + "learning_rate": 6.119402056344033e-06, + "loss": 0.5752004981040955, + "step": 3665 + }, + { + "epoch": 1.291806167400881, + "grad_norm": 1.6979532493457608, + "learning_rate": 6.114032909629863e-06, + "loss": 0.730962872505188, + "step": 3666 + }, + { + "epoch": 1.29215859030837, + "grad_norm": 
2.0386068832784465, + "learning_rate": 6.108665082080578e-06, + "loss": 0.5361749529838562, + "step": 3667 + }, + { + "epoch": 1.292511013215859, + "grad_norm": 1.470729033877409, + "learning_rate": 6.103298575518401e-06, + "loss": 0.4841603636741638, + "step": 3668 + }, + { + "epoch": 1.292863436123348, + "grad_norm": 1.706501413292354, + "learning_rate": 6.097933391765087e-06, + "loss": 0.6614999771118164, + "step": 3669 + }, + { + "epoch": 1.2932158590308371, + "grad_norm": 1.6930402108862321, + "learning_rate": 6.092569532641947e-06, + "loss": 0.6088405847549438, + "step": 3670 + }, + { + "epoch": 1.293568281938326, + "grad_norm": 1.9173247230823398, + "learning_rate": 6.087206999969848e-06, + "loss": 0.601859986782074, + "step": 3671 + }, + { + "epoch": 1.293920704845815, + "grad_norm": 1.8019332247534052, + "learning_rate": 6.081845795569204e-06, + "loss": 0.5724194049835205, + "step": 3672 + }, + { + "epoch": 1.294273127753304, + "grad_norm": 1.7101141845528827, + "learning_rate": 6.07648592125997e-06, + "loss": 0.7899144887924194, + "step": 3673 + }, + { + "epoch": 1.2946255506607929, + "grad_norm": 1.8438581079047975, + "learning_rate": 6.071127378861667e-06, + "loss": 0.5778594017028809, + "step": 3674 + }, + { + "epoch": 1.2949779735682818, + "grad_norm": 1.6768623613769682, + "learning_rate": 6.065770170193342e-06, + "loss": 0.6357566118240356, + "step": 3675 + }, + { + "epoch": 1.295330396475771, + "grad_norm": 1.5951400768860937, + "learning_rate": 6.0604142970736115e-06, + "loss": 0.511436939239502, + "step": 3676 + }, + { + "epoch": 1.29568281938326, + "grad_norm": 1.883542435313207, + "learning_rate": 6.0550597613206205e-06, + "loss": 0.6469998955726624, + "step": 3677 + }, + { + "epoch": 1.2960352422907488, + "grad_norm": 1.5730405198836903, + "learning_rate": 6.049706564752069e-06, + "loss": 0.5724819898605347, + "step": 3678 + }, + { + "epoch": 1.296387665198238, + "grad_norm": 1.5360587172523898, + "learning_rate": 6.044354709185203e-06, + 
"loss": 0.6567148566246033, + "step": 3679 + }, + { + "epoch": 1.296740088105727, + "grad_norm": 1.8931575903206552, + "learning_rate": 6.039004196436807e-06, + "loss": 0.6694033145904541, + "step": 3680 + }, + { + "epoch": 1.2970925110132159, + "grad_norm": 1.8190573258877898, + "learning_rate": 6.033655028323215e-06, + "loss": 0.5147275924682617, + "step": 3681 + }, + { + "epoch": 1.2974449339207048, + "grad_norm": 2.0405860057138256, + "learning_rate": 6.0283072066603075e-06, + "loss": 0.5881609320640564, + "step": 3682 + }, + { + "epoch": 1.2977973568281937, + "grad_norm": 1.7248898652229567, + "learning_rate": 6.022960733263493e-06, + "loss": 0.625927209854126, + "step": 3683 + }, + { + "epoch": 1.2981497797356827, + "grad_norm": 1.8738096752650604, + "learning_rate": 6.017615609947747e-06, + "loss": 0.693459153175354, + "step": 3684 + }, + { + "epoch": 1.2985022026431718, + "grad_norm": 1.6745028766810846, + "learning_rate": 6.0122718385275615e-06, + "loss": 0.5185744762420654, + "step": 3685 + }, + { + "epoch": 1.2988546255506608, + "grad_norm": 1.7625922291600025, + "learning_rate": 6.006929420816982e-06, + "loss": 0.5153995752334595, + "step": 3686 + }, + { + "epoch": 1.2992070484581497, + "grad_norm": 1.9617946738772851, + "learning_rate": 6.001588358629598e-06, + "loss": 0.5844067931175232, + "step": 3687 + }, + { + "epoch": 1.2995594713656389, + "grad_norm": 1.7999387557140187, + "learning_rate": 5.996248653778529e-06, + "loss": 0.6021767854690552, + "step": 3688 + }, + { + "epoch": 1.2999118942731278, + "grad_norm": 1.650868828635221, + "learning_rate": 5.990910308076443e-06, + "loss": 0.573150098323822, + "step": 3689 + }, + { + "epoch": 1.3002643171806167, + "grad_norm": 1.8809065032795727, + "learning_rate": 5.985573323335541e-06, + "loss": 0.5125507116317749, + "step": 3690 + }, + { + "epoch": 1.3006167400881057, + "grad_norm": 1.5884199689542184, + "learning_rate": 5.980237701367556e-06, + "loss": 0.541732668876648, + "step": 3691 + }, + { + 
"epoch": 1.3009691629955946, + "grad_norm": 2.0151748973563577, + "learning_rate": 5.974903443983778e-06, + "loss": 0.66359543800354, + "step": 3692 + }, + { + "epoch": 1.3013215859030838, + "grad_norm": 1.8831727632454829, + "learning_rate": 5.969570552995014e-06, + "loss": 0.6986300349235535, + "step": 3693 + }, + { + "epoch": 1.3016740088105727, + "grad_norm": 2.0800644206104195, + "learning_rate": 5.9642390302116125e-06, + "loss": 0.6829022169113159, + "step": 3694 + }, + { + "epoch": 1.3020264317180616, + "grad_norm": 1.9073088749861613, + "learning_rate": 5.9589088774434655e-06, + "loss": 0.5710464715957642, + "step": 3695 + }, + { + "epoch": 1.3023788546255506, + "grad_norm": 1.8154393300824316, + "learning_rate": 5.953580096499989e-06, + "loss": 0.5604938268661499, + "step": 3696 + }, + { + "epoch": 1.3027312775330397, + "grad_norm": 1.755426899711885, + "learning_rate": 5.948252689190141e-06, + "loss": 0.678723931312561, + "step": 3697 + }, + { + "epoch": 1.3030837004405287, + "grad_norm": 1.8845664461665383, + "learning_rate": 5.9429266573224145e-06, + "loss": 0.6652591228485107, + "step": 3698 + }, + { + "epoch": 1.3034361233480176, + "grad_norm": 1.8800654237619134, + "learning_rate": 5.937602002704819e-06, + "loss": 0.6141147017478943, + "step": 3699 + }, + { + "epoch": 1.3037885462555066, + "grad_norm": 1.937561336880738, + "learning_rate": 5.932278727144924e-06, + "loss": 0.5260860919952393, + "step": 3700 + }, + { + "epoch": 1.3041409691629955, + "grad_norm": 1.6945627397292862, + "learning_rate": 5.926956832449806e-06, + "loss": 0.464357852935791, + "step": 3701 + }, + { + "epoch": 1.3044933920704846, + "grad_norm": 1.8301641414278105, + "learning_rate": 5.921636320426085e-06, + "loss": 0.6513686180114746, + "step": 3702 + }, + { + "epoch": 1.3048458149779736, + "grad_norm": 1.7297134138158161, + "learning_rate": 5.91631719287991e-06, + "loss": 0.44547855854034424, + "step": 3703 + }, + { + "epoch": 1.3051982378854625, + "grad_norm": 
1.8572950621020996, + "learning_rate": 5.910999451616959e-06, + "loss": 0.714026153087616, + "step": 3704 + }, + { + "epoch": 1.3055506607929517, + "grad_norm": 1.5164059156260825, + "learning_rate": 5.90568309844244e-06, + "loss": 0.48294252157211304, + "step": 3705 + }, + { + "epoch": 1.3059030837004406, + "grad_norm": 2.0148835282111275, + "learning_rate": 5.900368135161093e-06, + "loss": 0.587759256362915, + "step": 3706 + }, + { + "epoch": 1.3062555066079296, + "grad_norm": 1.7833437474608147, + "learning_rate": 5.895054563577172e-06, + "loss": 0.6251810789108276, + "step": 3707 + }, + { + "epoch": 1.3066079295154185, + "grad_norm": 1.98023378159902, + "learning_rate": 5.889742385494481e-06, + "loss": 0.6488438844680786, + "step": 3708 + }, + { + "epoch": 1.3069603524229074, + "grad_norm": 2.3062951128393325, + "learning_rate": 5.8844316027163315e-06, + "loss": 0.6682882308959961, + "step": 3709 + }, + { + "epoch": 1.3073127753303964, + "grad_norm": 1.9459894886811675, + "learning_rate": 5.879122217045573e-06, + "loss": 0.6537875533103943, + "step": 3710 + }, + { + "epoch": 1.3076651982378855, + "grad_norm": 1.994395753049965, + "learning_rate": 5.873814230284576e-06, + "loss": 0.6813541650772095, + "step": 3711 + }, + { + "epoch": 1.3080176211453745, + "grad_norm": 2.002875607232805, + "learning_rate": 5.868507644235233e-06, + "loss": 0.6962395906448364, + "step": 3712 + }, + { + "epoch": 1.3083700440528634, + "grad_norm": 1.8811127927416966, + "learning_rate": 5.863202460698972e-06, + "loss": 0.6872841119766235, + "step": 3713 + }, + { + "epoch": 1.3087224669603525, + "grad_norm": 2.007681646131619, + "learning_rate": 5.857898681476732e-06, + "loss": 0.7200508117675781, + "step": 3714 + }, + { + "epoch": 1.3090748898678415, + "grad_norm": 1.7850989505478374, + "learning_rate": 5.852596308368982e-06, + "loss": 0.6100003719329834, + "step": 3715 + }, + { + "epoch": 1.3094273127753304, + "grad_norm": 1.962305695853223, + "learning_rate": 5.847295343175714e-06, 
+ "loss": 0.7347345352172852, + "step": 3716 + }, + { + "epoch": 1.3097797356828194, + "grad_norm": 1.8094012131106647, + "learning_rate": 5.841995787696438e-06, + "loss": 0.6955733895301819, + "step": 3717 + }, + { + "epoch": 1.3101321585903083, + "grad_norm": 1.6497459626323396, + "learning_rate": 5.836697643730193e-06, + "loss": 0.5266987085342407, + "step": 3718 + }, + { + "epoch": 1.3104845814977972, + "grad_norm": 1.7072540878561502, + "learning_rate": 5.83140091307553e-06, + "loss": 0.5978814363479614, + "step": 3719 + }, + { + "epoch": 1.3108370044052864, + "grad_norm": 1.9008641546548906, + "learning_rate": 5.826105597530526e-06, + "loss": 0.608231782913208, + "step": 3720 + }, + { + "epoch": 1.3111894273127753, + "grad_norm": 1.660571967924875, + "learning_rate": 5.820811698892775e-06, + "loss": 0.5834963321685791, + "step": 3721 + }, + { + "epoch": 1.3115418502202643, + "grad_norm": 1.7715871926900555, + "learning_rate": 5.8155192189593915e-06, + "loss": 0.6675208806991577, + "step": 3722 + }, + { + "epoch": 1.3118942731277534, + "grad_norm": 2.0125396897962156, + "learning_rate": 5.810228159527003e-06, + "loss": 0.655093789100647, + "step": 3723 + }, + { + "epoch": 1.3122466960352424, + "grad_norm": 1.832975656309839, + "learning_rate": 5.804938522391768e-06, + "loss": 0.5658842921257019, + "step": 3724 + }, + { + "epoch": 1.3125991189427313, + "grad_norm": 1.7484570770381627, + "learning_rate": 5.799650309349348e-06, + "loss": 0.4502618610858917, + "step": 3725 + }, + { + "epoch": 1.3129515418502202, + "grad_norm": 1.6150871905896036, + "learning_rate": 5.79436352219493e-06, + "loss": 0.6165845394134521, + "step": 3726 + }, + { + "epoch": 1.3133039647577092, + "grad_norm": 1.6734001609648903, + "learning_rate": 5.7890781627232115e-06, + "loss": 0.6315968036651611, + "step": 3727 + }, + { + "epoch": 1.313656387665198, + "grad_norm": 1.5048326218576167, + "learning_rate": 5.783794232728408e-06, + "loss": 0.58831787109375, + "step": 3728 + }, + { + 
"epoch": 1.3140088105726873, + "grad_norm": 1.7597864288310854, + "learning_rate": 5.778511734004248e-06, + "loss": 0.5056396722793579, + "step": 3729 + }, + { + "epoch": 1.3143612334801762, + "grad_norm": 2.3417954571274753, + "learning_rate": 5.773230668343978e-06, + "loss": 0.5469251871109009, + "step": 3730 + }, + { + "epoch": 1.3147136563876651, + "grad_norm": 1.768855633328091, + "learning_rate": 5.76795103754035e-06, + "loss": 0.7011934518814087, + "step": 3731 + }, + { + "epoch": 1.3150660792951543, + "grad_norm": 1.574817644372446, + "learning_rate": 5.762672843385643e-06, + "loss": 0.7080543041229248, + "step": 3732 + }, + { + "epoch": 1.3154185022026432, + "grad_norm": 1.7812689751161113, + "learning_rate": 5.757396087671634e-06, + "loss": 0.5180330276489258, + "step": 3733 + }, + { + "epoch": 1.3157709251101322, + "grad_norm": 1.6465709022018649, + "learning_rate": 5.75212077218962e-06, + "loss": 0.5282220840454102, + "step": 3734 + }, + { + "epoch": 1.316123348017621, + "grad_norm": 1.9100789844293367, + "learning_rate": 5.746846898730403e-06, + "loss": 0.7174440026283264, + "step": 3735 + }, + { + "epoch": 1.31647577092511, + "grad_norm": 1.7156784573652895, + "learning_rate": 5.7415744690843025e-06, + "loss": 0.537194013595581, + "step": 3736 + }, + { + "epoch": 1.3168281938325992, + "grad_norm": 1.714186482517803, + "learning_rate": 5.7363034850411415e-06, + "loss": 0.7514588832855225, + "step": 3737 + }, + { + "epoch": 1.3171806167400881, + "grad_norm": 1.6138774970176952, + "learning_rate": 5.731033948390252e-06, + "loss": 0.601151704788208, + "step": 3738 + }, + { + "epoch": 1.317533039647577, + "grad_norm": 1.9652638368208295, + "learning_rate": 5.7257658609204865e-06, + "loss": 0.6046192646026611, + "step": 3739 + }, + { + "epoch": 1.317885462555066, + "grad_norm": 1.9909773544544114, + "learning_rate": 5.720499224420196e-06, + "loss": 0.5003835558891296, + "step": 3740 + }, + { + "epoch": 1.3182378854625552, + "grad_norm": 2.7143275056165237, 
+ "learning_rate": 5.715234040677229e-06, + "loss": 0.6251966953277588, + "step": 3741 + }, + { + "epoch": 1.318590308370044, + "grad_norm": 1.9483642954012013, + "learning_rate": 5.709970311478961e-06, + "loss": 0.6681240797042847, + "step": 3742 + }, + { + "epoch": 1.318942731277533, + "grad_norm": 1.6278748497204938, + "learning_rate": 5.704708038612261e-06, + "loss": 0.582561194896698, + "step": 3743 + }, + { + "epoch": 1.319295154185022, + "grad_norm": 1.8550137845260724, + "learning_rate": 5.699447223863508e-06, + "loss": 0.5616302490234375, + "step": 3744 + }, + { + "epoch": 1.319647577092511, + "grad_norm": 1.7452561285826282, + "learning_rate": 5.6941878690185835e-06, + "loss": 0.6131408214569092, + "step": 3745 + }, + { + "epoch": 1.32, + "grad_norm": 1.8334584062109562, + "learning_rate": 5.688929975862873e-06, + "loss": 0.5772547721862793, + "step": 3746 + }, + { + "epoch": 1.320352422907489, + "grad_norm": 1.7519534139582256, + "learning_rate": 5.683673546181274e-06, + "loss": 0.5927203893661499, + "step": 3747 + }, + { + "epoch": 1.320704845814978, + "grad_norm": 1.9849489030223588, + "learning_rate": 5.67841858175818e-06, + "loss": 0.6001334190368652, + "step": 3748 + }, + { + "epoch": 1.321057268722467, + "grad_norm": 1.584893703676267, + "learning_rate": 5.673165084377479e-06, + "loss": 0.4598100781440735, + "step": 3749 + }, + { + "epoch": 1.321409691629956, + "grad_norm": 1.9316178856088813, + "learning_rate": 5.667913055822578e-06, + "loss": 0.6455222368240356, + "step": 3750 + }, + { + "epoch": 1.321762114537445, + "grad_norm": 1.9234057001448424, + "learning_rate": 5.662662497876375e-06, + "loss": 0.6327164173126221, + "step": 3751 + }, + { + "epoch": 1.322114537444934, + "grad_norm": 1.7096288638222439, + "learning_rate": 5.657413412321271e-06, + "loss": 0.6699539422988892, + "step": 3752 + }, + { + "epoch": 1.3224669603524228, + "grad_norm": 2.0694083676949107, + "learning_rate": 5.6521658009391676e-06, + "loss": 0.7507830858230591, + 
"step": 3753 + }, + { + "epoch": 1.3228193832599118, + "grad_norm": 1.7615687866950613, + "learning_rate": 5.646919665511461e-06, + "loss": 0.5164662003517151, + "step": 3754 + }, + { + "epoch": 1.323171806167401, + "grad_norm": 2.267697288539615, + "learning_rate": 5.641675007819058e-06, + "loss": 0.7059702277183533, + "step": 3755 + }, + { + "epoch": 1.3235242290748899, + "grad_norm": 2.1165471311290243, + "learning_rate": 5.636431829642359e-06, + "loss": 0.6535515189170837, + "step": 3756 + }, + { + "epoch": 1.3238766519823788, + "grad_norm": 1.782117402624855, + "learning_rate": 5.631190132761247e-06, + "loss": 0.5912176370620728, + "step": 3757 + }, + { + "epoch": 1.324229074889868, + "grad_norm": 1.6111457739999588, + "learning_rate": 5.625949918955126e-06, + "loss": 0.6527940034866333, + "step": 3758 + }, + { + "epoch": 1.324581497797357, + "grad_norm": 1.9751426120017839, + "learning_rate": 5.620711190002879e-06, + "loss": 0.7236875295639038, + "step": 3759 + }, + { + "epoch": 1.3249339207048458, + "grad_norm": 2.042390900324052, + "learning_rate": 5.6154739476829e-06, + "loss": 0.6823146343231201, + "step": 3760 + }, + { + "epoch": 1.3252863436123348, + "grad_norm": 2.058457581887865, + "learning_rate": 5.610238193773061e-06, + "loss": 0.5795537233352661, + "step": 3761 + }, + { + "epoch": 1.3256387665198237, + "grad_norm": 1.90461931046175, + "learning_rate": 5.605003930050738e-06, + "loss": 0.5530939102172852, + "step": 3762 + }, + { + "epoch": 1.3259911894273126, + "grad_norm": 1.6978922894801083, + "learning_rate": 5.599771158292806e-06, + "loss": 0.5362278819084167, + "step": 3763 + }, + { + "epoch": 1.3263436123348018, + "grad_norm": 1.9521190182519916, + "learning_rate": 5.5945398802756315e-06, + "loss": 0.6136768460273743, + "step": 3764 + }, + { + "epoch": 1.3266960352422907, + "grad_norm": 1.7782753118174626, + "learning_rate": 5.589310097775055e-06, + "loss": 0.5979033708572388, + "step": 3765 + }, + { + "epoch": 1.3270484581497797, + 
"grad_norm": 1.810593191069574, + "learning_rate": 5.584081812566439e-06, + "loss": 0.6750006675720215, + "step": 3766 + }, + { + "epoch": 1.3274008810572688, + "grad_norm": 1.6815578779160076, + "learning_rate": 5.578855026424619e-06, + "loss": 0.6004951000213623, + "step": 3767 + }, + { + "epoch": 1.3277533039647578, + "grad_norm": 1.522422246822047, + "learning_rate": 5.573629741123926e-06, + "loss": 0.570702075958252, + "step": 3768 + }, + { + "epoch": 1.3281057268722467, + "grad_norm": 1.5435622334320813, + "learning_rate": 5.5684059584381826e-06, + "loss": 0.506945788860321, + "step": 3769 + }, + { + "epoch": 1.3284581497797356, + "grad_norm": 1.647967795112189, + "learning_rate": 5.563183680140696e-06, + "loss": 0.5935436487197876, + "step": 3770 + }, + { + "epoch": 1.3288105726872246, + "grad_norm": 2.7715355389110043, + "learning_rate": 5.5579629080042755e-06, + "loss": 0.641446590423584, + "step": 3771 + }, + { + "epoch": 1.3291629955947137, + "grad_norm": 1.7489195207611605, + "learning_rate": 5.552743643801209e-06, + "loss": 0.5816437005996704, + "step": 3772 + }, + { + "epoch": 1.3295154185022027, + "grad_norm": 1.7699530777692443, + "learning_rate": 5.547525889303265e-06, + "loss": 0.666487991809845, + "step": 3773 + }, + { + "epoch": 1.3298678414096916, + "grad_norm": 2.100750588167558, + "learning_rate": 5.542309646281718e-06, + "loss": 0.7961397767066956, + "step": 3774 + }, + { + "epoch": 1.3302202643171805, + "grad_norm": 1.5292695888779975, + "learning_rate": 5.53709491650732e-06, + "loss": 0.4736033082008362, + "step": 3775 + }, + { + "epoch": 1.3305726872246697, + "grad_norm": 1.8004482810288622, + "learning_rate": 5.531881701750304e-06, + "loss": 0.542208194732666, + "step": 3776 + }, + { + "epoch": 1.3309251101321586, + "grad_norm": 1.8151751535940353, + "learning_rate": 5.526670003780399e-06, + "loss": 0.6306429505348206, + "step": 3777 + }, + { + "epoch": 1.3312775330396476, + "grad_norm": 1.7520809852323194, + "learning_rate": 
5.521459824366808e-06, + "loss": 0.531991720199585, + "step": 3778 + }, + { + "epoch": 1.3316299559471365, + "grad_norm": 1.9852873895231067, + "learning_rate": 5.516251165278235e-06, + "loss": 0.688262939453125, + "step": 3779 + }, + { + "epoch": 1.3319823788546254, + "grad_norm": 2.0026356133489416, + "learning_rate": 5.511044028282853e-06, + "loss": 0.7555293440818787, + "step": 3780 + }, + { + "epoch": 1.3323348017621146, + "grad_norm": 1.9387490035628434, + "learning_rate": 5.505838415148317e-06, + "loss": 0.7518796324729919, + "step": 3781 + }, + { + "epoch": 1.3326872246696035, + "grad_norm": 1.859399241253671, + "learning_rate": 5.500634327641777e-06, + "loss": 0.5161253809928894, + "step": 3782 + }, + { + "epoch": 1.3330396475770925, + "grad_norm": 1.5897606830745852, + "learning_rate": 5.4954317675298586e-06, + "loss": 0.5617681741714478, + "step": 3783 + }, + { + "epoch": 1.3333920704845814, + "grad_norm": 1.6894758792140483, + "learning_rate": 5.4902307365786676e-06, + "loss": 0.5707885026931763, + "step": 3784 + }, + { + "epoch": 1.3337444933920706, + "grad_norm": 1.9016603426520955, + "learning_rate": 5.485031236553792e-06, + "loss": 0.5842025876045227, + "step": 3785 + }, + { + "epoch": 1.3340969162995595, + "grad_norm": 2.278549510271659, + "learning_rate": 5.479833269220296e-06, + "loss": 0.7103949785232544, + "step": 3786 + }, + { + "epoch": 1.3344493392070484, + "grad_norm": 1.8432428404869632, + "learning_rate": 5.474636836342737e-06, + "loss": 0.7704740762710571, + "step": 3787 + }, + { + "epoch": 1.3348017621145374, + "grad_norm": 1.808727631247744, + "learning_rate": 5.469441939685137e-06, + "loss": 0.6402652263641357, + "step": 3788 + }, + { + "epoch": 1.3351541850220263, + "grad_norm": 1.892219877227891, + "learning_rate": 5.464248581011002e-06, + "loss": 0.8214348554611206, + "step": 3789 + }, + { + "epoch": 1.3355066079295155, + "grad_norm": 1.9758909531924576, + "learning_rate": 5.459056762083318e-06, + "loss": 0.6372429132461548, + 
"step": 3790 + }, + { + "epoch": 1.3358590308370044, + "grad_norm": 1.849044346394621, + "learning_rate": 5.453866484664543e-06, + "loss": 0.5418422222137451, + "step": 3791 + }, + { + "epoch": 1.3362114537444933, + "grad_norm": 1.7395663492002502, + "learning_rate": 5.448677750516613e-06, + "loss": 0.6574567556381226, + "step": 3792 + }, + { + "epoch": 1.3365638766519825, + "grad_norm": 1.9976311809706857, + "learning_rate": 5.443490561400948e-06, + "loss": 0.5174030661582947, + "step": 3793 + }, + { + "epoch": 1.3369162995594714, + "grad_norm": 1.5627335899600845, + "learning_rate": 5.4383049190784275e-06, + "loss": 0.595477819442749, + "step": 3794 + }, + { + "epoch": 1.3372687224669604, + "grad_norm": 1.845680624563864, + "learning_rate": 5.4331208253094255e-06, + "loss": 0.6177364587783813, + "step": 3795 + }, + { + "epoch": 1.3376211453744493, + "grad_norm": 1.6348460055259042, + "learning_rate": 5.4279382818537774e-06, + "loss": 0.6106897592544556, + "step": 3796 + }, + { + "epoch": 1.3379735682819383, + "grad_norm": 1.8500671496295353, + "learning_rate": 5.422757290470795e-06, + "loss": 0.46700483560562134, + "step": 3797 + }, + { + "epoch": 1.3383259911894272, + "grad_norm": 1.952200717602712, + "learning_rate": 5.417577852919262e-06, + "loss": 0.5408231019973755, + "step": 3798 + }, + { + "epoch": 1.3386784140969163, + "grad_norm": 1.8733329229880296, + "learning_rate": 5.412399970957439e-06, + "loss": 0.6430809497833252, + "step": 3799 + }, + { + "epoch": 1.3390308370044053, + "grad_norm": 1.9515663922431925, + "learning_rate": 5.4072236463430535e-06, + "loss": 0.6817858219146729, + "step": 3800 + }, + { + "epoch": 1.3393832599118942, + "grad_norm": 1.7386331074635664, + "learning_rate": 5.402048880833308e-06, + "loss": 0.5492604970932007, + "step": 3801 + }, + { + "epoch": 1.3397356828193834, + "grad_norm": 1.9883458715986422, + "learning_rate": 5.39687567618487e-06, + "loss": 0.6148543357849121, + "step": 3802 + }, + { + "epoch": 1.3400881057268723, + 
"grad_norm": 1.7245960691315507, + "learning_rate": 5.391704034153894e-06, + "loss": 0.5921820402145386, + "step": 3803 + }, + { + "epoch": 1.3404405286343613, + "grad_norm": 1.8759210914719033, + "learning_rate": 5.386533956495974e-06, + "loss": 0.49728113412857056, + "step": 3804 + }, + { + "epoch": 1.3407929515418502, + "grad_norm": 1.7899218455267007, + "learning_rate": 5.381365444966205e-06, + "loss": 0.5944808125495911, + "step": 3805 + }, + { + "epoch": 1.3411453744493391, + "grad_norm": 1.6022996204023598, + "learning_rate": 5.376198501319128e-06, + "loss": 0.5197580456733704, + "step": 3806 + }, + { + "epoch": 1.341497797356828, + "grad_norm": 1.5953524266203611, + "learning_rate": 5.3710331273087625e-06, + "loss": 0.6229256391525269, + "step": 3807 + }, + { + "epoch": 1.3418502202643172, + "grad_norm": 2.0736813734241073, + "learning_rate": 5.365869324688591e-06, + "loss": 0.5305753946304321, + "step": 3808 + }, + { + "epoch": 1.3422026431718062, + "grad_norm": 1.4520191291543518, + "learning_rate": 5.360707095211566e-06, + "loss": 0.4002259373664856, + "step": 3809 + }, + { + "epoch": 1.342555066079295, + "grad_norm": 1.8821320745162777, + "learning_rate": 5.3555464406300965e-06, + "loss": 0.5211426615715027, + "step": 3810 + }, + { + "epoch": 1.3429074889867842, + "grad_norm": 1.7112007743194535, + "learning_rate": 5.350387362696077e-06, + "loss": 0.5998013019561768, + "step": 3811 + }, + { + "epoch": 1.3432599118942732, + "grad_norm": 1.6128635046491597, + "learning_rate": 5.345229863160839e-06, + "loss": 0.5330953598022461, + "step": 3812 + }, + { + "epoch": 1.3436123348017621, + "grad_norm": 1.6570398271033384, + "learning_rate": 5.340073943775206e-06, + "loss": 0.6999118328094482, + "step": 3813 + }, + { + "epoch": 1.343964757709251, + "grad_norm": 2.060346240780723, + "learning_rate": 5.334919606289446e-06, + "loss": 0.6286367177963257, + "step": 3814 + }, + { + "epoch": 1.34431718061674, + "grad_norm": 1.4130805934733843, + "learning_rate": 
5.329766852453296e-06, + "loss": 0.5793008804321289, + "step": 3815 + }, + { + "epoch": 1.3446696035242292, + "grad_norm": 1.7815340287164039, + "learning_rate": 5.324615684015957e-06, + "loss": 0.5811383128166199, + "step": 3816 + }, + { + "epoch": 1.345022026431718, + "grad_norm": 1.8888368809882845, + "learning_rate": 5.319466102726087e-06, + "loss": 0.7389675378799438, + "step": 3817 + }, + { + "epoch": 1.345374449339207, + "grad_norm": 1.9482215135863048, + "learning_rate": 5.314318110331815e-06, + "loss": 0.6105868220329285, + "step": 3818 + }, + { + "epoch": 1.345726872246696, + "grad_norm": 1.648111237588601, + "learning_rate": 5.3091717085807235e-06, + "loss": 0.5979465842247009, + "step": 3819 + }, + { + "epoch": 1.3460792951541851, + "grad_norm": 2.100772248921902, + "learning_rate": 5.304026899219846e-06, + "loss": 0.6722681522369385, + "step": 3820 + }, + { + "epoch": 1.346431718061674, + "grad_norm": 1.5469717835195365, + "learning_rate": 5.298883683995697e-06, + "loss": 0.4687497913837433, + "step": 3821 + }, + { + "epoch": 1.346784140969163, + "grad_norm": 1.6982574361909266, + "learning_rate": 5.29374206465423e-06, + "loss": 0.563692569732666, + "step": 3822 + }, + { + "epoch": 1.347136563876652, + "grad_norm": 1.7298606992172854, + "learning_rate": 5.2886020429408716e-06, + "loss": 0.604897141456604, + "step": 3823 + }, + { + "epoch": 1.3474889867841409, + "grad_norm": 2.111770720101543, + "learning_rate": 5.283463620600493e-06, + "loss": 0.6270164251327515, + "step": 3824 + }, + { + "epoch": 1.34784140969163, + "grad_norm": 2.1238324371472954, + "learning_rate": 5.278326799377428e-06, + "loss": 0.6487830877304077, + "step": 3825 + }, + { + "epoch": 1.348193832599119, + "grad_norm": 1.58718768900561, + "learning_rate": 5.273191581015474e-06, + "loss": 0.5816935896873474, + "step": 3826 + }, + { + "epoch": 1.348546255506608, + "grad_norm": 1.72099904065486, + "learning_rate": 5.26805796725788e-06, + "loss": 0.6281115412712097, + "step": 3827 + }, + 
{ + "epoch": 1.348898678414097, + "grad_norm": 2.0975447662151288, + "learning_rate": 5.2629259598473335e-06, + "loss": 0.5031973123550415, + "step": 3828 + }, + { + "epoch": 1.349251101321586, + "grad_norm": 1.6391975654545219, + "learning_rate": 5.257795560526005e-06, + "loss": 0.6220165491104126, + "step": 3829 + }, + { + "epoch": 1.349603524229075, + "grad_norm": 1.8177506583957952, + "learning_rate": 5.2526667710354995e-06, + "loss": 0.6451058387756348, + "step": 3830 + }, + { + "epoch": 1.3499559471365639, + "grad_norm": 2.000132155225934, + "learning_rate": 5.247539593116884e-06, + "loss": 0.7524863481521606, + "step": 3831 + }, + { + "epoch": 1.3503083700440528, + "grad_norm": 1.7855711080776688, + "learning_rate": 5.242414028510674e-06, + "loss": 0.6270921230316162, + "step": 3832 + }, + { + "epoch": 1.3506607929515417, + "grad_norm": 1.8779302666662292, + "learning_rate": 5.237290078956836e-06, + "loss": 0.6196550130844116, + "step": 3833 + }, + { + "epoch": 1.351013215859031, + "grad_norm": 1.932517845360487, + "learning_rate": 5.232167746194798e-06, + "loss": 0.8512230515480042, + "step": 3834 + }, + { + "epoch": 1.3513656387665198, + "grad_norm": 1.672868645098828, + "learning_rate": 5.227047031963435e-06, + "loss": 0.5196807980537415, + "step": 3835 + }, + { + "epoch": 1.3517180616740088, + "grad_norm": 1.890472281368116, + "learning_rate": 5.2219279380010565e-06, + "loss": 0.6713111400604248, + "step": 3836 + }, + { + "epoch": 1.352070484581498, + "grad_norm": 1.8891048300322977, + "learning_rate": 5.216810466045448e-06, + "loss": 0.7150874137878418, + "step": 3837 + }, + { + "epoch": 1.3524229074889869, + "grad_norm": 1.9379344809365882, + "learning_rate": 5.211694617833827e-06, + "loss": 0.5812375545501709, + "step": 3838 + }, + { + "epoch": 1.3527753303964758, + "grad_norm": 1.6232111313971074, + "learning_rate": 5.2065803951028675e-06, + "loss": 0.5842182040214539, + "step": 3839 + }, + { + "epoch": 1.3531277533039647, + "grad_norm": 
1.7655789614212678, + "learning_rate": 5.201467799588685e-06, + "loss": 0.5432665348052979, + "step": 3840 + }, + { + "epoch": 1.3534801762114537, + "grad_norm": 1.610757257105171, + "learning_rate": 5.196356833026845e-06, + "loss": 0.551771879196167, + "step": 3841 + }, + { + "epoch": 1.3538325991189426, + "grad_norm": 2.0105503681662076, + "learning_rate": 5.19124749715237e-06, + "loss": 0.6961710453033447, + "step": 3842 + }, + { + "epoch": 1.3541850220264318, + "grad_norm": 1.9510922019810755, + "learning_rate": 5.18613979369972e-06, + "loss": 0.7105714678764343, + "step": 3843 + }, + { + "epoch": 1.3545374449339207, + "grad_norm": 1.9369232024679732, + "learning_rate": 5.181033724402789e-06, + "loss": 0.7100229263305664, + "step": 3844 + }, + { + "epoch": 1.3548898678414096, + "grad_norm": 1.6852711649451124, + "learning_rate": 5.175929290994941e-06, + "loss": 0.651812732219696, + "step": 3845 + }, + { + "epoch": 1.3552422907488988, + "grad_norm": 2.308449923325572, + "learning_rate": 5.170826495208967e-06, + "loss": 0.5194147825241089, + "step": 3846 + }, + { + "epoch": 1.3555947136563877, + "grad_norm": 1.6095794520986102, + "learning_rate": 5.16572533877711e-06, + "loss": 0.5939956307411194, + "step": 3847 + }, + { + "epoch": 1.3559471365638767, + "grad_norm": 1.7731843322868706, + "learning_rate": 5.160625823431051e-06, + "loss": 0.6434104442596436, + "step": 3848 + }, + { + "epoch": 1.3562995594713656, + "grad_norm": 1.9584483919337772, + "learning_rate": 5.155527950901914e-06, + "loss": 0.5256108045578003, + "step": 3849 + }, + { + "epoch": 1.3566519823788545, + "grad_norm": 1.5746637659323357, + "learning_rate": 5.150431722920277e-06, + "loss": 0.5632717609405518, + "step": 3850 + }, + { + "epoch": 1.3570044052863435, + "grad_norm": 1.8450205582439452, + "learning_rate": 5.145337141216149e-06, + "loss": 0.5964382886886597, + "step": 3851 + }, + { + "epoch": 1.3573568281938326, + "grad_norm": 1.9383063853676261, + "learning_rate": 5.140244207518971e-06, 
+ "loss": 0.7268366813659668, + "step": 3852 + }, + { + "epoch": 1.3577092511013216, + "grad_norm": 2.357958765027834, + "learning_rate": 5.135152923557647e-06, + "loss": 0.7376477122306824, + "step": 3853 + }, + { + "epoch": 1.3580616740088105, + "grad_norm": 1.9573550951394243, + "learning_rate": 5.130063291060505e-06, + "loss": 0.50569748878479, + "step": 3854 + }, + { + "epoch": 1.3584140969162997, + "grad_norm": 1.684535591269265, + "learning_rate": 5.12497531175532e-06, + "loss": 0.5639374256134033, + "step": 3855 + }, + { + "epoch": 1.3587665198237886, + "grad_norm": 2.0009335012534146, + "learning_rate": 5.1198889873692994e-06, + "loss": 0.5051915645599365, + "step": 3856 + }, + { + "epoch": 1.3591189427312775, + "grad_norm": 1.979939818228197, + "learning_rate": 5.114804319629088e-06, + "loss": 0.4718795120716095, + "step": 3857 + }, + { + "epoch": 1.3594713656387665, + "grad_norm": 1.7040447839749338, + "learning_rate": 5.109721310260781e-06, + "loss": 0.5684067606925964, + "step": 3858 + }, + { + "epoch": 1.3598237885462554, + "grad_norm": 1.687205926430453, + "learning_rate": 5.104639960989903e-06, + "loss": 0.5757609605789185, + "step": 3859 + }, + { + "epoch": 1.3601762114537446, + "grad_norm": 1.637859976815221, + "learning_rate": 5.099560273541401e-06, + "loss": 0.5971167087554932, + "step": 3860 + }, + { + "epoch": 1.3605286343612335, + "grad_norm": 1.9766573766085018, + "learning_rate": 5.094482249639683e-06, + "loss": 0.6959896683692932, + "step": 3861 + }, + { + "epoch": 1.3608810572687224, + "grad_norm": 1.8397057454745067, + "learning_rate": 5.089405891008574e-06, + "loss": 0.6954548358917236, + "step": 3862 + }, + { + "epoch": 1.3612334801762114, + "grad_norm": 1.5747472561310782, + "learning_rate": 5.084331199371343e-06, + "loss": 0.5659986138343811, + "step": 3863 + }, + { + "epoch": 1.3615859030837005, + "grad_norm": 1.9340659365358734, + "learning_rate": 5.079258176450687e-06, + "loss": 0.5582559108734131, + "step": 3864 + }, + { + 
"epoch": 1.3619383259911895, + "grad_norm": 1.5684621947501252, + "learning_rate": 5.0741868239687395e-06, + "loss": 0.5337075591087341, + "step": 3865 + }, + { + "epoch": 1.3622907488986784, + "grad_norm": 1.8617666338346237, + "learning_rate": 5.069117143647075e-06, + "loss": 0.621441125869751, + "step": 3866 + }, + { + "epoch": 1.3626431718061673, + "grad_norm": 1.7285404952370873, + "learning_rate": 5.064049137206677e-06, + "loss": 0.5476670861244202, + "step": 3867 + }, + { + "epoch": 1.3629955947136563, + "grad_norm": 1.9444577342582248, + "learning_rate": 5.058982806367989e-06, + "loss": 0.5357356071472168, + "step": 3868 + }, + { + "epoch": 1.3633480176211454, + "grad_norm": 2.032867685216442, + "learning_rate": 5.053918152850868e-06, + "loss": 0.5722761750221252, + "step": 3869 + }, + { + "epoch": 1.3637004405286344, + "grad_norm": 1.8019521015311857, + "learning_rate": 5.048855178374606e-06, + "loss": 0.7271207571029663, + "step": 3870 + }, + { + "epoch": 1.3640528634361233, + "grad_norm": 2.149716528128109, + "learning_rate": 5.043793884657926e-06, + "loss": 0.6213557720184326, + "step": 3871 + }, + { + "epoch": 1.3644052863436125, + "grad_norm": 1.9750542918701046, + "learning_rate": 5.03873427341898e-06, + "loss": 0.6509476900100708, + "step": 3872 + }, + { + "epoch": 1.3647577092511014, + "grad_norm": 1.8266690493980986, + "learning_rate": 5.0336763463753425e-06, + "loss": 0.5321642756462097, + "step": 3873 + }, + { + "epoch": 1.3651101321585903, + "grad_norm": 1.8114804761469812, + "learning_rate": 5.028620105244035e-06, + "loss": 0.7237476110458374, + "step": 3874 + }, + { + "epoch": 1.3654625550660793, + "grad_norm": 2.014453779183698, + "learning_rate": 5.0235655517414805e-06, + "loss": 0.6653447151184082, + "step": 3875 + }, + { + "epoch": 1.3658149779735682, + "grad_norm": 1.843622237552059, + "learning_rate": 5.018512687583552e-06, + "loss": 0.6188938617706299, + "step": 3876 + }, + { + "epoch": 1.3661674008810571, + "grad_norm": 
1.8211870806299153, + "learning_rate": 5.013461514485536e-06, + "loss": 0.6341606378555298, + "step": 3877 + }, + { + "epoch": 1.3665198237885463, + "grad_norm": 1.6224290182707664, + "learning_rate": 5.00841203416215e-06, + "loss": 0.6148994565010071, + "step": 3878 + }, + { + "epoch": 1.3668722466960352, + "grad_norm": 1.8692541577175399, + "learning_rate": 5.003364248327533e-06, + "loss": 0.6292222142219543, + "step": 3879 + }, + { + "epoch": 1.3672246696035242, + "grad_norm": 1.618170468267519, + "learning_rate": 4.998318158695255e-06, + "loss": 0.6648836135864258, + "step": 3880 + }, + { + "epoch": 1.3675770925110133, + "grad_norm": 6.866040476375875, + "learning_rate": 4.993273766978297e-06, + "loss": 0.5175273418426514, + "step": 3881 + }, + { + "epoch": 1.3679295154185023, + "grad_norm": 1.5661461645683938, + "learning_rate": 4.98823107488909e-06, + "loss": 0.5686253309249878, + "step": 3882 + }, + { + "epoch": 1.3682819383259912, + "grad_norm": 1.9697672783538545, + "learning_rate": 4.983190084139452e-06, + "loss": 0.6128156185150146, + "step": 3883 + }, + { + "epoch": 1.3686343612334801, + "grad_norm": 1.9331016188284555, + "learning_rate": 4.978150796440656e-06, + "loss": 0.6849625110626221, + "step": 3884 + }, + { + "epoch": 1.368986784140969, + "grad_norm": 1.5986771035358114, + "learning_rate": 4.973113213503379e-06, + "loss": 0.5735955238342285, + "step": 3885 + }, + { + "epoch": 1.369339207048458, + "grad_norm": 1.6049593584012303, + "learning_rate": 4.968077337037724e-06, + "loss": 0.4584425091743469, + "step": 3886 + }, + { + "epoch": 1.3696916299559472, + "grad_norm": 1.9525312670752564, + "learning_rate": 4.963043168753212e-06, + "loss": 0.547109067440033, + "step": 3887 + }, + { + "epoch": 1.3700440528634361, + "grad_norm": 2.113357180829694, + "learning_rate": 4.9580107103587895e-06, + "loss": 0.6966128349304199, + "step": 3888 + }, + { + "epoch": 1.370396475770925, + "grad_norm": 1.7817002019358994, + "learning_rate": 4.952979963562814e-06, + 
"loss": 0.6275819540023804, + "step": 3889 + }, + { + "epoch": 1.3707488986784142, + "grad_norm": 1.6096829752005641, + "learning_rate": 4.94795093007308e-06, + "loss": 0.5678467750549316, + "step": 3890 + }, + { + "epoch": 1.3711013215859031, + "grad_norm": 1.8874234747665013, + "learning_rate": 4.942923611596772e-06, + "loss": 0.6516115665435791, + "step": 3891 + }, + { + "epoch": 1.371453744493392, + "grad_norm": 1.8638529672264463, + "learning_rate": 4.937898009840518e-06, + "loss": 0.6279621124267578, + "step": 3892 + }, + { + "epoch": 1.371806167400881, + "grad_norm": 1.6187117518672614, + "learning_rate": 4.932874126510353e-06, + "loss": 0.6123322248458862, + "step": 3893 + }, + { + "epoch": 1.37215859030837, + "grad_norm": 1.6259761787603553, + "learning_rate": 4.927851963311726e-06, + "loss": 0.43412432074546814, + "step": 3894 + }, + { + "epoch": 1.372511013215859, + "grad_norm": 1.859998329311036, + "learning_rate": 4.922831521949507e-06, + "loss": 0.6582022905349731, + "step": 3895 + }, + { + "epoch": 1.372863436123348, + "grad_norm": 1.8966645456702385, + "learning_rate": 4.917812804127976e-06, + "loss": 0.6219466328620911, + "step": 3896 + }, + { + "epoch": 1.373215859030837, + "grad_norm": 2.056798959647299, + "learning_rate": 4.9127958115508365e-06, + "loss": 0.5352981090545654, + "step": 3897 + }, + { + "epoch": 1.373568281938326, + "grad_norm": 1.5240218181276974, + "learning_rate": 4.907780545921205e-06, + "loss": 0.47646182775497437, + "step": 3898 + }, + { + "epoch": 1.373920704845815, + "grad_norm": 1.6949945802187276, + "learning_rate": 4.902767008941594e-06, + "loss": 0.5335453748703003, + "step": 3899 + }, + { + "epoch": 1.374273127753304, + "grad_norm": 1.7931951401372748, + "learning_rate": 4.897755202313954e-06, + "loss": 0.576435923576355, + "step": 3900 + }, + { + "epoch": 1.374625550660793, + "grad_norm": 1.6675338707159029, + "learning_rate": 4.8927451277396365e-06, + "loss": 0.533431887626648, + "step": 3901 + }, + { + "epoch": 
1.3749779735682819, + "grad_norm": 1.7439550653197133, + "learning_rate": 4.8877367869194035e-06, + "loss": 0.6892110109329224, + "step": 3902 + }, + { + "epoch": 1.3753303964757708, + "grad_norm": 1.9209875137364842, + "learning_rate": 4.8827301815534335e-06, + "loss": 0.7028052806854248, + "step": 3903 + }, + { + "epoch": 1.37568281938326, + "grad_norm": 1.8413166797931897, + "learning_rate": 4.877725313341306e-06, + "loss": 0.6883414387702942, + "step": 3904 + }, + { + "epoch": 1.376035242290749, + "grad_norm": 2.145518516472349, + "learning_rate": 4.8727221839820285e-06, + "loss": 0.6712944507598877, + "step": 3905 + }, + { + "epoch": 1.3763876651982379, + "grad_norm": 1.6297297090329885, + "learning_rate": 4.867720795174006e-06, + "loss": 0.6139085292816162, + "step": 3906 + }, + { + "epoch": 1.3767400881057268, + "grad_norm": 1.8425831405666082, + "learning_rate": 4.862721148615043e-06, + "loss": 0.6463953256607056, + "step": 3907 + }, + { + "epoch": 1.377092511013216, + "grad_norm": 1.768461759599311, + "learning_rate": 4.857723246002376e-06, + "loss": 0.6790587306022644, + "step": 3908 + }, + { + "epoch": 1.3774449339207049, + "grad_norm": 1.7177146369820009, + "learning_rate": 4.852727089032634e-06, + "loss": 0.4996854066848755, + "step": 3909 + }, + { + "epoch": 1.3777973568281938, + "grad_norm": 1.8098347886488457, + "learning_rate": 4.847732679401855e-06, + "loss": 0.5826590061187744, + "step": 3910 + }, + { + "epoch": 1.3781497797356828, + "grad_norm": 1.8997892974208295, + "learning_rate": 4.842740018805489e-06, + "loss": 0.5044558048248291, + "step": 3911 + }, + { + "epoch": 1.3785022026431717, + "grad_norm": 1.873679943847948, + "learning_rate": 4.837749108938381e-06, + "loss": 0.49022918939590454, + "step": 3912 + }, + { + "epoch": 1.3788546255506609, + "grad_norm": 1.9497488299017371, + "learning_rate": 4.832759951494798e-06, + "loss": 0.7034850120544434, + "step": 3913 + }, + { + "epoch": 1.3792070484581498, + "grad_norm": 1.8582811393472771, + 
"learning_rate": 4.827772548168408e-06, + "loss": 0.5835636854171753, + "step": 3914 + }, + { + "epoch": 1.3795594713656387, + "grad_norm": 1.8615896532434415, + "learning_rate": 4.822786900652262e-06, + "loss": 0.6000608205795288, + "step": 3915 + }, + { + "epoch": 1.3799118942731279, + "grad_norm": 2.003742345218382, + "learning_rate": 4.817803010638847e-06, + "loss": 0.6121091842651367, + "step": 3916 + }, + { + "epoch": 1.3802643171806168, + "grad_norm": 1.80308866184307, + "learning_rate": 4.812820879820034e-06, + "loss": 0.457197904586792, + "step": 3917 + }, + { + "epoch": 1.3806167400881058, + "grad_norm": 1.8962611537179284, + "learning_rate": 4.807840509887102e-06, + "loss": 0.6495843529701233, + "step": 3918 + }, + { + "epoch": 1.3809691629955947, + "grad_norm": 1.9212587769996015, + "learning_rate": 4.80286190253073e-06, + "loss": 0.6245059967041016, + "step": 3919 + }, + { + "epoch": 1.3813215859030836, + "grad_norm": 2.020688644956673, + "learning_rate": 4.797885059440998e-06, + "loss": 0.5648606419563293, + "step": 3920 + }, + { + "epoch": 1.3816740088105726, + "grad_norm": 1.93208096226899, + "learning_rate": 4.7929099823073945e-06, + "loss": 0.6593670845031738, + "step": 3921 + }, + { + "epoch": 1.3820264317180617, + "grad_norm": 1.8973564890389945, + "learning_rate": 4.787936672818807e-06, + "loss": 0.6400346159934998, + "step": 3922 + }, + { + "epoch": 1.3823788546255507, + "grad_norm": 1.8684904083901948, + "learning_rate": 4.782965132663505e-06, + "loss": 0.6042170524597168, + "step": 3923 + }, + { + "epoch": 1.3827312775330396, + "grad_norm": 1.8230700495851246, + "learning_rate": 4.777995363529184e-06, + "loss": 0.6224586963653564, + "step": 3924 + }, + { + "epoch": 1.3830837004405288, + "grad_norm": 2.09797321253942, + "learning_rate": 4.7730273671029235e-06, + "loss": 0.6944444179534912, + "step": 3925 + }, + { + "epoch": 1.3834361233480177, + "grad_norm": 1.976613089140818, + "learning_rate": 4.768061145071201e-06, + "loss": 
0.5871950387954712, + "step": 3926 + }, + { + "epoch": 1.3837885462555066, + "grad_norm": 1.7713632438369786, + "learning_rate": 4.763096699119897e-06, + "loss": 0.6438909769058228, + "step": 3927 + }, + { + "epoch": 1.3841409691629956, + "grad_norm": 1.6141008005869943, + "learning_rate": 4.75813403093428e-06, + "loss": 0.6338443756103516, + "step": 3928 + }, + { + "epoch": 1.3844933920704845, + "grad_norm": 2.2680544531424753, + "learning_rate": 4.753173142199036e-06, + "loss": 0.6343874931335449, + "step": 3929 + }, + { + "epoch": 1.3848458149779734, + "grad_norm": 1.7233771229601555, + "learning_rate": 4.7482140345982174e-06, + "loss": 0.5383629202842712, + "step": 3930 + }, + { + "epoch": 1.3851982378854626, + "grad_norm": 1.8699549247596075, + "learning_rate": 4.743256709815289e-06, + "loss": 0.5365063548088074, + "step": 3931 + }, + { + "epoch": 1.3855506607929515, + "grad_norm": 2.2583515376147694, + "learning_rate": 4.738301169533116e-06, + "loss": 0.6310757398605347, + "step": 3932 + }, + { + "epoch": 1.3859030837004405, + "grad_norm": 2.1022070754037476, + "learning_rate": 4.733347415433946e-06, + "loss": 0.7609038949012756, + "step": 3933 + }, + { + "epoch": 1.3862555066079296, + "grad_norm": 2.174490642392946, + "learning_rate": 4.728395449199423e-06, + "loss": 0.5837516784667969, + "step": 3934 + }, + { + "epoch": 1.3866079295154186, + "grad_norm": 1.719340289699717, + "learning_rate": 4.7234452725105875e-06, + "loss": 0.6075407862663269, + "step": 3935 + }, + { + "epoch": 1.3869603524229075, + "grad_norm": 1.7651152509667416, + "learning_rate": 4.718496887047864e-06, + "loss": 0.5246843099594116, + "step": 3936 + }, + { + "epoch": 1.3873127753303964, + "grad_norm": 1.6874306455639787, + "learning_rate": 4.713550294491091e-06, + "loss": 0.6256884336471558, + "step": 3937 + }, + { + "epoch": 1.3876651982378854, + "grad_norm": 1.632156841956259, + "learning_rate": 4.708605496519467e-06, + "loss": 0.5039727687835693, + "step": 3938 + }, + { + "epoch": 
1.3880176211453745, + "grad_norm": 2.0143508196146196, + "learning_rate": 4.703662494811599e-06, + "loss": 0.5302769541740417, + "step": 3939 + }, + { + "epoch": 1.3883700440528635, + "grad_norm": 1.6358403288542849, + "learning_rate": 4.698721291045491e-06, + "loss": 0.654889702796936, + "step": 3940 + }, + { + "epoch": 1.3887224669603524, + "grad_norm": 1.8724260838054423, + "learning_rate": 4.693781886898521e-06, + "loss": 0.5571156740188599, + "step": 3941 + }, + { + "epoch": 1.3890748898678413, + "grad_norm": 1.8352093678478665, + "learning_rate": 4.688844284047466e-06, + "loss": 0.489155113697052, + "step": 3942 + }, + { + "epoch": 1.3894273127753305, + "grad_norm": 2.3056906716340793, + "learning_rate": 4.683908484168487e-06, + "loss": 0.6422649621963501, + "step": 3943 + }, + { + "epoch": 1.3897797356828194, + "grad_norm": 2.1056674936107345, + "learning_rate": 4.67897448893713e-06, + "loss": 0.6800041794776917, + "step": 3944 + }, + { + "epoch": 1.3901321585903084, + "grad_norm": 1.9512416893069657, + "learning_rate": 4.674042300028345e-06, + "loss": 0.6091655492782593, + "step": 3945 + }, + { + "epoch": 1.3904845814977973, + "grad_norm": 1.5832960247380383, + "learning_rate": 4.669111919116442e-06, + "loss": 0.6217864751815796, + "step": 3946 + }, + { + "epoch": 1.3908370044052862, + "grad_norm": 1.9328669999328483, + "learning_rate": 4.664183347875144e-06, + "loss": 0.6140862703323364, + "step": 3947 + }, + { + "epoch": 1.3911894273127754, + "grad_norm": 1.5467868836495022, + "learning_rate": 4.659256587977542e-06, + "loss": 0.5485835075378418, + "step": 3948 + }, + { + "epoch": 1.3915418502202643, + "grad_norm": 1.9704789330010746, + "learning_rate": 4.654331641096118e-06, + "loss": 0.642849862575531, + "step": 3949 + }, + { + "epoch": 1.3918942731277533, + "grad_norm": 3.421035640959237, + "learning_rate": 4.649408508902739e-06, + "loss": 0.7084407806396484, + "step": 3950 + }, + { + "epoch": 1.3922466960352424, + "grad_norm": 1.780782004302536, + 
"learning_rate": 4.644487193068653e-06, + "loss": 0.4798510670661926, + "step": 3951 + }, + { + "epoch": 1.3925991189427314, + "grad_norm": 2.0571809281532056, + "learning_rate": 4.639567695264493e-06, + "loss": 0.6350974440574646, + "step": 3952 + }, + { + "epoch": 1.3929515418502203, + "grad_norm": 1.6636780012798107, + "learning_rate": 4.634650017160285e-06, + "loss": 0.6046940684318542, + "step": 3953 + }, + { + "epoch": 1.3933039647577092, + "grad_norm": 1.8656342511774384, + "learning_rate": 4.629734160425412e-06, + "loss": 0.5262438058853149, + "step": 3954 + }, + { + "epoch": 1.3936563876651982, + "grad_norm": 1.6602375526420536, + "learning_rate": 4.6248201267286666e-06, + "loss": 0.4836997985839844, + "step": 3955 + }, + { + "epoch": 1.394008810572687, + "grad_norm": 1.8387545975251456, + "learning_rate": 4.619907917738206e-06, + "loss": 0.5491573810577393, + "step": 3956 + }, + { + "epoch": 1.3943612334801763, + "grad_norm": 1.7103638500009937, + "learning_rate": 4.614997535121574e-06, + "loss": 0.5778772830963135, + "step": 3957 + }, + { + "epoch": 1.3947136563876652, + "grad_norm": 1.886204345973439, + "learning_rate": 4.61008898054569e-06, + "loss": 0.6235651969909668, + "step": 3958 + }, + { + "epoch": 1.3950660792951541, + "grad_norm": 1.533461324415723, + "learning_rate": 4.605182255676857e-06, + "loss": 0.5192956924438477, + "step": 3959 + }, + { + "epoch": 1.3954185022026433, + "grad_norm": 1.6490801359766816, + "learning_rate": 4.600277362180753e-06, + "loss": 0.5652563571929932, + "step": 3960 + }, + { + "epoch": 1.3957709251101322, + "grad_norm": 2.0491508628562594, + "learning_rate": 4.595374301722445e-06, + "loss": 0.6451884508132935, + "step": 3961 + }, + { + "epoch": 1.3961233480176212, + "grad_norm": 1.6267669051180629, + "learning_rate": 4.5904730759663555e-06, + "loss": 0.6358006000518799, + "step": 3962 + }, + { + "epoch": 1.39647577092511, + "grad_norm": 1.9868299068304147, + "learning_rate": 4.5855736865763104e-06, + "loss": 
0.6122751832008362, + "step": 3963 + }, + { + "epoch": 1.396828193832599, + "grad_norm": 1.6563994945684704, + "learning_rate": 4.580676135215495e-06, + "loss": 0.5563797354698181, + "step": 3964 + }, + { + "epoch": 1.397180616740088, + "grad_norm": 1.7043306637307543, + "learning_rate": 4.575780423546476e-06, + "loss": 0.5915960669517517, + "step": 3965 + }, + { + "epoch": 1.3975330396475771, + "grad_norm": 2.2793683384994363, + "learning_rate": 4.570886553231196e-06, + "loss": 0.5755159854888916, + "step": 3966 + }, + { + "epoch": 1.397885462555066, + "grad_norm": 1.713166792254198, + "learning_rate": 4.565994525930967e-06, + "loss": 0.7017625570297241, + "step": 3967 + }, + { + "epoch": 1.398237885462555, + "grad_norm": 1.901331269180062, + "learning_rate": 4.5611043433064875e-06, + "loss": 0.7623441815376282, + "step": 3968 + }, + { + "epoch": 1.3985903083700442, + "grad_norm": 1.772343766995311, + "learning_rate": 4.556216007017822e-06, + "loss": 0.5561864376068115, + "step": 3969 + }, + { + "epoch": 1.398942731277533, + "grad_norm": 1.7107369517825557, + "learning_rate": 4.5513295187243975e-06, + "loss": 0.516582727432251, + "step": 3970 + }, + { + "epoch": 1.399295154185022, + "grad_norm": 1.6087287767761917, + "learning_rate": 4.5464448800850366e-06, + "loss": 0.6324976682662964, + "step": 3971 + }, + { + "epoch": 1.399647577092511, + "grad_norm": 1.660721417089598, + "learning_rate": 4.541562092757918e-06, + "loss": 0.5926251411437988, + "step": 3972 + }, + { + "epoch": 1.4, + "grad_norm": 1.7443423550845751, + "learning_rate": 4.536681158400598e-06, + "loss": 0.5677082538604736, + "step": 3973 + }, + { + "epoch": 1.400352422907489, + "grad_norm": 1.791823926745788, + "learning_rate": 4.531802078669997e-06, + "loss": 0.5267887115478516, + "step": 3974 + }, + { + "epoch": 1.400704845814978, + "grad_norm": 2.3840846637544617, + "learning_rate": 4.526924855222411e-06, + "loss": 0.6361796855926514, + "step": 3975 + }, + { + "epoch": 1.401057268722467, + 
"grad_norm": 1.9992656380929168, + "learning_rate": 4.522049489713513e-06, + "loss": 0.5906916856765747, + "step": 3976 + }, + { + "epoch": 1.4014096916299559, + "grad_norm": 1.932616358578933, + "learning_rate": 4.517175983798334e-06, + "loss": 0.647320568561554, + "step": 3977 + }, + { + "epoch": 1.401762114537445, + "grad_norm": 1.7297380971513312, + "learning_rate": 4.512304339131271e-06, + "loss": 0.6129240989685059, + "step": 3978 + }, + { + "epoch": 1.402114537444934, + "grad_norm": 1.8820056515419912, + "learning_rate": 4.507434557366106e-06, + "loss": 0.5550417900085449, + "step": 3979 + }, + { + "epoch": 1.402466960352423, + "grad_norm": 3.410101687197828, + "learning_rate": 4.502566640155972e-06, + "loss": 0.5677829384803772, + "step": 3980 + }, + { + "epoch": 1.4028193832599118, + "grad_norm": 2.037826582552855, + "learning_rate": 4.497700589153379e-06, + "loss": 0.6627114415168762, + "step": 3981 + }, + { + "epoch": 1.4031718061674008, + "grad_norm": 2.0278559165710197, + "learning_rate": 4.492836406010197e-06, + "loss": 0.7225712537765503, + "step": 3982 + }, + { + "epoch": 1.40352422907489, + "grad_norm": 1.6877243893704514, + "learning_rate": 4.487974092377661e-06, + "loss": 0.5259708762168884, + "step": 3983 + }, + { + "epoch": 1.4038766519823789, + "grad_norm": 1.930838228409862, + "learning_rate": 4.4831136499063856e-06, + "loss": 0.5509500503540039, + "step": 3984 + }, + { + "epoch": 1.4042290748898678, + "grad_norm": 1.862328702111506, + "learning_rate": 4.478255080246338e-06, + "loss": 0.5436242818832397, + "step": 3985 + }, + { + "epoch": 1.4045814977973567, + "grad_norm": 1.9252586062101578, + "learning_rate": 4.473398385046839e-06, + "loss": 0.591008186340332, + "step": 3986 + }, + { + "epoch": 1.404933920704846, + "grad_norm": 1.8551590253300663, + "learning_rate": 4.4685435659565975e-06, + "loss": 0.7463438510894775, + "step": 3987 + }, + { + "epoch": 1.4052863436123348, + "grad_norm": 2.7212267236094445, + "learning_rate": 
4.46369062462367e-06, + "loss": 0.5672414898872375, + "step": 3988 + }, + { + "epoch": 1.4056387665198238, + "grad_norm": 1.9023461618951703, + "learning_rate": 4.458839562695481e-06, + "loss": 0.6022762060165405, + "step": 3989 + }, + { + "epoch": 1.4059911894273127, + "grad_norm": 2.975414442801074, + "learning_rate": 4.453990381818811e-06, + "loss": 0.8312792181968689, + "step": 3990 + }, + { + "epoch": 1.4063436123348017, + "grad_norm": 1.5291152049255947, + "learning_rate": 4.4491430836398055e-06, + "loss": 0.475655198097229, + "step": 3991 + }, + { + "epoch": 1.4066960352422908, + "grad_norm": 2.205738960261052, + "learning_rate": 4.444297669803981e-06, + "loss": 0.6317172050476074, + "step": 3992 + }, + { + "epoch": 1.4070484581497797, + "grad_norm": 1.7590033801874774, + "learning_rate": 4.439454141956194e-06, + "loss": 0.5412036180496216, + "step": 3993 + }, + { + "epoch": 1.4074008810572687, + "grad_norm": 1.8534848369039538, + "learning_rate": 4.434612501740671e-06, + "loss": 0.6401170492172241, + "step": 3994 + }, + { + "epoch": 1.4077533039647578, + "grad_norm": 1.6819739888663638, + "learning_rate": 4.429772750801007e-06, + "loss": 0.6175628900527954, + "step": 3995 + }, + { + "epoch": 1.4081057268722468, + "grad_norm": 1.9863542351176011, + "learning_rate": 4.424934890780142e-06, + "loss": 0.6875946521759033, + "step": 3996 + }, + { + "epoch": 1.4084581497797357, + "grad_norm": 1.6357928529424866, + "learning_rate": 4.420098923320378e-06, + "loss": 0.6404017210006714, + "step": 3997 + }, + { + "epoch": 1.4088105726872246, + "grad_norm": 2.096371594852834, + "learning_rate": 4.415264850063378e-06, + "loss": 0.7569783329963684, + "step": 3998 + }, + { + "epoch": 1.4091629955947136, + "grad_norm": 1.9373448832520324, + "learning_rate": 4.410432672650154e-06, + "loss": 0.6125228404998779, + "step": 3999 + }, + { + "epoch": 1.4095154185022025, + "grad_norm": 1.8206271046178746, + "learning_rate": 4.405602392721091e-06, + "loss": 0.6187582015991211, + 
"step": 4000 + }, + { + "epoch": 1.4098678414096917, + "grad_norm": 1.6622405329305723, + "learning_rate": 4.400774011915907e-06, + "loss": 0.6086148023605347, + "step": 4001 + }, + { + "epoch": 1.4102202643171806, + "grad_norm": 1.4174012456939833, + "learning_rate": 4.3959475318736885e-06, + "loss": 0.4140232801437378, + "step": 4002 + }, + { + "epoch": 1.4105726872246696, + "grad_norm": 1.836512159334361, + "learning_rate": 4.391122954232883e-06, + "loss": 0.5065237879753113, + "step": 4003 + }, + { + "epoch": 1.4109251101321587, + "grad_norm": 1.458932644295331, + "learning_rate": 4.386300280631279e-06, + "loss": 0.4817734658718109, + "step": 4004 + }, + { + "epoch": 1.4112775330396476, + "grad_norm": 1.6662288245729417, + "learning_rate": 4.381479512706025e-06, + "loss": 0.6339706778526306, + "step": 4005 + }, + { + "epoch": 1.4116299559471366, + "grad_norm": 2.1459595089971653, + "learning_rate": 4.376660652093621e-06, + "loss": 0.6581720113754272, + "step": 4006 + }, + { + "epoch": 1.4119823788546255, + "grad_norm": 2.1052256395432503, + "learning_rate": 4.3718437004299174e-06, + "loss": 0.722156286239624, + "step": 4007 + }, + { + "epoch": 1.4123348017621145, + "grad_norm": 2.007137048045836, + "learning_rate": 4.36702865935013e-06, + "loss": 0.5262913703918457, + "step": 4008 + }, + { + "epoch": 1.4126872246696034, + "grad_norm": 1.6239575731802327, + "learning_rate": 4.362215530488805e-06, + "loss": 0.6242132186889648, + "step": 4009 + }, + { + "epoch": 1.4130396475770926, + "grad_norm": 1.6412038783326008, + "learning_rate": 4.35740431547985e-06, + "loss": 0.48776593804359436, + "step": 4010 + }, + { + "epoch": 1.4133920704845815, + "grad_norm": 1.4539922592281447, + "learning_rate": 4.352595015956528e-06, + "loss": 0.5528746843338013, + "step": 4011 + }, + { + "epoch": 1.4137444933920704, + "grad_norm": 1.881555645901769, + "learning_rate": 4.347787633551444e-06, + "loss": 0.6282942295074463, + "step": 4012 + }, + { + "epoch": 1.4140969162995596, + 
"grad_norm": 1.997464157113011, + "learning_rate": 4.342982169896555e-06, + "loss": 0.6113284826278687, + "step": 4013 + }, + { + "epoch": 1.4144493392070485, + "grad_norm": 1.696170493669202, + "learning_rate": 4.3381786266231685e-06, + "loss": 0.5756875872612, + "step": 4014 + }, + { + "epoch": 1.4148017621145375, + "grad_norm": 1.8012350757266906, + "learning_rate": 4.333377005361931e-06, + "loss": 0.6180154085159302, + "step": 4015 + }, + { + "epoch": 1.4151541850220264, + "grad_norm": 2.2454634074572146, + "learning_rate": 4.328577307742855e-06, + "loss": 0.5728827118873596, + "step": 4016 + }, + { + "epoch": 1.4155066079295153, + "grad_norm": 1.7928891595746113, + "learning_rate": 4.323779535395278e-06, + "loss": 0.5248062014579773, + "step": 4017 + }, + { + "epoch": 1.4158590308370045, + "grad_norm": 1.7454680737255013, + "learning_rate": 4.318983689947895e-06, + "loss": 0.5938228368759155, + "step": 4018 + }, + { + "epoch": 1.4162114537444934, + "grad_norm": 1.8931460456480809, + "learning_rate": 4.3141897730287544e-06, + "loss": 0.7085045576095581, + "step": 4019 + }, + { + "epoch": 1.4165638766519824, + "grad_norm": 2.566425134177144, + "learning_rate": 4.309397786265235e-06, + "loss": 0.599969744682312, + "step": 4020 + }, + { + "epoch": 1.4169162995594713, + "grad_norm": 2.186511304730039, + "learning_rate": 4.30460773128407e-06, + "loss": 0.5784738063812256, + "step": 4021 + }, + { + "epoch": 1.4172687224669605, + "grad_norm": 1.8802349185240168, + "learning_rate": 4.299819609711332e-06, + "loss": 0.6492793560028076, + "step": 4022 + }, + { + "epoch": 1.4176211453744494, + "grad_norm": 1.6886854891683005, + "learning_rate": 4.2950334231724375e-06, + "loss": 0.6690749526023865, + "step": 4023 + }, + { + "epoch": 1.4179735682819383, + "grad_norm": 1.8482135160791267, + "learning_rate": 4.290249173292158e-06, + "loss": 0.5919139981269836, + "step": 4024 + }, + { + "epoch": 1.4183259911894273, + "grad_norm": 1.6202611135629348, + "learning_rate": 
4.285466861694583e-06, + "loss": 0.5661630630493164, + "step": 4025 + }, + { + "epoch": 1.4186784140969162, + "grad_norm": 1.7328062744712673, + "learning_rate": 4.280686490003169e-06, + "loss": 0.547730565071106, + "step": 4026 + }, + { + "epoch": 1.4190308370044054, + "grad_norm": 1.7270546788274348, + "learning_rate": 4.2759080598406985e-06, + "loss": 0.6150445938110352, + "step": 4027 + }, + { + "epoch": 1.4193832599118943, + "grad_norm": 2.048539568947664, + "learning_rate": 4.271131572829303e-06, + "loss": 0.6522917747497559, + "step": 4028 + }, + { + "epoch": 1.4197356828193832, + "grad_norm": 1.952118534937186, + "learning_rate": 4.266357030590449e-06, + "loss": 0.8456230163574219, + "step": 4029 + }, + { + "epoch": 1.4200881057268724, + "grad_norm": 1.810792149813479, + "learning_rate": 4.261584434744945e-06, + "loss": 0.6059526801109314, + "step": 4030 + }, + { + "epoch": 1.4204405286343613, + "grad_norm": 1.8213808222910857, + "learning_rate": 4.256813786912937e-06, + "loss": 0.6289907693862915, + "step": 4031 + }, + { + "epoch": 1.4207929515418503, + "grad_norm": 1.5510911353998291, + "learning_rate": 4.252045088713919e-06, + "loss": 0.48954638838768005, + "step": 4032 + }, + { + "epoch": 1.4211453744493392, + "grad_norm": 2.020061779490103, + "learning_rate": 4.2472783417667055e-06, + "loss": 0.6999461054801941, + "step": 4033 + }, + { + "epoch": 1.4214977973568281, + "grad_norm": 1.9629261898681878, + "learning_rate": 4.242513547689466e-06, + "loss": 0.5610899925231934, + "step": 4034 + }, + { + "epoch": 1.421850220264317, + "grad_norm": 1.8415242379631616, + "learning_rate": 4.237750708099697e-06, + "loss": 0.6240172386169434, + "step": 4035 + }, + { + "epoch": 1.4222026431718062, + "grad_norm": 1.887889822972652, + "learning_rate": 4.2329898246142356e-06, + "loss": 0.6368240118026733, + "step": 4036 + }, + { + "epoch": 1.4225550660792952, + "grad_norm": 2.0839652521207483, + "learning_rate": 4.228230898849253e-06, + "loss": 0.6242600679397583, + 
"step": 4037 + }, + { + "epoch": 1.422907488986784, + "grad_norm": 1.7622749957844728, + "learning_rate": 4.223473932420255e-06, + "loss": 0.6446138620376587, + "step": 4038 + }, + { + "epoch": 1.4232599118942733, + "grad_norm": 1.8800444061446486, + "learning_rate": 4.218718926942081e-06, + "loss": 0.7108229398727417, + "step": 4039 + }, + { + "epoch": 1.4236123348017622, + "grad_norm": 1.7917659431298882, + "learning_rate": 4.213965884028919e-06, + "loss": 0.5279660820960999, + "step": 4040 + }, + { + "epoch": 1.4239647577092511, + "grad_norm": 1.7747691200912903, + "learning_rate": 4.209214805294264e-06, + "loss": 0.6422853469848633, + "step": 4041 + }, + { + "epoch": 1.42431718061674, + "grad_norm": 1.848339978168105, + "learning_rate": 4.2044656923509704e-06, + "loss": 0.6355602741241455, + "step": 4042 + }, + { + "epoch": 1.424669603524229, + "grad_norm": 1.7787421175687093, + "learning_rate": 4.19971854681121e-06, + "loss": 0.5351370573043823, + "step": 4043 + }, + { + "epoch": 1.425022026431718, + "grad_norm": 2.0300248809256987, + "learning_rate": 4.194973370286492e-06, + "loss": 0.5573978424072266, + "step": 4044 + }, + { + "epoch": 1.425374449339207, + "grad_norm": 1.9433750628346875, + "learning_rate": 4.1902301643876555e-06, + "loss": 0.5865412950515747, + "step": 4045 + }, + { + "epoch": 1.425726872246696, + "grad_norm": 2.102324249123369, + "learning_rate": 4.185488930724868e-06, + "loss": 0.6231919527053833, + "step": 4046 + }, + { + "epoch": 1.426079295154185, + "grad_norm": 1.7135783989067233, + "learning_rate": 4.180749670907638e-06, + "loss": 0.48964112997055054, + "step": 4047 + }, + { + "epoch": 1.4264317180616741, + "grad_norm": 2.0973459527664686, + "learning_rate": 4.176012386544796e-06, + "loss": 0.6299121975898743, + "step": 4048 + }, + { + "epoch": 1.426784140969163, + "grad_norm": 1.7239115182277114, + "learning_rate": 4.171277079244492e-06, + "loss": 0.5612789392471313, + "step": 4049 + }, + { + "epoch": 1.427136563876652, + 
"grad_norm": 1.7396324571675468, + "learning_rate": 4.166543750614227e-06, + "loss": 0.4315321147441864, + "step": 4050 + }, + { + "epoch": 1.427488986784141, + "grad_norm": 2.0031203112343094, + "learning_rate": 4.1618124022608146e-06, + "loss": 0.6300851702690125, + "step": 4051 + }, + { + "epoch": 1.4278414096916299, + "grad_norm": 1.7808675185736187, + "learning_rate": 4.1570830357904e-06, + "loss": 0.6258795261383057, + "step": 4052 + }, + { + "epoch": 1.4281938325991188, + "grad_norm": 1.9069027085637078, + "learning_rate": 4.152355652808457e-06, + "loss": 0.7364479303359985, + "step": 4053 + }, + { + "epoch": 1.428546255506608, + "grad_norm": 1.8474323145969993, + "learning_rate": 4.147630254919781e-06, + "loss": 0.44845038652420044, + "step": 4054 + }, + { + "epoch": 1.428898678414097, + "grad_norm": 1.6823890398766386, + "learning_rate": 4.142906843728504e-06, + "loss": 0.516815185546875, + "step": 4055 + }, + { + "epoch": 1.4292511013215858, + "grad_norm": 1.6276914964492604, + "learning_rate": 4.138185420838079e-06, + "loss": 0.6296960711479187, + "step": 4056 + }, + { + "epoch": 1.429603524229075, + "grad_norm": 1.728227730408027, + "learning_rate": 4.133465987851269e-06, + "loss": 0.5709103345870972, + "step": 4057 + }, + { + "epoch": 1.429955947136564, + "grad_norm": 1.7709951139170081, + "learning_rate": 4.128748546370186e-06, + "loss": 0.5672547817230225, + "step": 4058 + }, + { + "epoch": 1.4303083700440529, + "grad_norm": 1.8161591736426752, + "learning_rate": 4.124033097996252e-06, + "loss": 0.5927014350891113, + "step": 4059 + }, + { + "epoch": 1.4306607929515418, + "grad_norm": 1.75056683772296, + "learning_rate": 4.119319644330214e-06, + "loss": 0.7021238803863525, + "step": 4060 + }, + { + "epoch": 1.4310132158590307, + "grad_norm": 1.7949933259038664, + "learning_rate": 4.114608186972143e-06, + "loss": 0.5940784215927124, + "step": 4061 + }, + { + "epoch": 1.43136563876652, + "grad_norm": 1.7958424742287702, + "learning_rate": 
4.109898727521429e-06, + "loss": 0.46511122584342957, + "step": 4062 + }, + { + "epoch": 1.4317180616740088, + "grad_norm": 1.7489789285307085, + "learning_rate": 4.105191267576797e-06, + "loss": 0.4710976481437683, + "step": 4063 + }, + { + "epoch": 1.4320704845814978, + "grad_norm": 1.650142742870973, + "learning_rate": 4.100485808736273e-06, + "loss": 0.5947977900505066, + "step": 4064 + }, + { + "epoch": 1.4324229074889867, + "grad_norm": 1.7620222249444284, + "learning_rate": 4.095782352597214e-06, + "loss": 0.6312115788459778, + "step": 4065 + }, + { + "epoch": 1.4327753303964759, + "grad_norm": 1.7689711305484843, + "learning_rate": 4.091080900756303e-06, + "loss": 0.5709977149963379, + "step": 4066 + }, + { + "epoch": 1.4331277533039648, + "grad_norm": 1.8903042666510779, + "learning_rate": 4.086381454809535e-06, + "loss": 0.6183716058731079, + "step": 4067 + }, + { + "epoch": 1.4334801762114537, + "grad_norm": 1.8677159370638265, + "learning_rate": 4.081684016352223e-06, + "loss": 0.5139745473861694, + "step": 4068 + }, + { + "epoch": 1.4338325991189427, + "grad_norm": 1.9141879794373917, + "learning_rate": 4.076988586979004e-06, + "loss": 0.6611173152923584, + "step": 4069 + }, + { + "epoch": 1.4341850220264316, + "grad_norm": 1.7276457807578136, + "learning_rate": 4.072295168283824e-06, + "loss": 0.616943359375, + "step": 4070 + }, + { + "epoch": 1.4345374449339208, + "grad_norm": 2.331740237042665, + "learning_rate": 4.067603761859965e-06, + "loss": 0.5388625264167786, + "step": 4071 + }, + { + "epoch": 1.4348898678414097, + "grad_norm": 1.9571975377572324, + "learning_rate": 4.062914369300002e-06, + "loss": 0.5523884892463684, + "step": 4072 + }, + { + "epoch": 1.4352422907488986, + "grad_norm": 1.8860165198416616, + "learning_rate": 4.058226992195838e-06, + "loss": 0.5610285997390747, + "step": 4073 + }, + { + "epoch": 1.4355947136563878, + "grad_norm": 1.8522832262316333, + "learning_rate": 4.0535416321387e-06, + "loss": 0.583917498588562, + "step": 
4074 + }, + { + "epoch": 1.4359471365638767, + "grad_norm": 1.677482186323321, + "learning_rate": 4.048858290719115e-06, + "loss": 0.6025276184082031, + "step": 4075 + }, + { + "epoch": 1.4362995594713657, + "grad_norm": 1.8037188167117204, + "learning_rate": 4.044176969526936e-06, + "loss": 0.5643888711929321, + "step": 4076 + }, + { + "epoch": 1.4366519823788546, + "grad_norm": 1.709713655992042, + "learning_rate": 4.0394976701513235e-06, + "loss": 0.550167977809906, + "step": 4077 + }, + { + "epoch": 1.4370044052863435, + "grad_norm": 2.1319034629476747, + "learning_rate": 4.034820394180749e-06, + "loss": 0.6182876825332642, + "step": 4078 + }, + { + "epoch": 1.4373568281938325, + "grad_norm": 2.018408244379198, + "learning_rate": 4.030145143203016e-06, + "loss": 0.5197434425354004, + "step": 4079 + }, + { + "epoch": 1.4377092511013216, + "grad_norm": 2.037308833831004, + "learning_rate": 4.025471918805214e-06, + "loss": 0.5351034998893738, + "step": 4080 + }, + { + "epoch": 1.4380616740088106, + "grad_norm": 1.988655670021041, + "learning_rate": 4.020800722573758e-06, + "loss": 0.5576729774475098, + "step": 4081 + }, + { + "epoch": 1.4384140969162995, + "grad_norm": 2.03830396836609, + "learning_rate": 4.016131556094381e-06, + "loss": 0.5557611584663391, + "step": 4082 + }, + { + "epoch": 1.4387665198237887, + "grad_norm": 1.6841558782049018, + "learning_rate": 4.011464420952115e-06, + "loss": 0.5300010442733765, + "step": 4083 + }, + { + "epoch": 1.4391189427312776, + "grad_norm": 2.5196291624747387, + "learning_rate": 4.0067993187313085e-06, + "loss": 0.5254991054534912, + "step": 4084 + }, + { + "epoch": 1.4394713656387665, + "grad_norm": 1.9569129587138865, + "learning_rate": 4.002136251015617e-06, + "loss": 0.5044848322868347, + "step": 4085 + }, + { + "epoch": 1.4398237885462555, + "grad_norm": 1.7587820286029368, + "learning_rate": 3.997475219388005e-06, + "loss": 0.6422302722930908, + "step": 4086 + }, + { + "epoch": 1.4401762114537444, + "grad_norm": 
1.7785161433093049, + "learning_rate": 3.992816225430758e-06, + "loss": 0.5502497553825378, + "step": 4087 + }, + { + "epoch": 1.4405286343612334, + "grad_norm": 1.9272648866171629, + "learning_rate": 3.988159270725448e-06, + "loss": 0.7479537129402161, + "step": 4088 + }, + { + "epoch": 1.4408810572687225, + "grad_norm": 1.8882665464741835, + "learning_rate": 3.983504356852967e-06, + "loss": 0.5418091416358948, + "step": 4089 + }, + { + "epoch": 1.4412334801762114, + "grad_norm": 2.1909054908738805, + "learning_rate": 3.978851485393519e-06, + "loss": 0.5262568593025208, + "step": 4090 + }, + { + "epoch": 1.4415859030837004, + "grad_norm": 1.7855475608149034, + "learning_rate": 3.974200657926607e-06, + "loss": 0.5419692397117615, + "step": 4091 + }, + { + "epoch": 1.4419383259911895, + "grad_norm": 1.84908442821801, + "learning_rate": 3.9695518760310425e-06, + "loss": 0.5202164649963379, + "step": 4092 + }, + { + "epoch": 1.4422907488986785, + "grad_norm": 1.6256093479781946, + "learning_rate": 3.96490514128494e-06, + "loss": 0.7232608795166016, + "step": 4093 + }, + { + "epoch": 1.4426431718061674, + "grad_norm": 3.2107784732452473, + "learning_rate": 3.960260455265721e-06, + "loss": 0.5899156332015991, + "step": 4094 + }, + { + "epoch": 1.4429955947136563, + "grad_norm": 1.9995831956411032, + "learning_rate": 3.95561781955012e-06, + "loss": 0.629068911075592, + "step": 4095 + }, + { + "epoch": 1.4433480176211453, + "grad_norm": 1.9520751138167456, + "learning_rate": 3.950977235714154e-06, + "loss": 0.5584920644760132, + "step": 4096 + }, + { + "epoch": 1.4437004405286344, + "grad_norm": 1.7280125643736322, + "learning_rate": 3.9463387053331685e-06, + "loss": 0.713936984539032, + "step": 4097 + }, + { + "epoch": 1.4440528634361234, + "grad_norm": 2.7226452019662357, + "learning_rate": 3.9417022299817944e-06, + "loss": 0.7157076001167297, + "step": 4098 + }, + { + "epoch": 1.4444052863436123, + "grad_norm": 1.940369638230087, + "learning_rate": 
3.937067811233972e-06, + "loss": 0.6540844440460205, + "step": 4099 + }, + { + "epoch": 1.4447577092511013, + "grad_norm": 1.6342043838390767, + "learning_rate": 3.9324354506629425e-06, + "loss": 0.5350022315979004, + "step": 4100 + }, + { + "epoch": 1.4451101321585904, + "grad_norm": 1.9186113150470587, + "learning_rate": 3.9278051498412475e-06, + "loss": 0.6852695941925049, + "step": 4101 + }, + { + "epoch": 1.4454625550660793, + "grad_norm": 1.8060312138879744, + "learning_rate": 3.923176910340728e-06, + "loss": 0.6059536337852478, + "step": 4102 + }, + { + "epoch": 1.4458149779735683, + "grad_norm": 1.6721278909458728, + "learning_rate": 3.918550733732536e-06, + "loss": 0.5787979364395142, + "step": 4103 + }, + { + "epoch": 1.4461674008810572, + "grad_norm": 1.8059605647431092, + "learning_rate": 3.9139266215871e-06, + "loss": 0.6068835258483887, + "step": 4104 + }, + { + "epoch": 1.4465198237885462, + "grad_norm": 1.7804694224195132, + "learning_rate": 3.909304575474175e-06, + "loss": 0.5123663544654846, + "step": 4105 + }, + { + "epoch": 1.4468722466960353, + "grad_norm": 1.832785857954117, + "learning_rate": 3.9046845969627975e-06, + "loss": 0.6285420656204224, + "step": 4106 + }, + { + "epoch": 1.4472246696035242, + "grad_norm": 1.8029701329975896, + "learning_rate": 3.9000666876213056e-06, + "loss": 0.6186035871505737, + "step": 4107 + }, + { + "epoch": 1.4475770925110132, + "grad_norm": 2.8121411727628174, + "learning_rate": 3.895450849017336e-06, + "loss": 0.6222661733627319, + "step": 4108 + }, + { + "epoch": 1.4479295154185021, + "grad_norm": 1.7965214044078308, + "learning_rate": 3.890837082717822e-06, + "loss": 0.5889515280723572, + "step": 4109 + }, + { + "epoch": 1.4482819383259913, + "grad_norm": 1.8839124618745182, + "learning_rate": 3.8862253902889925e-06, + "loss": 0.6160309314727783, + "step": 4110 + }, + { + "epoch": 1.4486343612334802, + "grad_norm": 1.7651875927016676, + "learning_rate": 3.881615773296381e-06, + "loss": 0.48093074560165405, 
+ "step": 4111 + }, + { + "epoch": 1.4489867841409692, + "grad_norm": 1.8283039880345147, + "learning_rate": 3.877008233304796e-06, + "loss": 0.5851131677627563, + "step": 4112 + }, + { + "epoch": 1.449339207048458, + "grad_norm": 1.7366010221761805, + "learning_rate": 3.872402771878365e-06, + "loss": 0.5322093963623047, + "step": 4113 + }, + { + "epoch": 1.449691629955947, + "grad_norm": 1.7342840660368584, + "learning_rate": 3.8677993905804956e-06, + "loss": 0.652804970741272, + "step": 4114 + }, + { + "epoch": 1.4500440528634362, + "grad_norm": 1.9583669696557284, + "learning_rate": 3.863198090973891e-06, + "loss": 0.5494996309280396, + "step": 4115 + }, + { + "epoch": 1.4503964757709251, + "grad_norm": 1.910811405312081, + "learning_rate": 3.8585988746205505e-06, + "loss": 0.5641331672668457, + "step": 4116 + }, + { + "epoch": 1.450748898678414, + "grad_norm": 1.7616537450177998, + "learning_rate": 3.854001743081764e-06, + "loss": 0.5415998697280884, + "step": 4117 + }, + { + "epoch": 1.4511013215859032, + "grad_norm": 1.599490372210091, + "learning_rate": 3.849406697918113e-06, + "loss": 0.4399813711643219, + "step": 4118 + }, + { + "epoch": 1.4514537444933922, + "grad_norm": 2.0642862733318115, + "learning_rate": 3.84481374068948e-06, + "loss": 0.6228655576705933, + "step": 4119 + }, + { + "epoch": 1.451806167400881, + "grad_norm": 1.650547077673145, + "learning_rate": 3.8402228729550195e-06, + "loss": 0.5575108528137207, + "step": 4120 + }, + { + "epoch": 1.45215859030837, + "grad_norm": 2.4780057667058704, + "learning_rate": 3.835634096273197e-06, + "loss": 0.5705434679985046, + "step": 4121 + }, + { + "epoch": 1.452511013215859, + "grad_norm": 2.1620556917486184, + "learning_rate": 3.831047412201758e-06, + "loss": 0.5649456977844238, + "step": 4122 + }, + { + "epoch": 1.452863436123348, + "grad_norm": 1.9734169166383557, + "learning_rate": 3.826462822297736e-06, + "loss": 0.5656554698944092, + "step": 4123 + }, + { + "epoch": 1.453215859030837, + 
"grad_norm": 1.8883507101257415, + "learning_rate": 3.82188032811746e-06, + "loss": 0.6565591096878052, + "step": 4124 + }, + { + "epoch": 1.453568281938326, + "grad_norm": 1.9823635297408013, + "learning_rate": 3.817299931216537e-06, + "loss": 0.6553423404693604, + "step": 4125 + }, + { + "epoch": 1.453920704845815, + "grad_norm": 1.8362785094722764, + "learning_rate": 3.812721633149883e-06, + "loss": 0.5401671528816223, + "step": 4126 + }, + { + "epoch": 1.454273127753304, + "grad_norm": 2.008049720412482, + "learning_rate": 3.808145435471674e-06, + "loss": 0.7275381088256836, + "step": 4127 + }, + { + "epoch": 1.454625550660793, + "grad_norm": 1.827455905179675, + "learning_rate": 3.80357133973539e-06, + "loss": 0.6384310722351074, + "step": 4128 + }, + { + "epoch": 1.454977973568282, + "grad_norm": 1.986935058055083, + "learning_rate": 3.7989993474937993e-06, + "loss": 0.7783250212669373, + "step": 4129 + }, + { + "epoch": 1.455330396475771, + "grad_norm": 2.1923612655628624, + "learning_rate": 3.7944294602989473e-06, + "loss": 0.752954363822937, + "step": 4130 + }, + { + "epoch": 1.4556828193832598, + "grad_norm": 1.801491937261316, + "learning_rate": 3.789861679702169e-06, + "loss": 0.6099411249160767, + "step": 4131 + }, + { + "epoch": 1.4560352422907488, + "grad_norm": 2.12230143233965, + "learning_rate": 3.7852960072540845e-06, + "loss": 0.6608012914657593, + "step": 4132 + }, + { + "epoch": 1.456387665198238, + "grad_norm": 1.836228759881875, + "learning_rate": 3.7807324445045924e-06, + "loss": 0.5119853615760803, + "step": 4133 + }, + { + "epoch": 1.4567400881057269, + "grad_norm": 2.036719543857632, + "learning_rate": 3.7761709930028923e-06, + "loss": 0.6353520750999451, + "step": 4134 + }, + { + "epoch": 1.4570925110132158, + "grad_norm": 1.9234147822597618, + "learning_rate": 3.7716116542974434e-06, + "loss": 0.6427614688873291, + "step": 4135 + }, + { + "epoch": 1.457444933920705, + "grad_norm": 2.34139645382815, + "learning_rate": 
3.7670544299360003e-06, + "loss": 0.6205203533172607, + "step": 4136 + }, + { + "epoch": 1.457797356828194, + "grad_norm": 1.940401751978381, + "learning_rate": 3.7624993214656046e-06, + "loss": 0.5957762002944946, + "step": 4137 + }, + { + "epoch": 1.4581497797356828, + "grad_norm": 1.8842452122457418, + "learning_rate": 3.7579463304325714e-06, + "loss": 0.6698043346405029, + "step": 4138 + }, + { + "epoch": 1.4585022026431718, + "grad_norm": 1.825534553754035, + "learning_rate": 3.7533954583824982e-06, + "loss": 0.5947796106338501, + "step": 4139 + }, + { + "epoch": 1.4588546255506607, + "grad_norm": 1.7817965501913557, + "learning_rate": 3.7488467068602664e-06, + "loss": 0.5905463695526123, + "step": 4140 + }, + { + "epoch": 1.4592070484581499, + "grad_norm": 1.8530726474927524, + "learning_rate": 3.74430007741003e-06, + "loss": 0.6218722462654114, + "step": 4141 + }, + { + "epoch": 1.4595594713656388, + "grad_norm": 1.9872212615104103, + "learning_rate": 3.739755571575241e-06, + "loss": 0.6124013066291809, + "step": 4142 + }, + { + "epoch": 1.4599118942731277, + "grad_norm": 1.8897226451904012, + "learning_rate": 3.7352131908986046e-06, + "loss": 0.5816842317581177, + "step": 4143 + }, + { + "epoch": 1.4602643171806167, + "grad_norm": 1.780742815029414, + "learning_rate": 3.7306729369221197e-06, + "loss": 0.5225531458854675, + "step": 4144 + }, + { + "epoch": 1.4606167400881058, + "grad_norm": 1.5899946748394236, + "learning_rate": 3.7261348111870663e-06, + "loss": 0.4536696672439575, + "step": 4145 + }, + { + "epoch": 1.4609691629955948, + "grad_norm": 1.6530094281559282, + "learning_rate": 3.7215988152339968e-06, + "loss": 0.5777832269668579, + "step": 4146 + }, + { + "epoch": 1.4613215859030837, + "grad_norm": 2.0042576738233993, + "learning_rate": 3.717064950602737e-06, + "loss": 0.5964622497558594, + "step": 4147 + }, + { + "epoch": 1.4616740088105726, + "grad_norm": 1.634683701176406, + "learning_rate": 3.7125332188323937e-06, + "loss": 
0.47224390506744385, + "step": 4148 + }, + { + "epoch": 1.4620264317180616, + "grad_norm": 2.0219703130043474, + "learning_rate": 3.708003621461347e-06, + "loss": 0.5989271402359009, + "step": 4149 + }, + { + "epoch": 1.4623788546255507, + "grad_norm": 1.7865027495889427, + "learning_rate": 3.7034761600272627e-06, + "loss": 0.6171919107437134, + "step": 4150 + }, + { + "epoch": 1.4627312775330397, + "grad_norm": 1.742831115289917, + "learning_rate": 3.6989508360670647e-06, + "loss": 0.7064418792724609, + "step": 4151 + }, + { + "epoch": 1.4630837004405286, + "grad_norm": 2.236539087690149, + "learning_rate": 3.6944276511169577e-06, + "loss": 0.6055941581726074, + "step": 4152 + }, + { + "epoch": 1.4634361233480178, + "grad_norm": 1.7433765587507288, + "learning_rate": 3.689906606712429e-06, + "loss": 0.4550645351409912, + "step": 4153 + }, + { + "epoch": 1.4637885462555067, + "grad_norm": 1.929810725161399, + "learning_rate": 3.68538770438823e-06, + "loss": 0.5958502292633057, + "step": 4154 + }, + { + "epoch": 1.4641409691629956, + "grad_norm": 2.057185852502653, + "learning_rate": 3.680870945678389e-06, + "loss": 0.5574297904968262, + "step": 4155 + }, + { + "epoch": 1.4644933920704846, + "grad_norm": 1.443537567568116, + "learning_rate": 3.676356332116202e-06, + "loss": 0.46494683623313904, + "step": 4156 + }, + { + "epoch": 1.4648458149779735, + "grad_norm": 1.7341220293452018, + "learning_rate": 3.671843865234238e-06, + "loss": 0.5549977421760559, + "step": 4157 + }, + { + "epoch": 1.4651982378854624, + "grad_norm": 1.7585158502615206, + "learning_rate": 3.6673335465643488e-06, + "loss": 0.5620779395103455, + "step": 4158 + }, + { + "epoch": 1.4655506607929516, + "grad_norm": 1.845448976603358, + "learning_rate": 3.662825377637638e-06, + "loss": 0.5945389270782471, + "step": 4159 + }, + { + "epoch": 1.4659030837004405, + "grad_norm": 1.9218401758762256, + "learning_rate": 3.6583193599844867e-06, + "loss": 0.6923668384552002, + "step": 4160 + }, + { + "epoch": 
1.4662555066079295, + "grad_norm": 2.16834694145402, + "learning_rate": 3.653815495134557e-06, + "loss": 0.6848515868186951, + "step": 4161 + }, + { + "epoch": 1.4666079295154186, + "grad_norm": 1.922504159473904, + "learning_rate": 3.649313784616765e-06, + "loss": 0.640125036239624, + "step": 4162 + }, + { + "epoch": 1.4669603524229076, + "grad_norm": 1.816415927402479, + "learning_rate": 3.6448142299593026e-06, + "loss": 0.6879653930664062, + "step": 4163 + }, + { + "epoch": 1.4673127753303965, + "grad_norm": 1.9534619637738762, + "learning_rate": 3.6403168326896286e-06, + "loss": 0.6757794618606567, + "step": 4164 + }, + { + "epoch": 1.4676651982378854, + "grad_norm": 1.7476054801499117, + "learning_rate": 3.6358215943344664e-06, + "loss": 0.6405826807022095, + "step": 4165 + }, + { + "epoch": 1.4680176211453744, + "grad_norm": 2.1448885390527064, + "learning_rate": 3.6313285164198187e-06, + "loss": 0.692136287689209, + "step": 4166 + }, + { + "epoch": 1.4683700440528633, + "grad_norm": 1.8449983835752888, + "learning_rate": 3.626837600470935e-06, + "loss": 0.6305568218231201, + "step": 4167 + }, + { + "epoch": 1.4687224669603525, + "grad_norm": 2.1026866185280264, + "learning_rate": 3.6223488480123427e-06, + "loss": 0.7040449380874634, + "step": 4168 + }, + { + "epoch": 1.4690748898678414, + "grad_norm": 1.5463095765444386, + "learning_rate": 3.6178622605678403e-06, + "loss": 0.5064427852630615, + "step": 4169 + }, + { + "epoch": 1.4694273127753303, + "grad_norm": 1.6549157120829303, + "learning_rate": 3.6133778396604813e-06, + "loss": 0.46597155928611755, + "step": 4170 + }, + { + "epoch": 1.4697797356828195, + "grad_norm": 1.9774014610728103, + "learning_rate": 3.6088955868125864e-06, + "loss": 0.5764908790588379, + "step": 4171 + }, + { + "epoch": 1.4701321585903084, + "grad_norm": 1.9347279402338318, + "learning_rate": 3.6044155035457405e-06, + "loss": 0.5808656215667725, + "step": 4172 + }, + { + "epoch": 1.4704845814977974, + "grad_norm": 
2.0116811142135202, + "learning_rate": 3.599937591380791e-06, + "loss": 0.5439287424087524, + "step": 4173 + }, + { + "epoch": 1.4708370044052863, + "grad_norm": 1.5674669602592264, + "learning_rate": 3.595461851837857e-06, + "loss": 0.5943965911865234, + "step": 4174 + }, + { + "epoch": 1.4711894273127752, + "grad_norm": 1.8847509954427386, + "learning_rate": 3.590988286436302e-06, + "loss": 0.631833016872406, + "step": 4175 + }, + { + "epoch": 1.4715418502202642, + "grad_norm": 1.9232774716266652, + "learning_rate": 3.5865168966947718e-06, + "loss": 0.514176070690155, + "step": 4176 + }, + { + "epoch": 1.4718942731277533, + "grad_norm": 1.7211351925277203, + "learning_rate": 3.582047684131159e-06, + "loss": 0.584772527217865, + "step": 4177 + }, + { + "epoch": 1.4722466960352423, + "grad_norm": 1.7726013207799318, + "learning_rate": 3.5775806502626244e-06, + "loss": 0.5085974931716919, + "step": 4178 + }, + { + "epoch": 1.4725991189427312, + "grad_norm": 2.1422494719737464, + "learning_rate": 3.573115796605584e-06, + "loss": 0.62562495470047, + "step": 4179 + }, + { + "epoch": 1.4729515418502204, + "grad_norm": 1.9507950967896, + "learning_rate": 3.5686531246757206e-06, + "loss": 0.5815824270248413, + "step": 4180 + }, + { + "epoch": 1.4733039647577093, + "grad_norm": 1.8811159721586839, + "learning_rate": 3.5641926359879663e-06, + "loss": 0.6639705300331116, + "step": 4181 + }, + { + "epoch": 1.4736563876651982, + "grad_norm": 1.8978737039698366, + "learning_rate": 3.5597343320565293e-06, + "loss": 0.6265684962272644, + "step": 4182 + }, + { + "epoch": 1.4740088105726872, + "grad_norm": 1.877895350809495, + "learning_rate": 3.5552782143948504e-06, + "loss": 0.6113626956939697, + "step": 4183 + }, + { + "epoch": 1.4743612334801761, + "grad_norm": 1.8492974346484832, + "learning_rate": 3.550824284515655e-06, + "loss": 0.5247244834899902, + "step": 4184 + }, + { + "epoch": 1.4747136563876653, + "grad_norm": 1.871370335191458, + "learning_rate": 
3.5463725439309083e-06, + "loss": 0.5524521470069885, + "step": 4185 + }, + { + "epoch": 1.4750660792951542, + "grad_norm": 1.9955136913094453, + "learning_rate": 3.5419229941518384e-06, + "loss": 0.5462251305580139, + "step": 4186 + }, + { + "epoch": 1.4754185022026431, + "grad_norm": 1.6609337480864497, + "learning_rate": 3.5374756366889297e-06, + "loss": 0.6500638723373413, + "step": 4187 + }, + { + "epoch": 1.475770925110132, + "grad_norm": 2.0744035602538586, + "learning_rate": 3.5330304730519216e-06, + "loss": 0.4445904791355133, + "step": 4188 + }, + { + "epoch": 1.4761233480176212, + "grad_norm": 1.7788816335434026, + "learning_rate": 3.5285875047498075e-06, + "loss": 0.6068017482757568, + "step": 4189 + }, + { + "epoch": 1.4764757709251102, + "grad_norm": 1.683605461123042, + "learning_rate": 3.5241467332908384e-06, + "loss": 0.5577334761619568, + "step": 4190 + }, + { + "epoch": 1.4768281938325991, + "grad_norm": 1.9605228698426533, + "learning_rate": 3.5197081601825135e-06, + "loss": 0.6596503257751465, + "step": 4191 + }, + { + "epoch": 1.477180616740088, + "grad_norm": 1.9912955738456768, + "learning_rate": 3.5152717869315965e-06, + "loss": 0.6260303258895874, + "step": 4192 + }, + { + "epoch": 1.477533039647577, + "grad_norm": 2.010278739994815, + "learning_rate": 3.510837615044097e-06, + "loss": 0.5467355251312256, + "step": 4193 + }, + { + "epoch": 1.4778854625550661, + "grad_norm": 2.516516212561754, + "learning_rate": 3.506405646025276e-06, + "loss": 0.5306693911552429, + "step": 4194 + }, + { + "epoch": 1.478237885462555, + "grad_norm": 1.7497505820795882, + "learning_rate": 3.5019758813796513e-06, + "loss": 0.6130725741386414, + "step": 4195 + }, + { + "epoch": 1.478590308370044, + "grad_norm": 2.2199157894914143, + "learning_rate": 3.4975483226109874e-06, + "loss": 0.6656272411346436, + "step": 4196 + }, + { + "epoch": 1.4789427312775332, + "grad_norm": 1.8654097488268417, + "learning_rate": 3.4931229712223047e-06, + "loss": 0.6018439531326294, 
+ "step": 4197 + }, + { + "epoch": 1.479295154185022, + "grad_norm": 2.0982915779378137, + "learning_rate": 3.488699828715871e-06, + "loss": 0.6635257005691528, + "step": 4198 + }, + { + "epoch": 1.479647577092511, + "grad_norm": 1.8412514150393455, + "learning_rate": 3.4842788965932038e-06, + "loss": 0.5760075449943542, + "step": 4199 + }, + { + "epoch": 1.48, + "grad_norm": 1.7009881043074442, + "learning_rate": 3.4798601763550778e-06, + "loss": 0.6951982975006104, + "step": 4200 + }, + { + "epoch": 1.480352422907489, + "grad_norm": 1.880170776358824, + "learning_rate": 3.475443669501508e-06, + "loss": 0.6574405431747437, + "step": 4201 + }, + { + "epoch": 1.4807048458149779, + "grad_norm": 1.8075997179509888, + "learning_rate": 3.4710293775317593e-06, + "loss": 0.5912263989448547, + "step": 4202 + }, + { + "epoch": 1.481057268722467, + "grad_norm": 1.7703606198961421, + "learning_rate": 3.4666173019443485e-06, + "loss": 0.5169661045074463, + "step": 4203 + }, + { + "epoch": 1.481409691629956, + "grad_norm": 1.6923587460137135, + "learning_rate": 3.4622074442370345e-06, + "loss": 0.5707660913467407, + "step": 4204 + }, + { + "epoch": 1.4817621145374449, + "grad_norm": 1.7929036165873167, + "learning_rate": 3.4577998059068354e-06, + "loss": 0.5856584310531616, + "step": 4205 + }, + { + "epoch": 1.482114537444934, + "grad_norm": 2.0144464412272636, + "learning_rate": 3.4533943884499975e-06, + "loss": 0.6306010484695435, + "step": 4206 + }, + { + "epoch": 1.482466960352423, + "grad_norm": 1.9708292107625427, + "learning_rate": 3.4489911933620245e-06, + "loss": 0.6177140474319458, + "step": 4207 + }, + { + "epoch": 1.482819383259912, + "grad_norm": 1.6187910458828605, + "learning_rate": 3.4445902221376694e-06, + "loss": 0.5527759790420532, + "step": 4208 + }, + { + "epoch": 1.4831718061674009, + "grad_norm": 1.792049785406371, + "learning_rate": 3.440191476270922e-06, + "loss": 0.6838431358337402, + "step": 4209 + }, + { + "epoch": 1.4835242290748898, + "grad_norm": 
2.0640892173970933, + "learning_rate": 3.4357949572550196e-06, + "loss": 0.4876987636089325, + "step": 4210 + }, + { + "epoch": 1.4838766519823787, + "grad_norm": 1.66358265635652, + "learning_rate": 3.4314006665824427e-06, + "loss": 0.5639296770095825, + "step": 4211 + }, + { + "epoch": 1.4842290748898679, + "grad_norm": 1.9264745517709694, + "learning_rate": 3.427008605744915e-06, + "loss": 0.4189109802246094, + "step": 4212 + }, + { + "epoch": 1.4845814977973568, + "grad_norm": 1.7041726412059042, + "learning_rate": 3.422618776233413e-06, + "loss": 0.6602882146835327, + "step": 4213 + }, + { + "epoch": 1.4849339207048458, + "grad_norm": 2.105857994769297, + "learning_rate": 3.4182311795381373e-06, + "loss": 0.7642478942871094, + "step": 4214 + }, + { + "epoch": 1.485286343612335, + "grad_norm": 1.703090383184888, + "learning_rate": 3.41384581714854e-06, + "loss": 0.5550031065940857, + "step": 4215 + }, + { + "epoch": 1.4856387665198239, + "grad_norm": 1.956165427853548, + "learning_rate": 3.4094626905533223e-06, + "loss": 0.7036092281341553, + "step": 4216 + }, + { + "epoch": 1.4859911894273128, + "grad_norm": 1.9055824872661757, + "learning_rate": 3.4050818012404165e-06, + "loss": 0.693780779838562, + "step": 4217 + }, + { + "epoch": 1.4863436123348017, + "grad_norm": 1.750544621689218, + "learning_rate": 3.4007031506969977e-06, + "loss": 0.6315299868583679, + "step": 4218 + }, + { + "epoch": 1.4866960352422907, + "grad_norm": 2.0036955114247355, + "learning_rate": 3.396326740409481e-06, + "loss": 0.47849225997924805, + "step": 4219 + }, + { + "epoch": 1.4870484581497798, + "grad_norm": 1.9433930854735686, + "learning_rate": 3.3919525718635195e-06, + "loss": 0.6200336217880249, + "step": 4220 + }, + { + "epoch": 1.4874008810572688, + "grad_norm": 1.7540152253976415, + "learning_rate": 3.3875806465440152e-06, + "loss": 0.7594903707504272, + "step": 4221 + }, + { + "epoch": 1.4877533039647577, + "grad_norm": 1.8336468945254887, + "learning_rate": 
3.383210965935093e-06, + "loss": 0.47159409523010254, + "step": 4222 + }, + { + "epoch": 1.4881057268722466, + "grad_norm": 1.6169320059495966, + "learning_rate": 3.3788435315201216e-06, + "loss": 0.5272495150566101, + "step": 4223 + }, + { + "epoch": 1.4884581497797358, + "grad_norm": 1.8268891947791475, + "learning_rate": 3.3744783447817177e-06, + "loss": 0.43847334384918213, + "step": 4224 + }, + { + "epoch": 1.4888105726872247, + "grad_norm": 1.7778298915864024, + "learning_rate": 3.370115407201724e-06, + "loss": 0.656914234161377, + "step": 4225 + }, + { + "epoch": 1.4891629955947137, + "grad_norm": 1.9329427480605288, + "learning_rate": 3.3657547202612128e-06, + "loss": 0.6379527449607849, + "step": 4226 + }, + { + "epoch": 1.4895154185022026, + "grad_norm": 2.0864597290427103, + "learning_rate": 3.3613962854405136e-06, + "loss": 0.6254120469093323, + "step": 4227 + }, + { + "epoch": 1.4898678414096915, + "grad_norm": 1.9356514762449182, + "learning_rate": 3.35704010421917e-06, + "loss": 0.6567566990852356, + "step": 4228 + }, + { + "epoch": 1.4902202643171807, + "grad_norm": 1.7537675986626187, + "learning_rate": 3.352686178075981e-06, + "loss": 0.5121499300003052, + "step": 4229 + }, + { + "epoch": 1.4905726872246696, + "grad_norm": 1.5674669685831402, + "learning_rate": 3.3483345084889595e-06, + "loss": 0.5727466344833374, + "step": 4230 + }, + { + "epoch": 1.4909251101321586, + "grad_norm": 2.005473410378335, + "learning_rate": 3.3439850969353614e-06, + "loss": 0.6013318300247192, + "step": 4231 + }, + { + "epoch": 1.4912775330396475, + "grad_norm": 1.8391832358116647, + "learning_rate": 3.3396379448916836e-06, + "loss": 0.6350653767585754, + "step": 4232 + }, + { + "epoch": 1.4916299559471367, + "grad_norm": 1.8543607360516363, + "learning_rate": 3.335293053833645e-06, + "loss": 0.7072123885154724, + "step": 4233 + }, + { + "epoch": 1.4919823788546256, + "grad_norm": 1.7154205163032374, + "learning_rate": 3.330950425236201e-06, + "loss": 
0.6208378076553345, + "step": 4234 + }, + { + "epoch": 1.4923348017621145, + "grad_norm": 2.0942315596519667, + "learning_rate": 3.3266100605735397e-06, + "loss": 0.656146764755249, + "step": 4235 + }, + { + "epoch": 1.4926872246696035, + "grad_norm": 1.953931788636606, + "learning_rate": 3.322271961319076e-06, + "loss": 0.6145347952842712, + "step": 4236 + }, + { + "epoch": 1.4930396475770924, + "grad_norm": 1.7528969029549952, + "learning_rate": 3.3179361289454694e-06, + "loss": 0.5876312255859375, + "step": 4237 + }, + { + "epoch": 1.4933920704845816, + "grad_norm": 1.601290490679199, + "learning_rate": 3.3136025649245897e-06, + "loss": 0.48365384340286255, + "step": 4238 + }, + { + "epoch": 1.4937444933920705, + "grad_norm": 2.189978154300805, + "learning_rate": 3.3092712707275467e-06, + "loss": 0.566576361656189, + "step": 4239 + }, + { + "epoch": 1.4940969162995594, + "grad_norm": 1.9878421762040837, + "learning_rate": 3.3049422478246886e-06, + "loss": 0.6982032060623169, + "step": 4240 + }, + { + "epoch": 1.4944493392070486, + "grad_norm": 1.9039317781349454, + "learning_rate": 3.3006154976855787e-06, + "loss": 0.5802686214447021, + "step": 4241 + }, + { + "epoch": 1.4948017621145375, + "grad_norm": 1.8292762393050834, + "learning_rate": 3.296291021779016e-06, + "loss": 0.6656724214553833, + "step": 4242 + }, + { + "epoch": 1.4951541850220265, + "grad_norm": 1.8194685949700777, + "learning_rate": 3.2919688215730227e-06, + "loss": 0.5081402063369751, + "step": 4243 + }, + { + "epoch": 1.4955066079295154, + "grad_norm": 1.858251792062496, + "learning_rate": 3.28764889853485e-06, + "loss": 0.6963785290718079, + "step": 4244 + }, + { + "epoch": 1.4958590308370043, + "grad_norm": 1.6076782907375928, + "learning_rate": 3.283331254130987e-06, + "loss": 0.4953869581222534, + "step": 4245 + }, + { + "epoch": 1.4962114537444933, + "grad_norm": 1.7009256372822803, + "learning_rate": 3.2790158898271283e-06, + "loss": 0.5495179295539856, + "step": 4246 + }, + { + 
"epoch": 1.4965638766519824, + "grad_norm": 1.5282320768079813, + "learning_rate": 3.274702807088208e-06, + "loss": 0.6238610148429871, + "step": 4247 + }, + { + "epoch": 1.4969162995594714, + "grad_norm": 1.7145305152154042, + "learning_rate": 3.270392007378389e-06, + "loss": 0.557083249092102, + "step": 4248 + }, + { + "epoch": 1.4972687224669603, + "grad_norm": 1.8458971793579602, + "learning_rate": 3.2660834921610495e-06, + "loss": 0.6317561864852905, + "step": 4249 + }, + { + "epoch": 1.4976211453744495, + "grad_norm": 2.025909664851984, + "learning_rate": 3.2617772628987974e-06, + "loss": 0.5957529544830322, + "step": 4250 + }, + { + "epoch": 1.4979735682819384, + "grad_norm": 1.8950835934769208, + "learning_rate": 3.2574733210534637e-06, + "loss": 0.7661205530166626, + "step": 4251 + }, + { + "epoch": 1.4983259911894273, + "grad_norm": 1.6920068443310292, + "learning_rate": 3.2531716680861024e-06, + "loss": 0.5616782903671265, + "step": 4252 + }, + { + "epoch": 1.4986784140969163, + "grad_norm": 1.770897830706882, + "learning_rate": 3.2488723054569905e-06, + "loss": 0.5679990649223328, + "step": 4253 + }, + { + "epoch": 1.4990308370044052, + "grad_norm": 1.8387315322882807, + "learning_rate": 3.2445752346256244e-06, + "loss": 0.6355923414230347, + "step": 4254 + }, + { + "epoch": 1.4993832599118941, + "grad_norm": 1.5734033631685667, + "learning_rate": 3.2402804570507316e-06, + "loss": 0.5050745010375977, + "step": 4255 + }, + { + "epoch": 1.4997356828193833, + "grad_norm": 1.9535070131295427, + "learning_rate": 3.2359879741902557e-06, + "loss": 0.6585286855697632, + "step": 4256 + }, + { + "epoch": 1.5000881057268722, + "grad_norm": 1.9369843836982625, + "learning_rate": 3.2316977875013567e-06, + "loss": 0.5108245611190796, + "step": 4257 + }, + { + "epoch": 1.5004405286343614, + "grad_norm": 1.7460361732263896, + "learning_rate": 3.2274098984404223e-06, + "loss": 0.5270702838897705, + "step": 4258 + }, + { + "epoch": 1.5007929515418503, + "grad_norm": 
2.09582870313145, + "learning_rate": 3.223124308463057e-06, + "loss": 0.6421051025390625, + "step": 4259 + }, + { + "epoch": 1.5011453744493393, + "grad_norm": 2.0173715825527454, + "learning_rate": 3.218841019024084e-06, + "loss": 0.6040945053100586, + "step": 4260 + }, + { + "epoch": 1.5014977973568282, + "grad_norm": 3.5488261180155023, + "learning_rate": 3.214560031577548e-06, + "loss": 0.6389988660812378, + "step": 4261 + }, + { + "epoch": 1.5018502202643171, + "grad_norm": 2.1449229280338096, + "learning_rate": 3.210281347576707e-06, + "loss": 0.6474273800849915, + "step": 4262 + }, + { + "epoch": 1.502202643171806, + "grad_norm": 2.0726789637634666, + "learning_rate": 3.206004968474048e-06, + "loss": 0.7020560503005981, + "step": 4263 + }, + { + "epoch": 1.502555066079295, + "grad_norm": 1.7677587583992656, + "learning_rate": 3.2017308957212644e-06, + "loss": 0.574647068977356, + "step": 4264 + }, + { + "epoch": 1.5029074889867842, + "grad_norm": 1.8152121117445819, + "learning_rate": 3.1974591307692724e-06, + "loss": 0.6912944316864014, + "step": 4265 + }, + { + "epoch": 1.503259911894273, + "grad_norm": 1.7825438750387144, + "learning_rate": 3.1931896750682036e-06, + "loss": 0.7738592028617859, + "step": 4266 + }, + { + "epoch": 1.5036123348017623, + "grad_norm": 1.7835054391965142, + "learning_rate": 3.188922530067402e-06, + "loss": 0.6418012380599976, + "step": 4267 + }, + { + "epoch": 1.5039647577092512, + "grad_norm": 2.0481798246782628, + "learning_rate": 3.1846576972154343e-06, + "loss": 0.639055609703064, + "step": 4268 + }, + { + "epoch": 1.5043171806167401, + "grad_norm": 1.8365579809471801, + "learning_rate": 3.1803951779600774e-06, + "loss": 0.5512406229972839, + "step": 4269 + }, + { + "epoch": 1.504669603524229, + "grad_norm": 1.7182877192220278, + "learning_rate": 3.1761349737483194e-06, + "loss": 0.5838354229927063, + "step": 4270 + }, + { + "epoch": 1.505022026431718, + "grad_norm": 1.5090233544437164, + "learning_rate": 
3.1718770860263747e-06, + "loss": 0.5903568267822266, + "step": 4271 + }, + { + "epoch": 1.505374449339207, + "grad_norm": 1.761348463041135, + "learning_rate": 3.1676215162396604e-06, + "loss": 0.5610073804855347, + "step": 4272 + }, + { + "epoch": 1.5057268722466959, + "grad_norm": 1.9899291186285208, + "learning_rate": 3.163368265832809e-06, + "loss": 0.6543136835098267, + "step": 4273 + }, + { + "epoch": 1.506079295154185, + "grad_norm": 1.9484911821126696, + "learning_rate": 3.1591173362496686e-06, + "loss": 0.6586440801620483, + "step": 4274 + }, + { + "epoch": 1.506431718061674, + "grad_norm": 1.7389367867721892, + "learning_rate": 3.1548687289332958e-06, + "loss": 0.5360713601112366, + "step": 4275 + }, + { + "epoch": 1.5067841409691631, + "grad_norm": 2.1157677007043243, + "learning_rate": 3.1506224453259615e-06, + "loss": 0.6695356369018555, + "step": 4276 + }, + { + "epoch": 1.507136563876652, + "grad_norm": 1.7594436585853632, + "learning_rate": 3.146378486869146e-06, + "loss": 0.5708016753196716, + "step": 4277 + }, + { + "epoch": 1.507488986784141, + "grad_norm": 1.997964983412431, + "learning_rate": 3.142136855003538e-06, + "loss": 0.5412342548370361, + "step": 4278 + }, + { + "epoch": 1.50784140969163, + "grad_norm": 1.645092688511499, + "learning_rate": 3.1378975511690468e-06, + "loss": 0.5392874479293823, + "step": 4279 + }, + { + "epoch": 1.5081938325991189, + "grad_norm": 2.1591157791946256, + "learning_rate": 3.133660576804781e-06, + "loss": 0.6559237241744995, + "step": 4280 + }, + { + "epoch": 1.5085462555066078, + "grad_norm": 1.6625372644532221, + "learning_rate": 3.1294259333490597e-06, + "loss": 0.49973511695861816, + "step": 4281 + }, + { + "epoch": 1.5088986784140968, + "grad_norm": 1.9292311285357981, + "learning_rate": 3.1251936222394152e-06, + "loss": 0.5458316206932068, + "step": 4282 + }, + { + "epoch": 1.509251101321586, + "grad_norm": 1.7771232071244591, + "learning_rate": 3.120963644912579e-06, + "loss": 0.628986656665802, + 
"step": 4283 + }, + { + "epoch": 1.5096035242290748, + "grad_norm": 1.956059007614116, + "learning_rate": 3.1167360028045103e-06, + "loss": 0.6234235167503357, + "step": 4284 + }, + { + "epoch": 1.509955947136564, + "grad_norm": 1.832929038299875, + "learning_rate": 3.112510697350348e-06, + "loss": 0.49892476201057434, + "step": 4285 + }, + { + "epoch": 1.510308370044053, + "grad_norm": 1.859590789761001, + "learning_rate": 3.1082877299844562e-06, + "loss": 0.42951709032058716, + "step": 4286 + }, + { + "epoch": 1.5106607929515419, + "grad_norm": 1.9658176092994974, + "learning_rate": 3.1040671021404045e-06, + "loss": 0.6392263770103455, + "step": 4287 + }, + { + "epoch": 1.5110132158590308, + "grad_norm": 1.9240075529588605, + "learning_rate": 3.099848815250964e-06, + "loss": 0.6198933124542236, + "step": 4288 + }, + { + "epoch": 1.5113656387665197, + "grad_norm": 2.6550374581713436, + "learning_rate": 3.0956328707481055e-06, + "loss": 0.7626048922538757, + "step": 4289 + }, + { + "epoch": 1.5117180616740087, + "grad_norm": 2.057470135822257, + "learning_rate": 3.0914192700630175e-06, + "loss": 0.5245747566223145, + "step": 4290 + }, + { + "epoch": 1.5120704845814978, + "grad_norm": 2.016409834872785, + "learning_rate": 3.0872080146260818e-06, + "loss": 0.6788556575775146, + "step": 4291 + }, + { + "epoch": 1.5124229074889868, + "grad_norm": 1.8970717527543317, + "learning_rate": 3.082999105866897e-06, + "loss": 0.6224241852760315, + "step": 4292 + }, + { + "epoch": 1.512775330396476, + "grad_norm": 1.8828342237083628, + "learning_rate": 3.0787925452142477e-06, + "loss": 0.706061840057373, + "step": 4293 + }, + { + "epoch": 1.5131277533039649, + "grad_norm": 1.8530285911040203, + "learning_rate": 3.07458833409613e-06, + "loss": 0.7075262665748596, + "step": 4294 + }, + { + "epoch": 1.5134801762114538, + "grad_norm": 1.8075779914700747, + "learning_rate": 3.0703864739397494e-06, + "loss": 0.4912101626396179, + "step": 4295 + }, + { + "epoch": 1.5138325991189427, + 
"grad_norm": 1.908543179959353, + "learning_rate": 3.066186966171507e-06, + "loss": 0.6530265808105469, + "step": 4296 + }, + { + "epoch": 1.5141850220264317, + "grad_norm": 2.12821134565194, + "learning_rate": 3.0619898122169946e-06, + "loss": 0.4905887246131897, + "step": 4297 + }, + { + "epoch": 1.5145374449339206, + "grad_norm": 1.626400447189927, + "learning_rate": 3.057795013501025e-06, + "loss": 0.5025225281715393, + "step": 4298 + }, + { + "epoch": 1.5148898678414096, + "grad_norm": 1.945132129374327, + "learning_rate": 3.0536025714475946e-06, + "loss": 0.5769479274749756, + "step": 4299 + }, + { + "epoch": 1.5152422907488987, + "grad_norm": 2.0281621255217526, + "learning_rate": 3.049412487479919e-06, + "loss": 0.6275384426116943, + "step": 4300 + }, + { + "epoch": 1.5155947136563876, + "grad_norm": 1.6860684781531563, + "learning_rate": 3.04522476302039e-06, + "loss": 0.5555096864700317, + "step": 4301 + }, + { + "epoch": 1.5159471365638768, + "grad_norm": 1.9260394424858205, + "learning_rate": 3.0410393994906096e-06, + "loss": 0.5605635643005371, + "step": 4302 + }, + { + "epoch": 1.5162995594713657, + "grad_norm": 2.127824876873509, + "learning_rate": 3.0368563983113864e-06, + "loss": 0.6006621718406677, + "step": 4303 + }, + { + "epoch": 1.5166519823788547, + "grad_norm": 2.012171091410243, + "learning_rate": 3.0326757609027147e-06, + "loss": 0.5288259983062744, + "step": 4304 + }, + { + "epoch": 1.5170044052863436, + "grad_norm": 1.7034257525965926, + "learning_rate": 3.0284974886837903e-06, + "loss": 0.5671676993370056, + "step": 4305 + }, + { + "epoch": 1.5173568281938326, + "grad_norm": 2.496889571382279, + "learning_rate": 3.0243215830730075e-06, + "loss": 0.6072134971618652, + "step": 4306 + }, + { + "epoch": 1.5177092511013215, + "grad_norm": 1.726261889224961, + "learning_rate": 3.020148045487953e-06, + "loss": 0.6010481119155884, + "step": 4307 + }, + { + "epoch": 1.5180616740088104, + "grad_norm": 1.6250908189476003, + "learning_rate": 
3.0159768773454225e-06, + "loss": 0.6126751899719238, + "step": 4308 + }, + { + "epoch": 1.5184140969162996, + "grad_norm": 1.6123380534859018, + "learning_rate": 3.011808080061387e-06, + "loss": 0.5408819317817688, + "step": 4309 + }, + { + "epoch": 1.5187665198237885, + "grad_norm": 1.6792977324898095, + "learning_rate": 3.0076416550510255e-06, + "loss": 0.6528562307357788, + "step": 4310 + }, + { + "epoch": 1.5191189427312777, + "grad_norm": 1.6431948485087644, + "learning_rate": 3.003477603728715e-06, + "loss": 0.6355241537094116, + "step": 4311 + }, + { + "epoch": 1.5194713656387666, + "grad_norm": 1.7630338655444058, + "learning_rate": 2.9993159275080174e-06, + "loss": 0.5511878728866577, + "step": 4312 + }, + { + "epoch": 1.5198237885462555, + "grad_norm": 1.9093354982688662, + "learning_rate": 2.9951566278016943e-06, + "loss": 0.5066816806793213, + "step": 4313 + }, + { + "epoch": 1.5201762114537445, + "grad_norm": 1.676344611272679, + "learning_rate": 2.9909997060216966e-06, + "loss": 0.5636533498764038, + "step": 4314 + }, + { + "epoch": 1.5205286343612334, + "grad_norm": 1.8885420705538216, + "learning_rate": 2.9868451635791706e-06, + "loss": 0.49742352962493896, + "step": 4315 + }, + { + "epoch": 1.5208810572687224, + "grad_norm": 2.013877525146858, + "learning_rate": 2.9826930018844533e-06, + "loss": 0.7264617681503296, + "step": 4316 + }, + { + "epoch": 1.5212334801762113, + "grad_norm": 1.8792043539230026, + "learning_rate": 2.978543222347076e-06, + "loss": 0.5342350006103516, + "step": 4317 + }, + { + "epoch": 1.5215859030837005, + "grad_norm": 1.7569176385310192, + "learning_rate": 2.9743958263757554e-06, + "loss": 0.4324883818626404, + "step": 4318 + }, + { + "epoch": 1.5219383259911894, + "grad_norm": 1.8546496052344164, + "learning_rate": 2.970250815378409e-06, + "loss": 0.5867510437965393, + "step": 4319 + }, + { + "epoch": 1.5222907488986785, + "grad_norm": 1.8415605839915816, + "learning_rate": 2.966108190762138e-06, + "loss": 
0.7176594734191895, + "step": 4320 + }, + { + "epoch": 1.5226431718061675, + "grad_norm": 1.967906535494615, + "learning_rate": 2.9619679539332337e-06, + "loss": 0.5810995101928711, + "step": 4321 + }, + { + "epoch": 1.5229955947136564, + "grad_norm": 2.478705006420029, + "learning_rate": 2.957830106297177e-06, + "loss": 0.6262675523757935, + "step": 4322 + }, + { + "epoch": 1.5233480176211454, + "grad_norm": 2.1743670559442245, + "learning_rate": 2.9536946492586383e-06, + "loss": 0.7743325233459473, + "step": 4323 + }, + { + "epoch": 1.5237004405286343, + "grad_norm": 1.7129659102014092, + "learning_rate": 2.9495615842214776e-06, + "loss": 0.7706553936004639, + "step": 4324 + }, + { + "epoch": 1.5240528634361232, + "grad_norm": 1.6835245148440698, + "learning_rate": 2.9454309125887405e-06, + "loss": 0.5982425808906555, + "step": 4325 + }, + { + "epoch": 1.5244052863436124, + "grad_norm": 1.8547174799711497, + "learning_rate": 2.9413026357626596e-06, + "loss": 0.5580830574035645, + "step": 4326 + }, + { + "epoch": 1.5247577092511013, + "grad_norm": 1.8954299514318398, + "learning_rate": 2.937176755144662e-06, + "loss": 0.5316063165664673, + "step": 4327 + }, + { + "epoch": 1.5251101321585903, + "grad_norm": 1.7578719545795178, + "learning_rate": 2.9330532721353523e-06, + "loss": 0.574161171913147, + "step": 4328 + }, + { + "epoch": 1.5254625550660794, + "grad_norm": 1.7055567103896054, + "learning_rate": 2.9289321881345257e-06, + "loss": 0.5339558720588684, + "step": 4329 + }, + { + "epoch": 1.5258149779735684, + "grad_norm": 1.7071106155323514, + "learning_rate": 2.9248135045411607e-06, + "loss": 0.594109296798706, + "step": 4330 + }, + { + "epoch": 1.5261674008810573, + "grad_norm": 2.0022142230843873, + "learning_rate": 2.9206972227534237e-06, + "loss": 0.5953024625778198, + "step": 4331 + }, + { + "epoch": 1.5265198237885462, + "grad_norm": 1.6369885387081085, + "learning_rate": 2.916583344168663e-06, + "loss": 0.5142296552658081, + "step": 4332 + }, + { + 
"epoch": 1.5268722466960352, + "grad_norm": 1.7205930689665365, + "learning_rate": 2.912471870183411e-06, + "loss": 0.5796314477920532, + "step": 4333 + }, + { + "epoch": 1.527224669603524, + "grad_norm": 1.984086822092815, + "learning_rate": 2.9083628021933886e-06, + "loss": 0.7202566862106323, + "step": 4334 + }, + { + "epoch": 1.5275770925110133, + "grad_norm": 2.2205082372485756, + "learning_rate": 2.9042561415934956e-06, + "loss": 0.6684188842773438, + "step": 4335 + }, + { + "epoch": 1.5279295154185022, + "grad_norm": 1.7000543577524454, + "learning_rate": 2.9001518897778147e-06, + "loss": 0.5377634763717651, + "step": 4336 + }, + { + "epoch": 1.5282819383259914, + "grad_norm": 1.7985805373418047, + "learning_rate": 2.8960500481396115e-06, + "loss": 0.5780486464500427, + "step": 4337 + }, + { + "epoch": 1.5286343612334803, + "grad_norm": 1.7528900086241466, + "learning_rate": 2.891950618071333e-06, + "loss": 0.6020476818084717, + "step": 4338 + }, + { + "epoch": 1.5289867841409692, + "grad_norm": 2.1939247460310303, + "learning_rate": 2.8878536009646106e-06, + "loss": 0.6076337099075317, + "step": 4339 + }, + { + "epoch": 1.5293392070484582, + "grad_norm": 1.9795227787355654, + "learning_rate": 2.883758998210251e-06, + "loss": 0.6370673179626465, + "step": 4340 + }, + { + "epoch": 1.529691629955947, + "grad_norm": 1.8686242611734982, + "learning_rate": 2.879666811198244e-06, + "loss": 0.41594892740249634, + "step": 4341 + }, + { + "epoch": 1.530044052863436, + "grad_norm": 1.9135398095116771, + "learning_rate": 2.8755770413177632e-06, + "loss": 0.4506857693195343, + "step": 4342 + }, + { + "epoch": 1.530396475770925, + "grad_norm": 1.730478313082556, + "learning_rate": 2.8714896899571575e-06, + "loss": 0.5883188247680664, + "step": 4343 + }, + { + "epoch": 1.5307488986784141, + "grad_norm": 1.9672614741507624, + "learning_rate": 2.8674047585039545e-06, + "loss": 0.6327757239341736, + "step": 4344 + }, + { + "epoch": 1.531101321585903, + "grad_norm": 
1.958310227558085, + "learning_rate": 2.863322248344862e-06, + "loss": 0.6241307258605957, + "step": 4345 + }, + { + "epoch": 1.5314537444933922, + "grad_norm": 1.9476460174005816, + "learning_rate": 2.859242160865764e-06, + "loss": 0.6982603669166565, + "step": 4346 + }, + { + "epoch": 1.5318061674008812, + "grad_norm": 1.8365437525119523, + "learning_rate": 2.8551644974517236e-06, + "loss": 0.6293624639511108, + "step": 4347 + }, + { + "epoch": 1.53215859030837, + "grad_norm": 1.8744498682554205, + "learning_rate": 2.85108925948698e-06, + "loss": 0.5630898475646973, + "step": 4348 + }, + { + "epoch": 1.532511013215859, + "grad_norm": 1.9156047998547734, + "learning_rate": 2.847016448354948e-06, + "loss": 0.5300726294517517, + "step": 4349 + }, + { + "epoch": 1.532863436123348, + "grad_norm": 1.763805411540431, + "learning_rate": 2.8429460654382257e-06, + "loss": 0.6302311420440674, + "step": 4350 + }, + { + "epoch": 1.533215859030837, + "grad_norm": 2.0169994553834467, + "learning_rate": 2.8388781121185815e-06, + "loss": 0.5063371658325195, + "step": 4351 + }, + { + "epoch": 1.5335682819383258, + "grad_norm": 1.995201540637565, + "learning_rate": 2.8348125897769496e-06, + "loss": 0.6116877198219299, + "step": 4352 + }, + { + "epoch": 1.533920704845815, + "grad_norm": 1.7792408625607217, + "learning_rate": 2.830749499793458e-06, + "loss": 0.5671982169151306, + "step": 4353 + }, + { + "epoch": 1.534273127753304, + "grad_norm": 1.927509688688397, + "learning_rate": 2.826688843547395e-06, + "loss": 0.6537752747535706, + "step": 4354 + }, + { + "epoch": 1.534625550660793, + "grad_norm": 2.0558781996543805, + "learning_rate": 2.8226306224172283e-06, + "loss": 0.6608545780181885, + "step": 4355 + }, + { + "epoch": 1.534977973568282, + "grad_norm": 2.2266474146630655, + "learning_rate": 2.8185748377805977e-06, + "loss": 0.7038587331771851, + "step": 4356 + }, + { + "epoch": 1.535330396475771, + "grad_norm": 2.084521400671401, + "learning_rate": 2.8145214910143128e-06, + 
"loss": 0.7422336339950562, + "step": 4357 + }, + { + "epoch": 1.53568281938326, + "grad_norm": 1.7545985325343467, + "learning_rate": 2.8104705834943625e-06, + "loss": 0.5739270448684692, + "step": 4358 + }, + { + "epoch": 1.5360352422907488, + "grad_norm": 1.8063052751864486, + "learning_rate": 2.8064221165959073e-06, + "loss": 0.6429908275604248, + "step": 4359 + }, + { + "epoch": 1.5363876651982378, + "grad_norm": 2.071223390835828, + "learning_rate": 2.802376091693264e-06, + "loss": 0.5660578012466431, + "step": 4360 + }, + { + "epoch": 1.5367400881057267, + "grad_norm": 1.7313945668822706, + "learning_rate": 2.798332510159942e-06, + "loss": 0.4507398009300232, + "step": 4361 + }, + { + "epoch": 1.5370925110132159, + "grad_norm": 1.638677595892734, + "learning_rate": 2.7942913733686063e-06, + "loss": 0.5107634663581848, + "step": 4362 + }, + { + "epoch": 1.5374449339207048, + "grad_norm": 1.8351099655043759, + "learning_rate": 2.790252682691106e-06, + "loss": 0.505529522895813, + "step": 4363 + }, + { + "epoch": 1.537797356828194, + "grad_norm": 1.7171378177734038, + "learning_rate": 2.7862164394984405e-06, + "loss": 0.459098219871521, + "step": 4364 + }, + { + "epoch": 1.538149779735683, + "grad_norm": 1.6256824509042396, + "learning_rate": 2.782182645160789e-06, + "loss": 0.5200169086456299, + "step": 4365 + }, + { + "epoch": 1.5385022026431718, + "grad_norm": 2.1593491644665908, + "learning_rate": 2.778151301047506e-06, + "loss": 0.6723796725273132, + "step": 4366 + }, + { + "epoch": 1.5388546255506608, + "grad_norm": 1.7552269414614434, + "learning_rate": 2.7741224085271067e-06, + "loss": 0.5385584831237793, + "step": 4367 + }, + { + "epoch": 1.5392070484581497, + "grad_norm": 1.6358174263890735, + "learning_rate": 2.770095968967267e-06, + "loss": 0.5766934156417847, + "step": 4368 + }, + { + "epoch": 1.5395594713656386, + "grad_norm": 1.6116842273066272, + "learning_rate": 2.766071983734845e-06, + "loss": 0.6303011178970337, + "step": 4369 + }, + { + 
"epoch": 1.5399118942731278, + "grad_norm": 2.0294439046284736, + "learning_rate": 2.7620504541958525e-06, + "loss": 0.6192827224731445, + "step": 4370 + }, + { + "epoch": 1.5402643171806167, + "grad_norm": 1.9731545400175885, + "learning_rate": 2.758031381715485e-06, + "loss": 0.543215811252594, + "step": 4371 + }, + { + "epoch": 1.5406167400881057, + "grad_norm": 1.8102023756492311, + "learning_rate": 2.7540147676580808e-06, + "loss": 0.6364312171936035, + "step": 4372 + }, + { + "epoch": 1.5409691629955948, + "grad_norm": 1.7440307883728075, + "learning_rate": 2.750000613387157e-06, + "loss": 0.5625254511833191, + "step": 4373 + }, + { + "epoch": 1.5413215859030838, + "grad_norm": 2.1646055145888377, + "learning_rate": 2.7459889202654e-06, + "loss": 0.7304128408432007, + "step": 4374 + }, + { + "epoch": 1.5416740088105727, + "grad_norm": 2.1627384337401296, + "learning_rate": 2.7419796896546536e-06, + "loss": 0.676097571849823, + "step": 4375 + }, + { + "epoch": 1.5420264317180616, + "grad_norm": 1.9373952441867042, + "learning_rate": 2.7379729229159193e-06, + "loss": 0.7024539709091187, + "step": 4376 + }, + { + "epoch": 1.5423788546255506, + "grad_norm": 1.6778176206961017, + "learning_rate": 2.7339686214093774e-06, + "loss": 0.6357964277267456, + "step": 4377 + }, + { + "epoch": 1.5427312775330395, + "grad_norm": 1.8606991682829432, + "learning_rate": 2.729966786494361e-06, + "loss": 0.5254555940628052, + "step": 4378 + }, + { + "epoch": 1.5430837004405287, + "grad_norm": 1.527570009912515, + "learning_rate": 2.7259674195293697e-06, + "loss": 0.4899883270263672, + "step": 4379 + }, + { + "epoch": 1.5434361233480176, + "grad_norm": 1.725531709071361, + "learning_rate": 2.721970521872063e-06, + "loss": 0.5750056505203247, + "step": 4380 + }, + { + "epoch": 1.5437885462555068, + "grad_norm": 1.8900737960638598, + "learning_rate": 2.71797609487926e-06, + "loss": 0.5852059125900269, + "step": 4381 + }, + { + "epoch": 1.5441409691629957, + "grad_norm": 
1.8258629839457563, + "learning_rate": 2.71398413990695e-06, + "loss": 0.6360914707183838, + "step": 4382 + }, + { + "epoch": 1.5444933920704846, + "grad_norm": 1.7586915096989222, + "learning_rate": 2.7099946583102764e-06, + "loss": 0.5120062828063965, + "step": 4383 + }, + { + "epoch": 1.5448458149779736, + "grad_norm": 2.068877348919367, + "learning_rate": 2.706007651443543e-06, + "loss": 0.5798901319503784, + "step": 4384 + }, + { + "epoch": 1.5451982378854625, + "grad_norm": 2.014366735127449, + "learning_rate": 2.702023120660213e-06, + "loss": 0.5112065076828003, + "step": 4385 + }, + { + "epoch": 1.5455506607929514, + "grad_norm": 1.9281653354114374, + "learning_rate": 2.6980410673129133e-06, + "loss": 0.6136611700057983, + "step": 4386 + }, + { + "epoch": 1.5459030837004404, + "grad_norm": 1.6841076662412324, + "learning_rate": 2.694061492753426e-06, + "loss": 0.5944457054138184, + "step": 4387 + }, + { + "epoch": 1.5462555066079295, + "grad_norm": 1.9404009079173157, + "learning_rate": 2.690084398332692e-06, + "loss": 0.5931667685508728, + "step": 4388 + }, + { + "epoch": 1.5466079295154185, + "grad_norm": 1.607840859056915, + "learning_rate": 2.686109785400809e-06, + "loss": 0.6112217307090759, + "step": 4389 + }, + { + "epoch": 1.5469603524229076, + "grad_norm": 1.9090904865448288, + "learning_rate": 2.68213765530704e-06, + "loss": 0.549437940120697, + "step": 4390 + }, + { + "epoch": 1.5473127753303966, + "grad_norm": 1.9826888565576624, + "learning_rate": 2.6781680093997965e-06, + "loss": 0.674758791923523, + "step": 4391 + }, + { + "epoch": 1.5476651982378855, + "grad_norm": 1.874293916028551, + "learning_rate": 2.6742008490266504e-06, + "loss": 0.6015446186065674, + "step": 4392 + }, + { + "epoch": 1.5480176211453744, + "grad_norm": 2.2556212033260223, + "learning_rate": 2.6702361755343278e-06, + "loss": 0.5512514710426331, + "step": 4393 + }, + { + "epoch": 1.5483700440528634, + "grad_norm": 2.607818594949077, + "learning_rate": 
2.666273990268713e-06, + "loss": 0.6443158984184265, + "step": 4394 + }, + { + "epoch": 1.5487224669603523, + "grad_norm": 1.840692212890546, + "learning_rate": 2.6623142945748447e-06, + "loss": 0.5682512521743774, + "step": 4395 + }, + { + "epoch": 1.5490748898678413, + "grad_norm": 2.173690182254911, + "learning_rate": 2.658357089796917e-06, + "loss": 0.5544074773788452, + "step": 4396 + }, + { + "epoch": 1.5494273127753304, + "grad_norm": 1.726846155573174, + "learning_rate": 2.6544023772782736e-06, + "loss": 0.5811636447906494, + "step": 4397 + }, + { + "epoch": 1.5497797356828193, + "grad_norm": 1.790573455353959, + "learning_rate": 2.650450158361422e-06, + "loss": 0.4696553647518158, + "step": 4398 + }, + { + "epoch": 1.5501321585903085, + "grad_norm": 1.918353319441468, + "learning_rate": 2.6465004343880153e-06, + "loss": 0.6897521615028381, + "step": 4399 + }, + { + "epoch": 1.5504845814977974, + "grad_norm": 1.9780672696205217, + "learning_rate": 2.6425532066988613e-06, + "loss": 0.6154924631118774, + "step": 4400 + }, + { + "epoch": 1.5508370044052864, + "grad_norm": 2.0803038103367815, + "learning_rate": 2.6386084766339214e-06, + "loss": 0.5333596467971802, + "step": 4401 + }, + { + "epoch": 1.5511894273127753, + "grad_norm": 1.8190921046801005, + "learning_rate": 2.634666245532309e-06, + "loss": 0.6633985042572021, + "step": 4402 + }, + { + "epoch": 1.5515418502202643, + "grad_norm": 1.6722563074159322, + "learning_rate": 2.630726514732289e-06, + "loss": 0.7913509607315063, + "step": 4403 + }, + { + "epoch": 1.5518942731277532, + "grad_norm": 1.8118598393520884, + "learning_rate": 2.6267892855712763e-06, + "loss": 0.5776455402374268, + "step": 4404 + }, + { + "epoch": 1.5522466960352423, + "grad_norm": 1.68862603841886, + "learning_rate": 2.6228545593858357e-06, + "loss": 0.5912357568740845, + "step": 4405 + }, + { + "epoch": 1.5525991189427313, + "grad_norm": 2.1281645633634274, + "learning_rate": 2.618922337511689e-06, + "loss": 0.49319127202033997, + 
"step": 4406 + }, + { + "epoch": 1.5529515418502202, + "grad_norm": 1.9165140700223777, + "learning_rate": 2.6149926212837016e-06, + "loss": 0.5805023908615112, + "step": 4407 + }, + { + "epoch": 1.5533039647577094, + "grad_norm": 1.8889646736612442, + "learning_rate": 2.6110654120358902e-06, + "loss": 0.5635806918144226, + "step": 4408 + }, + { + "epoch": 1.5536563876651983, + "grad_norm": 1.6927734818193383, + "learning_rate": 2.6071407111014178e-06, + "loss": 0.5006709694862366, + "step": 4409 + }, + { + "epoch": 1.5540088105726872, + "grad_norm": 2.1152391301202695, + "learning_rate": 2.6032185198126005e-06, + "loss": 0.6035311818122864, + "step": 4410 + }, + { + "epoch": 1.5543612334801762, + "grad_norm": 1.7863466908276826, + "learning_rate": 2.599298839500899e-06, + "loss": 0.5978977680206299, + "step": 4411 + }, + { + "epoch": 1.5547136563876651, + "grad_norm": 1.8741259739913476, + "learning_rate": 2.5953816714969194e-06, + "loss": 0.6330617070198059, + "step": 4412 + }, + { + "epoch": 1.555066079295154, + "grad_norm": 2.090756152665107, + "learning_rate": 2.591467017130426e-06, + "loss": 0.6541750431060791, + "step": 4413 + }, + { + "epoch": 1.5554185022026432, + "grad_norm": 2.229850729984303, + "learning_rate": 2.5875548777303204e-06, + "loss": 0.5503655076026917, + "step": 4414 + }, + { + "epoch": 1.5557709251101322, + "grad_norm": 1.7715926792210983, + "learning_rate": 2.583645254624645e-06, + "loss": 0.5117509365081787, + "step": 4415 + }, + { + "epoch": 1.5561233480176213, + "grad_norm": 2.2945620883910953, + "learning_rate": 2.5797381491406027e-06, + "loss": 0.6699894070625305, + "step": 4416 + }, + { + "epoch": 1.5564757709251102, + "grad_norm": 2.0695182526571765, + "learning_rate": 2.5758335626045308e-06, + "loss": 0.6870071291923523, + "step": 4417 + }, + { + "epoch": 1.5568281938325992, + "grad_norm": 1.7821456199762375, + "learning_rate": 2.571931496341916e-06, + "loss": 0.7680954933166504, + "step": 4418 + }, + { + "epoch": 
1.5571806167400881, + "grad_norm": 2.2345593449058203, + "learning_rate": 2.568031951677389e-06, + "loss": 0.6504727602005005, + "step": 4419 + }, + { + "epoch": 1.557533039647577, + "grad_norm": 1.6796846625470907, + "learning_rate": 2.5641349299347196e-06, + "loss": 0.7101249098777771, + "step": 4420 + }, + { + "epoch": 1.557885462555066, + "grad_norm": 1.6800594114237326, + "learning_rate": 2.560240432436831e-06, + "loss": 0.5734864473342896, + "step": 4421 + }, + { + "epoch": 1.558237885462555, + "grad_norm": 2.065356657851052, + "learning_rate": 2.5563484605057854e-06, + "loss": 0.48660311102867126, + "step": 4422 + }, + { + "epoch": 1.558590308370044, + "grad_norm": 2.0678483817870847, + "learning_rate": 2.552459015462776e-06, + "loss": 0.6442986726760864, + "step": 4423 + }, + { + "epoch": 1.558942731277533, + "grad_norm": 1.721146259770593, + "learning_rate": 2.548572098628158e-06, + "loss": 0.5871995091438293, + "step": 4424 + }, + { + "epoch": 1.5592951541850222, + "grad_norm": 1.8900651182173844, + "learning_rate": 2.544687711321415e-06, + "loss": 0.5899579524993896, + "step": 4425 + }, + { + "epoch": 1.5596475770925111, + "grad_norm": 1.589021195095579, + "learning_rate": 2.540805854861177e-06, + "loss": 0.571341872215271, + "step": 4426 + }, + { + "epoch": 1.56, + "grad_norm": 2.074587814486514, + "learning_rate": 2.5369265305652112e-06, + "loss": 0.6297308206558228, + "step": 4427 + }, + { + "epoch": 1.560352422907489, + "grad_norm": 1.957815284803115, + "learning_rate": 2.5330497397504274e-06, + "loss": 0.6277692317962646, + "step": 4428 + }, + { + "epoch": 1.560704845814978, + "grad_norm": 1.8075270549654299, + "learning_rate": 2.5291754837328787e-06, + "loss": 0.5124595165252686, + "step": 4429 + }, + { + "epoch": 1.5610572687224669, + "grad_norm": 2.027466093132035, + "learning_rate": 2.5253037638277557e-06, + "loss": 0.6777669191360474, + "step": 4430 + }, + { + "epoch": 1.5614096916299558, + "grad_norm": 2.299371691906574, + "learning_rate": 
2.521434581349378e-06, + "loss": 0.7380247116088867, + "step": 4431 + }, + { + "epoch": 1.561762114537445, + "grad_norm": 2.0566157739817825, + "learning_rate": 2.5175679376112206e-06, + "loss": 0.6605849266052246, + "step": 4432 + }, + { + "epoch": 1.562114537444934, + "grad_norm": 1.7899790415054606, + "learning_rate": 2.5137038339258837e-06, + "loss": 0.5688329935073853, + "step": 4433 + }, + { + "epoch": 1.562466960352423, + "grad_norm": 2.1227992795896258, + "learning_rate": 2.5098422716051197e-06, + "loss": 0.6731508374214172, + "step": 4434 + }, + { + "epoch": 1.562819383259912, + "grad_norm": 1.766889438914358, + "learning_rate": 2.505983251959798e-06, + "loss": 0.5177330374717712, + "step": 4435 + }, + { + "epoch": 1.563171806167401, + "grad_norm": 1.793841264632356, + "learning_rate": 2.502126776299938e-06, + "loss": 0.5307918787002563, + "step": 4436 + }, + { + "epoch": 1.5635242290748899, + "grad_norm": 1.8402321267228738, + "learning_rate": 2.4982728459346974e-06, + "loss": 0.59647536277771, + "step": 4437 + }, + { + "epoch": 1.5638766519823788, + "grad_norm": 2.049156650890273, + "learning_rate": 2.494421462172365e-06, + "loss": 0.6215553283691406, + "step": 4438 + }, + { + "epoch": 1.5642290748898677, + "grad_norm": 1.7976631043220852, + "learning_rate": 2.490572626320359e-06, + "loss": 0.49461615085601807, + "step": 4439 + }, + { + "epoch": 1.5645814977973567, + "grad_norm": 2.4138380625358757, + "learning_rate": 2.486726339685247e-06, + "loss": 0.6625338196754456, + "step": 4440 + }, + { + "epoch": 1.5649339207048458, + "grad_norm": 1.5979739892152505, + "learning_rate": 2.4828826035727214e-06, + "loss": 0.4059983193874359, + "step": 4441 + }, + { + "epoch": 1.5652863436123348, + "grad_norm": 1.7298713789472393, + "learning_rate": 2.47904141928761e-06, + "loss": 0.6234895586967468, + "step": 4442 + }, + { + "epoch": 1.565638766519824, + "grad_norm": 1.8282339040044808, + "learning_rate": 2.4752027881338757e-06, + "loss": 0.513421893119812, + 
"step": 4443 + }, + { + "epoch": 1.5659911894273129, + "grad_norm": 2.0213648562049693, + "learning_rate": 2.4713667114146123e-06, + "loss": 0.6168510913848877, + "step": 4444 + }, + { + "epoch": 1.5663436123348018, + "grad_norm": 1.8904853102151467, + "learning_rate": 2.4675331904320533e-06, + "loss": 0.5474672317504883, + "step": 4445 + }, + { + "epoch": 1.5666960352422907, + "grad_norm": 2.020157324166176, + "learning_rate": 2.46370222648756e-06, + "loss": 0.7464281916618347, + "step": 4446 + }, + { + "epoch": 1.5670484581497797, + "grad_norm": 1.8187430699226648, + "learning_rate": 2.4598738208816155e-06, + "loss": 0.5890274047851562, + "step": 4447 + }, + { + "epoch": 1.5674008810572686, + "grad_norm": 2.0160604417207293, + "learning_rate": 2.4560479749138554e-06, + "loss": 0.7577700018882751, + "step": 4448 + }, + { + "epoch": 1.5677533039647578, + "grad_norm": 1.6711759350664435, + "learning_rate": 2.4522246898830302e-06, + "loss": 0.5374037027359009, + "step": 4449 + }, + { + "epoch": 1.5681057268722467, + "grad_norm": 1.7947512315133625, + "learning_rate": 2.4484039670870286e-06, + "loss": 0.44840407371520996, + "step": 4450 + }, + { + "epoch": 1.5684581497797356, + "grad_norm": 1.8087906354095658, + "learning_rate": 2.4445858078228647e-06, + "loss": 0.5144427418708801, + "step": 4451 + }, + { + "epoch": 1.5688105726872248, + "grad_norm": 1.7889124821216469, + "learning_rate": 2.440770213386684e-06, + "loss": 0.39119952917099, + "step": 4452 + }, + { + "epoch": 1.5691629955947137, + "grad_norm": 1.6376212389282347, + "learning_rate": 2.436957185073766e-06, + "loss": 0.5287434458732605, + "step": 4453 + }, + { + "epoch": 1.5695154185022027, + "grad_norm": 2.2578778571267315, + "learning_rate": 2.4331467241785157e-06, + "loss": 0.568587064743042, + "step": 4454 + }, + { + "epoch": 1.5698678414096916, + "grad_norm": 3.2977149916111608, + "learning_rate": 2.429338831994458e-06, + "loss": 0.5522792339324951, + "step": 4455 + }, + { + "epoch": 
1.5702202643171805, + "grad_norm": 1.7594156491061212, + "learning_rate": 2.425533509814262e-06, + "loss": 0.48070845007896423, + "step": 4456 + }, + { + "epoch": 1.5705726872246695, + "grad_norm": 1.631888097687176, + "learning_rate": 2.4217307589297135e-06, + "loss": 0.44293439388275146, + "step": 4457 + }, + { + "epoch": 1.5709251101321586, + "grad_norm": 1.933449446432769, + "learning_rate": 2.4179305806317266e-06, + "loss": 0.5753301382064819, + "step": 4458 + }, + { + "epoch": 1.5712775330396476, + "grad_norm": 1.9958241636570169, + "learning_rate": 2.414132976210346e-06, + "loss": 0.5873000025749207, + "step": 4459 + }, + { + "epoch": 1.5716299559471367, + "grad_norm": 2.2068877987049955, + "learning_rate": 2.410337946954736e-06, + "loss": 0.6084823608398438, + "step": 4460 + }, + { + "epoch": 1.5719823788546257, + "grad_norm": 1.743876311662913, + "learning_rate": 2.4065454941531963e-06, + "loss": 0.541124165058136, + "step": 4461 + }, + { + "epoch": 1.5723348017621146, + "grad_norm": 1.8080812306830252, + "learning_rate": 2.4027556190931446e-06, + "loss": 0.5170080661773682, + "step": 4462 + }, + { + "epoch": 1.5726872246696035, + "grad_norm": 1.817245899938438, + "learning_rate": 2.398968323061125e-06, + "loss": 0.5613514184951782, + "step": 4463 + }, + { + "epoch": 1.5730396475770925, + "grad_norm": 1.7097401781842303, + "learning_rate": 2.395183607342807e-06, + "loss": 0.6645728349685669, + "step": 4464 + }, + { + "epoch": 1.5733920704845814, + "grad_norm": 1.8730205237982336, + "learning_rate": 2.391401473222983e-06, + "loss": 0.7077093124389648, + "step": 4465 + }, + { + "epoch": 1.5737444933920703, + "grad_norm": 1.7460518248753176, + "learning_rate": 2.387621921985571e-06, + "loss": 0.5687523484230042, + "step": 4466 + }, + { + "epoch": 1.5740969162995595, + "grad_norm": 1.9850945169232843, + "learning_rate": 2.38384495491361e-06, + "loss": 0.5837362408638, + "step": 4467 + }, + { + "epoch": 1.5744493392070484, + "grad_norm": 2.051593268912329, + 
"learning_rate": 2.3800705732892615e-06, + "loss": 0.5552037358283997, + "step": 4468 + }, + { + "epoch": 1.5748017621145376, + "grad_norm": 1.8128967121473578, + "learning_rate": 2.376298778393814e-06, + "loss": 0.5502952337265015, + "step": 4469 + }, + { + "epoch": 1.5751541850220265, + "grad_norm": 1.958629504700592, + "learning_rate": 2.3725295715076734e-06, + "loss": 0.5621509552001953, + "step": 4470 + }, + { + "epoch": 1.5755066079295155, + "grad_norm": 2.20917213599842, + "learning_rate": 2.3687629539103676e-06, + "loss": 0.6703782081604004, + "step": 4471 + }, + { + "epoch": 1.5758590308370044, + "grad_norm": 1.6659443121840707, + "learning_rate": 2.3649989268805453e-06, + "loss": 0.5681235194206238, + "step": 4472 + }, + { + "epoch": 1.5762114537444933, + "grad_norm": 1.6009126465101926, + "learning_rate": 2.361237491695978e-06, + "loss": 0.611667811870575, + "step": 4473 + }, + { + "epoch": 1.5765638766519823, + "grad_norm": 1.7200740539010873, + "learning_rate": 2.3574786496335546e-06, + "loss": 0.5758671760559082, + "step": 4474 + }, + { + "epoch": 1.5769162995594712, + "grad_norm": 1.9125579541010735, + "learning_rate": 2.3537224019692863e-06, + "loss": 0.4865596294403076, + "step": 4475 + }, + { + "epoch": 1.5772687224669604, + "grad_norm": 1.8564502689111453, + "learning_rate": 2.3499687499782976e-06, + "loss": 0.6356204152107239, + "step": 4476 + }, + { + "epoch": 1.5776211453744493, + "grad_norm": 2.1421860610476022, + "learning_rate": 2.346217694934847e-06, + "loss": 0.7177166938781738, + "step": 4477 + }, + { + "epoch": 1.5779735682819385, + "grad_norm": 1.5480906826266605, + "learning_rate": 2.3424692381122882e-06, + "loss": 0.5727916955947876, + "step": 4478 + }, + { + "epoch": 1.5783259911894274, + "grad_norm": 1.8719733775312895, + "learning_rate": 2.3387233807831144e-06, + "loss": 0.4904511570930481, + "step": 4479 + }, + { + "epoch": 1.5786784140969163, + "grad_norm": 1.781780296857209, + "learning_rate": 2.3349801242189262e-06, + "loss": 
0.6029622554779053, + "step": 4480 + }, + { + "epoch": 1.5790308370044053, + "grad_norm": 1.7377028122196188, + "learning_rate": 2.3312394696904404e-06, + "loss": 0.6462864875793457, + "step": 4481 + }, + { + "epoch": 1.5793832599118942, + "grad_norm": 2.2050402923740555, + "learning_rate": 2.327501418467495e-06, + "loss": 0.6000367403030396, + "step": 4482 + }, + { + "epoch": 1.5797356828193831, + "grad_norm": 1.8056795992302546, + "learning_rate": 2.3237659718190398e-06, + "loss": 0.5498829483985901, + "step": 4483 + }, + { + "epoch": 1.580088105726872, + "grad_norm": 1.9193344841770834, + "learning_rate": 2.320033131013142e-06, + "loss": 0.5445006489753723, + "step": 4484 + }, + { + "epoch": 1.5804405286343612, + "grad_norm": 1.737360484366453, + "learning_rate": 2.316302897316992e-06, + "loss": 0.4878338575363159, + "step": 4485 + }, + { + "epoch": 1.5807929515418502, + "grad_norm": 1.9395301127212525, + "learning_rate": 2.3125752719968763e-06, + "loss": 0.473583459854126, + "step": 4486 + }, + { + "epoch": 1.5811453744493393, + "grad_norm": 1.885736275905952, + "learning_rate": 2.308850256318218e-06, + "loss": 0.6530570983886719, + "step": 4487 + }, + { + "epoch": 1.5814977973568283, + "grad_norm": 1.9957270393411881, + "learning_rate": 2.30512785154554e-06, + "loss": 0.6925215721130371, + "step": 4488 + }, + { + "epoch": 1.5818502202643172, + "grad_norm": 2.319012517660613, + "learning_rate": 2.3014080589424837e-06, + "loss": 0.6210705637931824, + "step": 4489 + }, + { + "epoch": 1.5822026431718061, + "grad_norm": 1.9814470349632005, + "learning_rate": 2.2976908797718013e-06, + "loss": 0.5843231678009033, + "step": 4490 + }, + { + "epoch": 1.582555066079295, + "grad_norm": 1.8411432529202023, + "learning_rate": 2.2939763152953576e-06, + "loss": 0.7014307379722595, + "step": 4491 + }, + { + "epoch": 1.582907488986784, + "grad_norm": 2.432500927945977, + "learning_rate": 2.2902643667741386e-06, + "loss": 0.563744843006134, + "step": 4492 + }, + { + "epoch": 
1.5832599118942732, + "grad_norm": 2.0467865020897227, + "learning_rate": 2.286555035468233e-06, + "loss": 0.6067275404930115, + "step": 4493 + }, + { + "epoch": 1.5836123348017621, + "grad_norm": 1.4471777617782167, + "learning_rate": 2.282848322636836e-06, + "loss": 0.5471328496932983, + "step": 4494 + }, + { + "epoch": 1.583964757709251, + "grad_norm": 1.8188988721843682, + "learning_rate": 2.2791442295382693e-06, + "loss": 0.4994550943374634, + "step": 4495 + }, + { + "epoch": 1.5843171806167402, + "grad_norm": 1.9672025899108128, + "learning_rate": 2.275442757429954e-06, + "loss": 0.6064262390136719, + "step": 4496 + }, + { + "epoch": 1.5846696035242291, + "grad_norm": 1.8109350365291292, + "learning_rate": 2.2717439075684268e-06, + "loss": 0.5119039416313171, + "step": 4497 + }, + { + "epoch": 1.585022026431718, + "grad_norm": 2.2031235285356883, + "learning_rate": 2.26804768120933e-06, + "loss": 0.7276502251625061, + "step": 4498 + }, + { + "epoch": 1.585374449339207, + "grad_norm": 2.0480046358265827, + "learning_rate": 2.264354079607416e-06, + "loss": 0.6175409555435181, + "step": 4499 + }, + { + "epoch": 1.585726872246696, + "grad_norm": 2.165546737643913, + "learning_rate": 2.2606631040165517e-06, + "loss": 0.6289592981338501, + "step": 4500 + }, + { + "epoch": 1.5860792951541849, + "grad_norm": 1.626913781336784, + "learning_rate": 2.2569747556897103e-06, + "loss": 0.5802761316299438, + "step": 4501 + }, + { + "epoch": 1.586431718061674, + "grad_norm": 1.6717876401169283, + "learning_rate": 2.2532890358789604e-06, + "loss": 0.5883978605270386, + "step": 4502 + }, + { + "epoch": 1.586784140969163, + "grad_norm": 1.7433478934489002, + "learning_rate": 2.2496059458355e-06, + "loss": 0.6915061473846436, + "step": 4503 + }, + { + "epoch": 1.5871365638766521, + "grad_norm": 1.7904879000491816, + "learning_rate": 2.2459254868096194e-06, + "loss": 0.6255539655685425, + "step": 4504 + }, + { + "epoch": 1.587488986784141, + "grad_norm": 2.0290072373401706, + 
"learning_rate": 2.2422476600507203e-06, + "loss": 0.6788307428359985, + "step": 4505 + }, + { + "epoch": 1.58784140969163, + "grad_norm": 1.8646329547804459, + "learning_rate": 2.2385724668073104e-06, + "loss": 0.5651443004608154, + "step": 4506 + }, + { + "epoch": 1.588193832599119, + "grad_norm": 1.6858252262208455, + "learning_rate": 2.2348999083270005e-06, + "loss": 0.5308901071548462, + "step": 4507 + }, + { + "epoch": 1.5885462555066079, + "grad_norm": 2.3264820621642084, + "learning_rate": 2.2312299858565156e-06, + "loss": 0.60570228099823, + "step": 4508 + }, + { + "epoch": 1.5888986784140968, + "grad_norm": 1.8330509972931788, + "learning_rate": 2.22756270064168e-06, + "loss": 0.6544185876846313, + "step": 4509 + }, + { + "epoch": 1.5892511013215858, + "grad_norm": 1.7565673285953047, + "learning_rate": 2.2238980539274156e-06, + "loss": 0.667883038520813, + "step": 4510 + }, + { + "epoch": 1.589603524229075, + "grad_norm": 1.7707733782287267, + "learning_rate": 2.2202360469577622e-06, + "loss": 0.647671103477478, + "step": 4511 + }, + { + "epoch": 1.5899559471365639, + "grad_norm": 1.8031539733499908, + "learning_rate": 2.216576680975856e-06, + "loss": 0.6990867257118225, + "step": 4512 + }, + { + "epoch": 1.590308370044053, + "grad_norm": 1.6913080596921681, + "learning_rate": 2.212919957223938e-06, + "loss": 0.6292023658752441, + "step": 4513 + }, + { + "epoch": 1.590660792951542, + "grad_norm": 2.0512598736304763, + "learning_rate": 2.2092658769433504e-06, + "loss": 0.638721227645874, + "step": 4514 + }, + { + "epoch": 1.5910132158590309, + "grad_norm": 2.0710919586830365, + "learning_rate": 2.2056144413745396e-06, + "loss": 0.5622225403785706, + "step": 4515 + }, + { + "epoch": 1.5913656387665198, + "grad_norm": 1.9225600729192178, + "learning_rate": 2.2019656517570576e-06, + "loss": 0.44093507528305054, + "step": 4516 + }, + { + "epoch": 1.5917180616740088, + "grad_norm": 1.9689195876449703, + "learning_rate": 2.198319509329556e-06, + "loss": 
0.6889619827270508, + "step": 4517 + }, + { + "epoch": 1.5920704845814977, + "grad_norm": 1.8723694409082583, + "learning_rate": 2.1946760153297773e-06, + "loss": 0.5873552560806274, + "step": 4518 + }, + { + "epoch": 1.5924229074889866, + "grad_norm": 2.3733819724747245, + "learning_rate": 2.191035170994584e-06, + "loss": 0.7172325849533081, + "step": 4519 + }, + { + "epoch": 1.5927753303964758, + "grad_norm": 1.5631566998768178, + "learning_rate": 2.187396977559927e-06, + "loss": 0.520845890045166, + "step": 4520 + }, + { + "epoch": 1.5931277533039647, + "grad_norm": 1.5657344992000655, + "learning_rate": 2.1837614362608574e-06, + "loss": 0.5241606831550598, + "step": 4521 + }, + { + "epoch": 1.5934801762114539, + "grad_norm": 2.0290302307971433, + "learning_rate": 2.1801285483315303e-06, + "loss": 0.583808422088623, + "step": 4522 + }, + { + "epoch": 1.5938325991189428, + "grad_norm": 1.829890026298915, + "learning_rate": 2.1764983150051955e-06, + "loss": 0.4648814797401428, + "step": 4523 + }, + { + "epoch": 1.5941850220264318, + "grad_norm": 1.9603824667877958, + "learning_rate": 2.1728707375142087e-06, + "loss": 0.590090274810791, + "step": 4524 + }, + { + "epoch": 1.5945374449339207, + "grad_norm": 2.0292397946897527, + "learning_rate": 2.16924581709002e-06, + "loss": 0.6554102897644043, + "step": 4525 + }, + { + "epoch": 1.5948898678414096, + "grad_norm": 2.011864917811992, + "learning_rate": 2.1656235549631677e-06, + "loss": 0.5880511999130249, + "step": 4526 + }, + { + "epoch": 1.5952422907488986, + "grad_norm": 1.6246832017365502, + "learning_rate": 2.1620039523633074e-06, + "loss": 0.5779908299446106, + "step": 4527 + }, + { + "epoch": 1.5955947136563877, + "grad_norm": 1.9147900218294176, + "learning_rate": 2.1583870105191775e-06, + "loss": 0.5030412673950195, + "step": 4528 + }, + { + "epoch": 1.5959471365638767, + "grad_norm": 1.9632795275127009, + "learning_rate": 2.1547727306586173e-06, + "loss": 0.5667461156845093, + "step": 4529 + }, + { + 
"epoch": 1.5962995594713656, + "grad_norm": 2.3190730605108882, + "learning_rate": 2.151161114008563e-06, + "loss": 0.6820607781410217, + "step": 4530 + }, + { + "epoch": 1.5966519823788548, + "grad_norm": 1.7640709477354637, + "learning_rate": 2.1475521617950425e-06, + "loss": 0.6165209412574768, + "step": 4531 + }, + { + "epoch": 1.5970044052863437, + "grad_norm": 1.897918487033638, + "learning_rate": 2.1439458752431887e-06, + "loss": 0.5987168550491333, + "step": 4532 + }, + { + "epoch": 1.5973568281938326, + "grad_norm": 1.8946893490374197, + "learning_rate": 2.1403422555772226e-06, + "loss": 0.5161086320877075, + "step": 4533 + }, + { + "epoch": 1.5977092511013216, + "grad_norm": 1.817150642667859, + "learning_rate": 2.1367413040204543e-06, + "loss": 0.5216903686523438, + "step": 4534 + }, + { + "epoch": 1.5980616740088105, + "grad_norm": 1.7820775067820096, + "learning_rate": 2.133143021795302e-06, + "loss": 0.5664666891098022, + "step": 4535 + }, + { + "epoch": 1.5984140969162994, + "grad_norm": 1.8205676682468495, + "learning_rate": 2.129547410123268e-06, + "loss": 0.501051127910614, + "step": 4536 + }, + { + "epoch": 1.5987665198237886, + "grad_norm": 1.5799563385798543, + "learning_rate": 2.1259544702249515e-06, + "loss": 0.5466792583465576, + "step": 4537 + }, + { + "epoch": 1.5991189427312775, + "grad_norm": 1.9007615560911546, + "learning_rate": 2.122364203320043e-06, + "loss": 0.5295613408088684, + "step": 4538 + }, + { + "epoch": 1.5994713656387667, + "grad_norm": 1.6670646942482272, + "learning_rate": 2.1187766106273224e-06, + "loss": 0.5406922101974487, + "step": 4539 + }, + { + "epoch": 1.5998237885462556, + "grad_norm": 2.0700620230157125, + "learning_rate": 2.1151916933646764e-06, + "loss": 0.5908178687095642, + "step": 4540 + }, + { + "epoch": 1.6001762114537446, + "grad_norm": 1.8405525752725544, + "learning_rate": 2.1116094527490594e-06, + "loss": 0.6207743883132935, + "step": 4541 + }, + { + "epoch": 1.6005286343612335, + "grad_norm": 
2.7642600887250652, + "learning_rate": 2.1080298899965413e-06, + "loss": 0.5655614137649536, + "step": 4542 + }, + { + "epoch": 1.6008810572687224, + "grad_norm": 1.5764846584358823, + "learning_rate": 2.104453006322268e-06, + "loss": 0.6019319295883179, + "step": 4543 + }, + { + "epoch": 1.6012334801762114, + "grad_norm": 1.8499785252270624, + "learning_rate": 2.1008788029404794e-06, + "loss": 0.6109766364097595, + "step": 4544 + }, + { + "epoch": 1.6015859030837003, + "grad_norm": 1.8285934792669327, + "learning_rate": 2.0973072810645078e-06, + "loss": 0.5309078693389893, + "step": 4545 + }, + { + "epoch": 1.6019383259911895, + "grad_norm": 1.7116030885611606, + "learning_rate": 2.093738441906774e-06, + "loss": 0.5440298318862915, + "step": 4546 + }, + { + "epoch": 1.6022907488986784, + "grad_norm": 1.6012955775631803, + "learning_rate": 2.0901722866787842e-06, + "loss": 0.46502384543418884, + "step": 4547 + }, + { + "epoch": 1.6026431718061676, + "grad_norm": 1.7999501734847188, + "learning_rate": 2.086608816591146e-06, + "loss": 0.4822906255722046, + "step": 4548 + }, + { + "epoch": 1.6029955947136565, + "grad_norm": 1.8169323717501906, + "learning_rate": 2.083048032853534e-06, + "loss": 0.6382625699043274, + "step": 4549 + }, + { + "epoch": 1.6033480176211454, + "grad_norm": 1.7542851479568786, + "learning_rate": 2.0794899366747334e-06, + "loss": 0.6070914268493652, + "step": 4550 + }, + { + "epoch": 1.6037004405286344, + "grad_norm": 1.8496689505105712, + "learning_rate": 2.0759345292626045e-06, + "loss": 0.5953283309936523, + "step": 4551 + }, + { + "epoch": 1.6040528634361233, + "grad_norm": 1.6448363622587787, + "learning_rate": 2.0723818118240958e-06, + "loss": 0.47553640604019165, + "step": 4552 + }, + { + "epoch": 1.6044052863436122, + "grad_norm": 2.007835441279153, + "learning_rate": 2.0688317855652463e-06, + "loss": 0.7020712494850159, + "step": 4553 + }, + { + "epoch": 1.6047577092511012, + "grad_norm": 1.739770344308816, + "learning_rate": 
2.0652844516911776e-06, + "loss": 0.5998836159706116, + "step": 4554 + }, + { + "epoch": 1.6051101321585903, + "grad_norm": 1.7690620328907303, + "learning_rate": 2.0617398114060983e-06, + "loss": 0.6501786708831787, + "step": 4555 + }, + { + "epoch": 1.6054625550660793, + "grad_norm": 1.7628232586759778, + "learning_rate": 2.0581978659133097e-06, + "loss": 0.6444278955459595, + "step": 4556 + }, + { + "epoch": 1.6058149779735684, + "grad_norm": 1.8812364367093761, + "learning_rate": 2.0546586164151827e-06, + "loss": 0.6756579875946045, + "step": 4557 + }, + { + "epoch": 1.6061674008810574, + "grad_norm": 1.9541887465796286, + "learning_rate": 2.051122064113189e-06, + "loss": 0.6043737530708313, + "step": 4558 + }, + { + "epoch": 1.6065198237885463, + "grad_norm": 1.7992795463772795, + "learning_rate": 2.047588210207877e-06, + "loss": 0.6504104137420654, + "step": 4559 + }, + { + "epoch": 1.6068722466960352, + "grad_norm": 1.8447157864854533, + "learning_rate": 2.044057055898879e-06, + "loss": 0.6586685180664062, + "step": 4560 + }, + { + "epoch": 1.6072246696035242, + "grad_norm": 1.6895598009184531, + "learning_rate": 2.0405286023849125e-06, + "loss": 0.4463368058204651, + "step": 4561 + }, + { + "epoch": 1.607577092511013, + "grad_norm": 1.626067629091748, + "learning_rate": 2.037002850863777e-06, + "loss": 0.5208157896995544, + "step": 4562 + }, + { + "epoch": 1.607929515418502, + "grad_norm": 2.325947552099387, + "learning_rate": 2.033479802532354e-06, + "loss": 0.612602174282074, + "step": 4563 + }, + { + "epoch": 1.6082819383259912, + "grad_norm": 1.8677335810734068, + "learning_rate": 2.0299594585866166e-06, + "loss": 0.6871482133865356, + "step": 4564 + }, + { + "epoch": 1.6086343612334801, + "grad_norm": 2.1450630320575863, + "learning_rate": 2.0264418202215998e-06, + "loss": 0.5770177245140076, + "step": 4565 + }, + { + "epoch": 1.6089867841409693, + "grad_norm": 2.0018570918486263, + "learning_rate": 2.0229268886314413e-06, + "loss": 0.600841224193573, 
+ "step": 4566 + }, + { + "epoch": 1.6093392070484582, + "grad_norm": 1.4951834973656204, + "learning_rate": 2.0194146650093494e-06, + "loss": 0.47742071747779846, + "step": 4567 + }, + { + "epoch": 1.6096916299559472, + "grad_norm": 1.932667797658379, + "learning_rate": 2.015905150547612e-06, + "loss": 0.5528711080551147, + "step": 4568 + }, + { + "epoch": 1.610044052863436, + "grad_norm": 1.7893968437532208, + "learning_rate": 2.0123983464376028e-06, + "loss": 0.6892603635787964, + "step": 4569 + }, + { + "epoch": 1.610396475770925, + "grad_norm": 2.0432539431091405, + "learning_rate": 2.0088942538697687e-06, + "loss": 0.593653678894043, + "step": 4570 + }, + { + "epoch": 1.610748898678414, + "grad_norm": 1.913622035178548, + "learning_rate": 2.005392874033646e-06, + "loss": 0.5570813417434692, + "step": 4571 + }, + { + "epoch": 1.6111013215859031, + "grad_norm": 1.7912413841249368, + "learning_rate": 2.0018942081178426e-06, + "loss": 0.6723357439041138, + "step": 4572 + }, + { + "epoch": 1.611453744493392, + "grad_norm": 1.8833118579628767, + "learning_rate": 1.9983982573100413e-06, + "loss": 0.5333940982818604, + "step": 4573 + }, + { + "epoch": 1.611806167400881, + "grad_norm": 2.1881508790927358, + "learning_rate": 1.9949050227970148e-06, + "loss": 0.6404193043708801, + "step": 4574 + }, + { + "epoch": 1.6121585903083702, + "grad_norm": 1.9103565569987608, + "learning_rate": 1.991414505764605e-06, + "loss": 0.6831241250038147, + "step": 4575 + }, + { + "epoch": 1.612511013215859, + "grad_norm": 2.3229832844307063, + "learning_rate": 1.9879267073977337e-06, + "loss": 0.6741847991943359, + "step": 4576 + }, + { + "epoch": 1.612863436123348, + "grad_norm": 1.843434925588856, + "learning_rate": 1.9844416288804004e-06, + "loss": 0.5234787464141846, + "step": 4577 + }, + { + "epoch": 1.613215859030837, + "grad_norm": 1.931234115746558, + "learning_rate": 1.9809592713956782e-06, + "loss": 0.6462803483009338, + "step": 4578 + }, + { + "epoch": 1.613568281938326, + 
"grad_norm": 2.495392945939654, + "learning_rate": 1.977479636125724e-06, + "loss": 0.612025797367096, + "step": 4579 + }, + { + "epoch": 1.6139207048458148, + "grad_norm": 1.6414504893846202, + "learning_rate": 1.9740027242517668e-06, + "loss": 0.5065322518348694, + "step": 4580 + }, + { + "epoch": 1.614273127753304, + "grad_norm": 1.9613495904560583, + "learning_rate": 1.9705285369540994e-06, + "loss": 0.4986911714076996, + "step": 4581 + }, + { + "epoch": 1.614625550660793, + "grad_norm": 2.1185650604413926, + "learning_rate": 1.967057075412111e-06, + "loss": 0.6030969619750977, + "step": 4582 + }, + { + "epoch": 1.614977973568282, + "grad_norm": 1.8032946015429019, + "learning_rate": 1.963588340804251e-06, + "loss": 0.6116718649864197, + "step": 4583 + }, + { + "epoch": 1.615330396475771, + "grad_norm": 1.9008591407855147, + "learning_rate": 1.960122334308047e-06, + "loss": 0.8064850568771362, + "step": 4584 + }, + { + "epoch": 1.61568281938326, + "grad_norm": 2.130250646945173, + "learning_rate": 1.9566590571000997e-06, + "loss": 0.7416974306106567, + "step": 4585 + }, + { + "epoch": 1.616035242290749, + "grad_norm": 2.0285944926888604, + "learning_rate": 1.9531985103560813e-06, + "loss": 0.48169833421707153, + "step": 4586 + }, + { + "epoch": 1.6163876651982378, + "grad_norm": 8.08226040018375, + "learning_rate": 1.949740695250746e-06, + "loss": 0.7766422629356384, + "step": 4587 + }, + { + "epoch": 1.6167400881057268, + "grad_norm": 1.6227557131714891, + "learning_rate": 1.9462856129579144e-06, + "loss": 0.3793888986110687, + "step": 4588 + }, + { + "epoch": 1.6170925110132157, + "grad_norm": 1.6662726387585254, + "learning_rate": 1.94283326465047e-06, + "loss": 0.6129955053329468, + "step": 4589 + }, + { + "epoch": 1.6174449339207049, + "grad_norm": 1.927411767174183, + "learning_rate": 1.9393836515003874e-06, + "loss": 0.7420347929000854, + "step": 4590 + }, + { + "epoch": 1.6177973568281938, + "grad_norm": 1.810002162071199, + "learning_rate": 
1.9359367746786993e-06, + "loss": 0.49013108015060425, + "step": 4591 + }, + { + "epoch": 1.618149779735683, + "grad_norm": 1.8150752517575908, + "learning_rate": 1.932492635355513e-06, + "loss": 0.5198413133621216, + "step": 4592 + }, + { + "epoch": 1.618502202643172, + "grad_norm": 1.9402976415289777, + "learning_rate": 1.929051234700007e-06, + "loss": 0.6031092405319214, + "step": 4593 + }, + { + "epoch": 1.6188546255506608, + "grad_norm": 2.041490312444486, + "learning_rate": 1.9256125738804264e-06, + "loss": 0.6269406080245972, + "step": 4594 + }, + { + "epoch": 1.6192070484581498, + "grad_norm": 1.801972947869227, + "learning_rate": 1.922176654064096e-06, + "loss": 0.4518774747848511, + "step": 4595 + }, + { + "epoch": 1.6195594713656387, + "grad_norm": 1.8680481961289441, + "learning_rate": 1.9187434764174027e-06, + "loss": 0.6199424862861633, + "step": 4596 + }, + { + "epoch": 1.6199118942731277, + "grad_norm": 2.634014207343412, + "learning_rate": 1.9153130421057955e-06, + "loss": 0.5155355930328369, + "step": 4597 + }, + { + "epoch": 1.6202643171806166, + "grad_norm": 1.8081505074484028, + "learning_rate": 1.9118853522938087e-06, + "loss": 0.6188424229621887, + "step": 4598 + }, + { + "epoch": 1.6206167400881057, + "grad_norm": 1.8999856535081827, + "learning_rate": 1.908460408145033e-06, + "loss": 0.5807337164878845, + "step": 4599 + }, + { + "epoch": 1.6209691629955947, + "grad_norm": 1.6142171687185456, + "learning_rate": 1.9050382108221311e-06, + "loss": 0.5258378982543945, + "step": 4600 + }, + { + "epoch": 1.6213215859030838, + "grad_norm": 1.9194714558474444, + "learning_rate": 1.9016187614868308e-06, + "loss": 0.6612311601638794, + "step": 4601 + }, + { + "epoch": 1.6216740088105728, + "grad_norm": 1.7849999472385678, + "learning_rate": 1.8982020612999285e-06, + "loss": 0.611383855342865, + "step": 4602 + }, + { + "epoch": 1.6220264317180617, + "grad_norm": 1.7599275323638883, + "learning_rate": 1.894788111421294e-06, + "loss": 0.6111105680465698, 
+ "step": 4603 + }, + { + "epoch": 1.6223788546255506, + "grad_norm": 2.061255928544227, + "learning_rate": 1.8913769130098504e-06, + "loss": 0.7554557919502258, + "step": 4604 + }, + { + "epoch": 1.6227312775330396, + "grad_norm": 1.7818402726516558, + "learning_rate": 1.887968467223591e-06, + "loss": 0.597324013710022, + "step": 4605 + }, + { + "epoch": 1.6230837004405285, + "grad_norm": 2.3192399293978014, + "learning_rate": 1.8845627752195839e-06, + "loss": 0.6232750415802002, + "step": 4606 + }, + { + "epoch": 1.6234361233480175, + "grad_norm": 1.7697166073683794, + "learning_rate": 1.8811598381539543e-06, + "loss": 0.45699936151504517, + "step": 4607 + }, + { + "epoch": 1.6237885462555066, + "grad_norm": 1.9980768091261172, + "learning_rate": 1.87775965718189e-06, + "loss": 0.5307953953742981, + "step": 4608 + }, + { + "epoch": 1.6241409691629956, + "grad_norm": 1.8817640717556428, + "learning_rate": 1.8743622334576495e-06, + "loss": 0.6013764142990112, + "step": 4609 + }, + { + "epoch": 1.6244933920704847, + "grad_norm": 2.0614740198183066, + "learning_rate": 1.8709675681345485e-06, + "loss": 0.5143340826034546, + "step": 4610 + }, + { + "epoch": 1.6248458149779736, + "grad_norm": 1.6895900050976231, + "learning_rate": 1.8675756623649788e-06, + "loss": 0.506861686706543, + "step": 4611 + }, + { + "epoch": 1.6251982378854626, + "grad_norm": 2.223885866703504, + "learning_rate": 1.8641865173003793e-06, + "loss": 0.6807849407196045, + "step": 4612 + }, + { + "epoch": 1.6255506607929515, + "grad_norm": 1.8930990565263293, + "learning_rate": 1.8608001340912573e-06, + "loss": 0.592629075050354, + "step": 4613 + }, + { + "epoch": 1.6259030837004405, + "grad_norm": 2.032831166123834, + "learning_rate": 1.8574165138871925e-06, + "loss": 0.5669249296188354, + "step": 4614 + }, + { + "epoch": 1.6262555066079294, + "grad_norm": 1.9071887451281335, + "learning_rate": 1.8540356578368135e-06, + "loss": 0.7123057246208191, + "step": 4615 + }, + { + "epoch": 
1.6266079295154185, + "grad_norm": 1.7499585996323015, + "learning_rate": 1.8506575670878168e-06, + "loss": 0.5844429731369019, + "step": 4616 + }, + { + "epoch": 1.6269603524229075, + "grad_norm": 1.8176797951508414, + "learning_rate": 1.8472822427869597e-06, + "loss": 0.661457359790802, + "step": 4617 + }, + { + "epoch": 1.6273127753303964, + "grad_norm": 1.9714232511915755, + "learning_rate": 1.8439096860800565e-06, + "loss": 0.6944575905799866, + "step": 4618 + }, + { + "epoch": 1.6276651982378856, + "grad_norm": 1.9471855664955058, + "learning_rate": 1.8405398981119927e-06, + "loss": 0.5818712115287781, + "step": 4619 + }, + { + "epoch": 1.6280176211453745, + "grad_norm": 1.8573981084806426, + "learning_rate": 1.8371728800266964e-06, + "loss": 0.6373921632766724, + "step": 4620 + }, + { + "epoch": 1.6283700440528635, + "grad_norm": 1.8455409169726698, + "learning_rate": 1.8338086329671734e-06, + "loss": 0.4629862904548645, + "step": 4621 + }, + { + "epoch": 1.6287224669603524, + "grad_norm": 2.1547215929268306, + "learning_rate": 1.8304471580754779e-06, + "loss": 0.6537790894508362, + "step": 4622 + }, + { + "epoch": 1.6290748898678413, + "grad_norm": 1.9071168587624383, + "learning_rate": 1.8270884564927272e-06, + "loss": 0.527474582195282, + "step": 4623 + }, + { + "epoch": 1.6294273127753303, + "grad_norm": 1.9134019886674338, + "learning_rate": 1.8237325293590934e-06, + "loss": 0.48941463232040405, + "step": 4624 + }, + { + "epoch": 1.6297797356828194, + "grad_norm": 1.7797372995747724, + "learning_rate": 1.8203793778138123e-06, + "loss": 0.6276243925094604, + "step": 4625 + }, + { + "epoch": 1.6301321585903084, + "grad_norm": 2.175835170708709, + "learning_rate": 1.8170290029951708e-06, + "loss": 0.6339844465255737, + "step": 4626 + }, + { + "epoch": 1.6304845814977975, + "grad_norm": 1.8667689453086813, + "learning_rate": 1.813681406040524e-06, + "loss": 0.517188549041748, + "step": 4627 + }, + { + "epoch": 1.6308370044052865, + "grad_norm": 
1.8956914399941025, + "learning_rate": 1.8103365880862667e-06, + "loss": 0.576552152633667, + "step": 4628 + }, + { + "epoch": 1.6311894273127754, + "grad_norm": 1.7936413452903872, + "learning_rate": 1.8069945502678688e-06, + "loss": 0.5703557729721069, + "step": 4629 + }, + { + "epoch": 1.6315418502202643, + "grad_norm": 1.9048409586347532, + "learning_rate": 1.8036552937198447e-06, + "loss": 0.538072943687439, + "step": 4630 + }, + { + "epoch": 1.6318942731277533, + "grad_norm": 1.6721149802212347, + "learning_rate": 1.8003188195757693e-06, + "loss": 0.4144761562347412, + "step": 4631 + }, + { + "epoch": 1.6322466960352422, + "grad_norm": 2.056410628146389, + "learning_rate": 1.7969851289682704e-06, + "loss": 0.5357951521873474, + "step": 4632 + }, + { + "epoch": 1.6325991189427311, + "grad_norm": 1.9601913826257962, + "learning_rate": 1.7936542230290333e-06, + "loss": 0.6158766746520996, + "step": 4633 + }, + { + "epoch": 1.6329515418502203, + "grad_norm": 2.018782202231636, + "learning_rate": 1.790326102888794e-06, + "loss": 0.7278525233268738, + "step": 4634 + }, + { + "epoch": 1.6333039647577092, + "grad_norm": 1.8937378067838377, + "learning_rate": 1.787000769677354e-06, + "loss": 0.5113881230354309, + "step": 4635 + }, + { + "epoch": 1.6336563876651984, + "grad_norm": 2.2218997592930987, + "learning_rate": 1.7836782245235485e-06, + "loss": 0.6247432827949524, + "step": 4636 + }, + { + "epoch": 1.6340088105726873, + "grad_norm": 1.9409043558834718, + "learning_rate": 1.7803584685552877e-06, + "loss": 0.513325572013855, + "step": 4637 + }, + { + "epoch": 1.6343612334801763, + "grad_norm": 2.023194297584799, + "learning_rate": 1.7770415028995213e-06, + "loss": 0.4980276823043823, + "step": 4638 + }, + { + "epoch": 1.6347136563876652, + "grad_norm": 1.8669544509684106, + "learning_rate": 1.7737273286822565e-06, + "loss": 0.5832515954971313, + "step": 4639 + }, + { + "epoch": 1.6350660792951541, + "grad_norm": 1.7519671458346908, + "learning_rate": 
1.7704159470285532e-06, + "loss": 0.6030116081237793, + "step": 4640 + }, + { + "epoch": 1.635418502202643, + "grad_norm": 2.26980120712081, + "learning_rate": 1.7671073590625188e-06, + "loss": 0.5494866371154785, + "step": 4641 + }, + { + "epoch": 1.635770925110132, + "grad_norm": 1.8803060042220399, + "learning_rate": 1.7638015659073216e-06, + "loss": 0.617791473865509, + "step": 4642 + }, + { + "epoch": 1.6361233480176212, + "grad_norm": 1.8809591920257003, + "learning_rate": 1.760498568685175e-06, + "loss": 0.5213589668273926, + "step": 4643 + }, + { + "epoch": 1.63647577092511, + "grad_norm": 1.7835752431606857, + "learning_rate": 1.7571983685173367e-06, + "loss": 0.5114192962646484, + "step": 4644 + }, + { + "epoch": 1.6368281938325993, + "grad_norm": 1.8264916856765907, + "learning_rate": 1.7539009665241291e-06, + "loss": 0.6207156181335449, + "step": 4645 + }, + { + "epoch": 1.6371806167400882, + "grad_norm": 1.7037955383522276, + "learning_rate": 1.750606363824915e-06, + "loss": 0.5893350839614868, + "step": 4646 + }, + { + "epoch": 1.6375330396475771, + "grad_norm": 2.0239756750398077, + "learning_rate": 1.7473145615381092e-06, + "loss": 0.6453898549079895, + "step": 4647 + }, + { + "epoch": 1.637885462555066, + "grad_norm": 1.623565893456343, + "learning_rate": 1.7440255607811773e-06, + "loss": 0.5098680853843689, + "step": 4648 + }, + { + "epoch": 1.638237885462555, + "grad_norm": 1.9009179186379688, + "learning_rate": 1.7407393626706305e-06, + "loss": 0.5841408967971802, + "step": 4649 + }, + { + "epoch": 1.638590308370044, + "grad_norm": 1.8903189372223002, + "learning_rate": 1.7374559683220337e-06, + "loss": 0.5593127012252808, + "step": 4650 + }, + { + "epoch": 1.638942731277533, + "grad_norm": 1.9192509501465884, + "learning_rate": 1.7341753788499983e-06, + "loss": 0.6885190606117249, + "step": 4651 + }, + { + "epoch": 1.639295154185022, + "grad_norm": 2.019948918382337, + "learning_rate": 1.730897595368175e-06, + "loss": 0.6271092891693115, + 
"step": 4652 + }, + { + "epoch": 1.639647577092511, + "grad_norm": 1.8193728432309102, + "learning_rate": 1.7276226189892763e-06, + "loss": 0.6035536527633667, + "step": 4653 + }, + { + "epoch": 1.6400000000000001, + "grad_norm": 1.876741558260643, + "learning_rate": 1.724350450825052e-06, + "loss": 0.49980080127716064, + "step": 4654 + }, + { + "epoch": 1.640352422907489, + "grad_norm": 1.945483701689467, + "learning_rate": 1.721081091986303e-06, + "loss": 0.6056489944458008, + "step": 4655 + }, + { + "epoch": 1.640704845814978, + "grad_norm": 1.998934183218588, + "learning_rate": 1.717814543582873e-06, + "loss": 0.5611459016799927, + "step": 4656 + }, + { + "epoch": 1.641057268722467, + "grad_norm": 1.8501618159787931, + "learning_rate": 1.7145508067236515e-06, + "loss": 0.5655262470245361, + "step": 4657 + }, + { + "epoch": 1.6414096916299559, + "grad_norm": 2.17470073262635, + "learning_rate": 1.7112898825165814e-06, + "loss": 0.7793601751327515, + "step": 4658 + }, + { + "epoch": 1.6417621145374448, + "grad_norm": 1.8078904709838137, + "learning_rate": 1.7080317720686434e-06, + "loss": 0.6587018370628357, + "step": 4659 + }, + { + "epoch": 1.642114537444934, + "grad_norm": 2.0052578395520313, + "learning_rate": 1.7047764764858598e-06, + "loss": 0.5546305775642395, + "step": 4660 + }, + { + "epoch": 1.642466960352423, + "grad_norm": 2.2168924782846844, + "learning_rate": 1.7015239968733066e-06, + "loss": 0.6215736865997314, + "step": 4661 + }, + { + "epoch": 1.642819383259912, + "grad_norm": 1.95167913439103, + "learning_rate": 1.6982743343350983e-06, + "loss": 0.5772532224655151, + "step": 4662 + }, + { + "epoch": 1.643171806167401, + "grad_norm": 1.9049742666250684, + "learning_rate": 1.6950274899743947e-06, + "loss": 0.567034900188446, + "step": 4663 + }, + { + "epoch": 1.64352422907489, + "grad_norm": 1.6486603082479945, + "learning_rate": 1.6917834648933985e-06, + "loss": 0.5306716561317444, + "step": 4664 + }, + { + "epoch": 1.6438766519823789, + 
"grad_norm": 1.923372734442966, + "learning_rate": 1.688542260193351e-06, + "loss": 0.6691634654998779, + "step": 4665 + }, + { + "epoch": 1.6442290748898678, + "grad_norm": 1.9073972200097022, + "learning_rate": 1.6853038769745466e-06, + "loss": 0.6071977615356445, + "step": 4666 + }, + { + "epoch": 1.6445814977973567, + "grad_norm": 3.0113580201176355, + "learning_rate": 1.6820683163363161e-06, + "loss": 0.743544340133667, + "step": 4667 + }, + { + "epoch": 1.6449339207048457, + "grad_norm": 2.2198521832647864, + "learning_rate": 1.6788355793770238e-06, + "loss": 0.5745127201080322, + "step": 4668 + }, + { + "epoch": 1.6452863436123348, + "grad_norm": 2.083730313741091, + "learning_rate": 1.6756056671940902e-06, + "loss": 0.5153336524963379, + "step": 4669 + }, + { + "epoch": 1.6456387665198238, + "grad_norm": 1.888215895134721, + "learning_rate": 1.6723785808839666e-06, + "loss": 0.5780388116836548, + "step": 4670 + }, + { + "epoch": 1.645991189427313, + "grad_norm": 1.975333041709577, + "learning_rate": 1.6691543215421513e-06, + "loss": 0.601921796798706, + "step": 4671 + }, + { + "epoch": 1.6463436123348019, + "grad_norm": 1.8402715148458082, + "learning_rate": 1.6659328902631766e-06, + "loss": 0.6636123657226562, + "step": 4672 + }, + { + "epoch": 1.6466960352422908, + "grad_norm": 1.804292320266694, + "learning_rate": 1.6627142881406188e-06, + "loss": 0.45225393772125244, + "step": 4673 + }, + { + "epoch": 1.6470484581497797, + "grad_norm": 1.862693343451114, + "learning_rate": 1.6594985162670984e-06, + "loss": 0.6406756043434143, + "step": 4674 + }, + { + "epoch": 1.6474008810572687, + "grad_norm": 2.11645792406816, + "learning_rate": 1.6562855757342632e-06, + "loss": 0.6735906600952148, + "step": 4675 + }, + { + "epoch": 1.6477533039647576, + "grad_norm": 1.9503356292211693, + "learning_rate": 1.6530754676328064e-06, + "loss": 0.515188992023468, + "step": 4676 + }, + { + "epoch": 1.6481057268722465, + "grad_norm": 1.721977079638204, + "learning_rate": 
1.6498681930524652e-06, + "loss": 0.5976129174232483, + "step": 4677 + }, + { + "epoch": 1.6484581497797357, + "grad_norm": 1.9285425022468947, + "learning_rate": 1.6466637530820074e-06, + "loss": 0.7367427945137024, + "step": 4678 + }, + { + "epoch": 1.6488105726872246, + "grad_norm": 2.073959448612198, + "learning_rate": 1.6434621488092385e-06, + "loss": 0.5173717737197876, + "step": 4679 + }, + { + "epoch": 1.6491629955947138, + "grad_norm": 2.1289983497571745, + "learning_rate": 1.6402633813210056e-06, + "loss": 0.7961066961288452, + "step": 4680 + }, + { + "epoch": 1.6495154185022027, + "grad_norm": 2.1150632325299488, + "learning_rate": 1.637067451703187e-06, + "loss": 0.8271595239639282, + "step": 4681 + }, + { + "epoch": 1.6498678414096917, + "grad_norm": 1.9513356704584446, + "learning_rate": 1.6338743610407103e-06, + "loss": 0.6818888783454895, + "step": 4682 + }, + { + "epoch": 1.6502202643171806, + "grad_norm": 2.182931567425792, + "learning_rate": 1.6306841104175219e-06, + "loss": 0.5168178677558899, + "step": 4683 + }, + { + "epoch": 1.6505726872246695, + "grad_norm": 1.8122401400933128, + "learning_rate": 1.627496700916612e-06, + "loss": 0.5792043209075928, + "step": 4684 + }, + { + "epoch": 1.6509251101321585, + "grad_norm": 1.8346977982265331, + "learning_rate": 1.624312133620013e-06, + "loss": 0.6099069118499756, + "step": 4685 + }, + { + "epoch": 1.6512775330396474, + "grad_norm": 1.7489569966562013, + "learning_rate": 1.6211304096087832e-06, + "loss": 0.4562867283821106, + "step": 4686 + }, + { + "epoch": 1.6516299559471366, + "grad_norm": 1.767208393167573, + "learning_rate": 1.61795152996302e-06, + "loss": 0.48648780584335327, + "step": 4687 + }, + { + "epoch": 1.6519823788546255, + "grad_norm": 1.619888597224146, + "learning_rate": 1.6147754957618533e-06, + "loss": 0.5351820588111877, + "step": 4688 + }, + { + "epoch": 1.6523348017621147, + "grad_norm": 1.5845894367063569, + "learning_rate": 1.6116023080834442e-06, + "loss": 
0.4646923542022705, + "step": 4689 + }, + { + "epoch": 1.6526872246696036, + "grad_norm": 2.0533783205545304, + "learning_rate": 1.608431968005001e-06, + "loss": 0.6257984638214111, + "step": 4690 + }, + { + "epoch": 1.6530396475770925, + "grad_norm": 1.6714052981831828, + "learning_rate": 1.605264476602747e-06, + "loss": 0.5646224021911621, + "step": 4691 + }, + { + "epoch": 1.6533920704845815, + "grad_norm": 1.9704920715227376, + "learning_rate": 1.6020998349519457e-06, + "loss": 0.6074661612510681, + "step": 4692 + }, + { + "epoch": 1.6537444933920704, + "grad_norm": 2.119532209280586, + "learning_rate": 1.598938044126901e-06, + "loss": 0.703096866607666, + "step": 4693 + }, + { + "epoch": 1.6540969162995593, + "grad_norm": 2.0281924961410436, + "learning_rate": 1.5957791052009397e-06, + "loss": 0.6677542924880981, + "step": 4694 + }, + { + "epoch": 1.6544493392070485, + "grad_norm": 2.2554606939897313, + "learning_rate": 1.5926230192464232e-06, + "loss": 0.755639910697937, + "step": 4695 + }, + { + "epoch": 1.6548017621145374, + "grad_norm": 2.0543326225263705, + "learning_rate": 1.5894697873347442e-06, + "loss": 0.7008202075958252, + "step": 4696 + }, + { + "epoch": 1.6551541850220264, + "grad_norm": 1.7892378339322623, + "learning_rate": 1.5863194105363244e-06, + "loss": 0.5049681067466736, + "step": 4697 + }, + { + "epoch": 1.6555066079295155, + "grad_norm": 1.8394208195845667, + "learning_rate": 1.583171889920626e-06, + "loss": 0.7415407299995422, + "step": 4698 + }, + { + "epoch": 1.6558590308370045, + "grad_norm": 2.0103355889821404, + "learning_rate": 1.5800272265561256e-06, + "loss": 0.7949470281600952, + "step": 4699 + }, + { + "epoch": 1.6562114537444934, + "grad_norm": 2.2401604191268456, + "learning_rate": 1.5768854215103435e-06, + "loss": 0.5892510414123535, + "step": 4700 + }, + { + "epoch": 1.6565638766519823, + "grad_norm": 2.1732638193025076, + "learning_rate": 1.5737464758498243e-06, + "loss": 0.5357394814491272, + "step": 4701 + }, + { + 
"epoch": 1.6569162995594713, + "grad_norm": 1.92797804038562, + "learning_rate": 1.5706103906401416e-06, + "loss": 0.6078016757965088, + "step": 4702 + }, + { + "epoch": 1.6572687224669602, + "grad_norm": 2.019695184899454, + "learning_rate": 1.5674771669459e-06, + "loss": 0.5858899354934692, + "step": 4703 + }, + { + "epoch": 1.6576211453744494, + "grad_norm": 1.819706102358174, + "learning_rate": 1.5643468058307287e-06, + "loss": 0.5783329010009766, + "step": 4704 + }, + { + "epoch": 1.6579735682819383, + "grad_norm": 1.8104985438999985, + "learning_rate": 1.561219308357288e-06, + "loss": 0.5412800312042236, + "step": 4705 + }, + { + "epoch": 1.6583259911894275, + "grad_norm": 1.5193820753894371, + "learning_rate": 1.5580946755872727e-06, + "loss": 0.5609365701675415, + "step": 4706 + }, + { + "epoch": 1.6586784140969164, + "grad_norm": 2.2157168701611427, + "learning_rate": 1.554972908581388e-06, + "loss": 0.45193177461624146, + "step": 4707 + }, + { + "epoch": 1.6590308370044053, + "grad_norm": 1.885008861796424, + "learning_rate": 1.5518540083993838e-06, + "loss": 0.6402257680892944, + "step": 4708 + }, + { + "epoch": 1.6593832599118943, + "grad_norm": 1.906792902482494, + "learning_rate": 1.5487379761000276e-06, + "loss": 0.5956071615219116, + "step": 4709 + }, + { + "epoch": 1.6597356828193832, + "grad_norm": 1.5229764109639101, + "learning_rate": 1.5456248127411156e-06, + "loss": 0.5975273847579956, + "step": 4710 + }, + { + "epoch": 1.6600881057268722, + "grad_norm": 2.2860844716103514, + "learning_rate": 1.54251451937947e-06, + "loss": 0.6914929151535034, + "step": 4711 + }, + { + "epoch": 1.660440528634361, + "grad_norm": 2.141875122923791, + "learning_rate": 1.5394070970709384e-06, + "loss": 0.5867592096328735, + "step": 4712 + }, + { + "epoch": 1.6607929515418502, + "grad_norm": 1.96612759503979, + "learning_rate": 1.5363025468703917e-06, + "loss": 0.6448687314987183, + "step": 4713 + }, + { + "epoch": 1.6611453744493392, + "grad_norm": 
1.8452223088884994, + "learning_rate": 1.5332008698317348e-06, + "loss": 0.5870485305786133, + "step": 4714 + }, + { + "epoch": 1.6614977973568283, + "grad_norm": 1.9043935409080608, + "learning_rate": 1.5301020670078803e-06, + "loss": 0.6336855888366699, + "step": 4715 + }, + { + "epoch": 1.6618502202643173, + "grad_norm": 1.9247468731228787, + "learning_rate": 1.527006139450784e-06, + "loss": 0.5924787521362305, + "step": 4716 + }, + { + "epoch": 1.6622026431718062, + "grad_norm": 1.5860620334804822, + "learning_rate": 1.523913088211415e-06, + "loss": 0.5817830562591553, + "step": 4717 + }, + { + "epoch": 1.6625550660792952, + "grad_norm": 1.8285246452015176, + "learning_rate": 1.5208229143397657e-06, + "loss": 0.5836390852928162, + "step": 4718 + }, + { + "epoch": 1.662907488986784, + "grad_norm": 1.5094327417455158, + "learning_rate": 1.5177356188848558e-06, + "loss": 0.47110515832901, + "step": 4719 + }, + { + "epoch": 1.663259911894273, + "grad_norm": 2.018838906344594, + "learning_rate": 1.5146512028947225e-06, + "loss": 0.6376635432243347, + "step": 4720 + }, + { + "epoch": 1.663612334801762, + "grad_norm": 1.7847798861513196, + "learning_rate": 1.5115696674164349e-06, + "loss": 0.6399784088134766, + "step": 4721 + }, + { + "epoch": 1.6639647577092511, + "grad_norm": 2.2125247577405127, + "learning_rate": 1.5084910134960773e-06, + "loss": 0.5891954898834229, + "step": 4722 + }, + { + "epoch": 1.66431718061674, + "grad_norm": 1.5827717360956535, + "learning_rate": 1.5054152421787505e-06, + "loss": 0.6358054876327515, + "step": 4723 + }, + { + "epoch": 1.6646696035242292, + "grad_norm": 1.855029533228232, + "learning_rate": 1.5023423545085892e-06, + "loss": 0.5072367191314697, + "step": 4724 + }, + { + "epoch": 1.6650220264317181, + "grad_norm": 1.8866512172900913, + "learning_rate": 1.4992723515287423e-06, + "loss": 0.5549881458282471, + "step": 4725 + }, + { + "epoch": 1.665374449339207, + "grad_norm": 1.5386308243299962, + "learning_rate": 
1.4962052342813804e-06, + "loss": 0.4833364188671112, + "step": 4726 + }, + { + "epoch": 1.665726872246696, + "grad_norm": 1.837283227568624, + "learning_rate": 1.4931410038076922e-06, + "loss": 0.6183017492294312, + "step": 4727 + }, + { + "epoch": 1.666079295154185, + "grad_norm": 1.8220970545699078, + "learning_rate": 1.4900796611478885e-06, + "loss": 0.4956335127353668, + "step": 4728 + }, + { + "epoch": 1.666431718061674, + "grad_norm": 2.0221134241832552, + "learning_rate": 1.4870212073412027e-06, + "loss": 0.7345337271690369, + "step": 4729 + }, + { + "epoch": 1.6667841409691628, + "grad_norm": 1.5143426871240313, + "learning_rate": 1.4839656434258864e-06, + "loss": 0.5324833393096924, + "step": 4730 + }, + { + "epoch": 1.667136563876652, + "grad_norm": 1.7373474898452002, + "learning_rate": 1.4809129704392e-06, + "loss": 0.5702322125434875, + "step": 4731 + }, + { + "epoch": 1.667488986784141, + "grad_norm": 1.7374551868496027, + "learning_rate": 1.4778631894174389e-06, + "loss": 0.46188884973526, + "step": 4732 + }, + { + "epoch": 1.66784140969163, + "grad_norm": 1.76911142349316, + "learning_rate": 1.474816301395906e-06, + "loss": 0.5505924224853516, + "step": 4733 + }, + { + "epoch": 1.668193832599119, + "grad_norm": 1.9422422566247162, + "learning_rate": 1.4717723074089251e-06, + "loss": 0.5889710187911987, + "step": 4734 + }, + { + "epoch": 1.668546255506608, + "grad_norm": 2.1059796200005825, + "learning_rate": 1.4687312084898387e-06, + "loss": 0.5794551372528076, + "step": 4735 + }, + { + "epoch": 1.668898678414097, + "grad_norm": 1.8650983467603144, + "learning_rate": 1.4656930056710006e-06, + "loss": 0.567146897315979, + "step": 4736 + }, + { + "epoch": 1.6692511013215858, + "grad_norm": 2.0850787713289067, + "learning_rate": 1.4626576999837938e-06, + "loss": 0.5330451130867004, + "step": 4737 + }, + { + "epoch": 1.6696035242290748, + "grad_norm": 1.868870689701364, + "learning_rate": 1.459625292458604e-06, + "loss": 0.5631227493286133, + "step": 
4738 + }, + { + "epoch": 1.669955947136564, + "grad_norm": 1.8773546830623118, + "learning_rate": 1.456595784124839e-06, + "loss": 0.5145374536514282, + "step": 4739 + }, + { + "epoch": 1.6703083700440529, + "grad_norm": 1.925388120075487, + "learning_rate": 1.453569176010927e-06, + "loss": 0.59378582239151, + "step": 4740 + }, + { + "epoch": 1.6706607929515418, + "grad_norm": 2.053494266916917, + "learning_rate": 1.4505454691443043e-06, + "loss": 0.5827980041503906, + "step": 4741 + }, + { + "epoch": 1.671013215859031, + "grad_norm": 1.6369511357690396, + "learning_rate": 1.4475246645514274e-06, + "loss": 0.5270858407020569, + "step": 4742 + }, + { + "epoch": 1.67136563876652, + "grad_norm": 2.2160806515473186, + "learning_rate": 1.4445067632577625e-06, + "loss": 0.4708535373210907, + "step": 4743 + }, + { + "epoch": 1.6717180616740088, + "grad_norm": 1.6498079624073576, + "learning_rate": 1.4414917662877924e-06, + "loss": 0.5505239963531494, + "step": 4744 + }, + { + "epoch": 1.6720704845814978, + "grad_norm": 1.8451834665357323, + "learning_rate": 1.4384796746650221e-06, + "loss": 0.5841302871704102, + "step": 4745 + }, + { + "epoch": 1.6724229074889867, + "grad_norm": 1.8665624096794386, + "learning_rate": 1.4354704894119554e-06, + "loss": 0.627534031867981, + "step": 4746 + }, + { + "epoch": 1.6727753303964756, + "grad_norm": 1.9526983627618448, + "learning_rate": 1.4324642115501165e-06, + "loss": 0.6160094738006592, + "step": 4747 + }, + { + "epoch": 1.6731277533039648, + "grad_norm": 1.6909104461316946, + "learning_rate": 1.4294608421000489e-06, + "loss": 0.5420609712600708, + "step": 4748 + }, + { + "epoch": 1.6734801762114537, + "grad_norm": 1.9597720364889828, + "learning_rate": 1.4264603820813006e-06, + "loss": 0.7729714512825012, + "step": 4749 + }, + { + "epoch": 1.6738325991189429, + "grad_norm": 1.8780592513411432, + "learning_rate": 1.4234628325124345e-06, + "loss": 0.6458747386932373, + "step": 4750 + }, + { + "epoch": 1.6741850220264318, + 
"grad_norm": 1.717642350217617, + "learning_rate": 1.4204681944110242e-06, + "loss": 0.5250670313835144, + "step": 4751 + }, + { + "epoch": 1.6745374449339208, + "grad_norm": 1.9945004637909651, + "learning_rate": 1.4174764687936548e-06, + "loss": 0.4985695779323578, + "step": 4752 + }, + { + "epoch": 1.6748898678414097, + "grad_norm": 2.1058731387570253, + "learning_rate": 1.4144876566759303e-06, + "loss": 0.6401104927062988, + "step": 4753 + }, + { + "epoch": 1.6752422907488986, + "grad_norm": 1.8242149794974472, + "learning_rate": 1.4115017590724534e-06, + "loss": 0.5991432666778564, + "step": 4754 + }, + { + "epoch": 1.6755947136563876, + "grad_norm": 1.8977892116048576, + "learning_rate": 1.4085187769968433e-06, + "loss": 0.7787071466445923, + "step": 4755 + }, + { + "epoch": 1.6759471365638765, + "grad_norm": 1.8915051082154768, + "learning_rate": 1.4055387114617336e-06, + "loss": 0.6977101564407349, + "step": 4756 + }, + { + "epoch": 1.6762995594713657, + "grad_norm": 1.7182871374874729, + "learning_rate": 1.4025615634787616e-06, + "loss": 0.541371762752533, + "step": 4757 + }, + { + "epoch": 1.6766519823788546, + "grad_norm": 1.8269477268502086, + "learning_rate": 1.3995873340585765e-06, + "loss": 0.5548759698867798, + "step": 4758 + }, + { + "epoch": 1.6770044052863438, + "grad_norm": 1.947083457475871, + "learning_rate": 1.3966160242108373e-06, + "loss": 0.6022241115570068, + "step": 4759 + }, + { + "epoch": 1.6773568281938327, + "grad_norm": 2.1542376603491946, + "learning_rate": 1.3936476349442074e-06, + "loss": 0.4965083599090576, + "step": 4760 + }, + { + "epoch": 1.6777092511013216, + "grad_norm": 2.4193138120349227, + "learning_rate": 1.3906821672663706e-06, + "loss": 0.6712369918823242, + "step": 4761 + }, + { + "epoch": 1.6780616740088106, + "grad_norm": 1.8527401573304754, + "learning_rate": 1.3877196221840038e-06, + "loss": 0.6236127614974976, + "step": 4762 + }, + { + "epoch": 1.6784140969162995, + "grad_norm": 1.9836915293869917, + 
"learning_rate": 1.3847600007027996e-06, + "loss": 0.7144246101379395, + "step": 4763 + }, + { + "epoch": 1.6787665198237884, + "grad_norm": 1.926900514093349, + "learning_rate": 1.3818033038274602e-06, + "loss": 0.650489091873169, + "step": 4764 + }, + { + "epoch": 1.6791189427312774, + "grad_norm": 1.896615210676468, + "learning_rate": 1.3788495325616912e-06, + "loss": 0.6711791157722473, + "step": 4765 + }, + { + "epoch": 1.6794713656387665, + "grad_norm": 1.6640253715487854, + "learning_rate": 1.375898687908206e-06, + "loss": 0.49629515409469604, + "step": 4766 + }, + { + "epoch": 1.6798237885462555, + "grad_norm": 2.3032521123469727, + "learning_rate": 1.372950770868724e-06, + "loss": 0.5843443870544434, + "step": 4767 + }, + { + "epoch": 1.6801762114537446, + "grad_norm": 1.7269921421841483, + "learning_rate": 1.3700057824439694e-06, + "loss": 0.6201150417327881, + "step": 4768 + }, + { + "epoch": 1.6805286343612336, + "grad_norm": 2.2518096795033746, + "learning_rate": 1.3670637236336815e-06, + "loss": 0.6149473190307617, + "step": 4769 + }, + { + "epoch": 1.6808810572687225, + "grad_norm": 1.8297389667337718, + "learning_rate": 1.3641245954365868e-06, + "loss": 0.476188987493515, + "step": 4770 + }, + { + "epoch": 1.6812334801762114, + "grad_norm": 1.6086104459565809, + "learning_rate": 1.361188398850436e-06, + "loss": 0.4850924015045166, + "step": 4771 + }, + { + "epoch": 1.6815859030837004, + "grad_norm": 2.8146145731538676, + "learning_rate": 1.3582551348719741e-06, + "loss": 0.6008634567260742, + "step": 4772 + }, + { + "epoch": 1.6819383259911893, + "grad_norm": 1.6382847925926618, + "learning_rate": 1.3553248044969525e-06, + "loss": 0.5383377075195312, + "step": 4773 + }, + { + "epoch": 1.6822907488986785, + "grad_norm": 1.966985389102481, + "learning_rate": 1.3523974087201274e-06, + "loss": 0.5711555480957031, + "step": 4774 + }, + { + "epoch": 1.6826431718061674, + "grad_norm": 1.915810750390724, + "learning_rate": 1.3494729485352586e-06, + "loss": 
0.5267810821533203, + "step": 4775 + }, + { + "epoch": 1.6829955947136563, + "grad_norm": 1.968063769811982, + "learning_rate": 1.3465514249351076e-06, + "loss": 0.6203084588050842, + "step": 4776 + }, + { + "epoch": 1.6833480176211455, + "grad_norm": 1.604432029465195, + "learning_rate": 1.3436328389114473e-06, + "loss": 0.46672314405441284, + "step": 4777 + }, + { + "epoch": 1.6837004405286344, + "grad_norm": 2.175917964334397, + "learning_rate": 1.3407171914550366e-06, + "loss": 0.6375850439071655, + "step": 4778 + }, + { + "epoch": 1.6840528634361234, + "grad_norm": 1.7467776544405884, + "learning_rate": 1.337804483555656e-06, + "loss": 0.6162304282188416, + "step": 4779 + }, + { + "epoch": 1.6844052863436123, + "grad_norm": 2.0769560048267817, + "learning_rate": 1.3348947162020741e-06, + "loss": 0.7814363241195679, + "step": 4780 + }, + { + "epoch": 1.6847577092511012, + "grad_norm": 1.4969648698838118, + "learning_rate": 1.3319878903820682e-06, + "loss": 0.47330981492996216, + "step": 4781 + }, + { + "epoch": 1.6851101321585902, + "grad_norm": 2.184365435582337, + "learning_rate": 1.3290840070824163e-06, + "loss": 0.759529173374176, + "step": 4782 + }, + { + "epoch": 1.6854625550660793, + "grad_norm": 2.039688209679945, + "learning_rate": 1.326183067288893e-06, + "loss": 0.7727639675140381, + "step": 4783 + }, + { + "epoch": 1.6858149779735683, + "grad_norm": 1.9567066145193837, + "learning_rate": 1.3232850719862789e-06, + "loss": 0.6429058313369751, + "step": 4784 + }, + { + "epoch": 1.6861674008810574, + "grad_norm": 2.394172291442893, + "learning_rate": 1.3203900221583565e-06, + "loss": 0.5895540714263916, + "step": 4785 + }, + { + "epoch": 1.6865198237885464, + "grad_norm": 1.835232130498821, + "learning_rate": 1.317497918787899e-06, + "loss": 0.5410366058349609, + "step": 4786 + }, + { + "epoch": 1.6868722466960353, + "grad_norm": 2.025742052316709, + "learning_rate": 1.3146087628566894e-06, + "loss": 0.5144281387329102, + "step": 4787 + }, + { + 
"epoch": 1.6872246696035242, + "grad_norm": 2.3397936718705967, + "learning_rate": 1.311722555345506e-06, + "loss": 0.7025437355041504, + "step": 4788 + }, + { + "epoch": 1.6875770925110132, + "grad_norm": 1.7878267984100036, + "learning_rate": 1.3088392972341256e-06, + "loss": 0.6000303030014038, + "step": 4789 + }, + { + "epoch": 1.6879295154185021, + "grad_norm": 2.057293191164908, + "learning_rate": 1.3059589895013248e-06, + "loss": 0.5732883214950562, + "step": 4790 + }, + { + "epoch": 1.688281938325991, + "grad_norm": 1.83266529432573, + "learning_rate": 1.3030816331248785e-06, + "loss": 0.5908600091934204, + "step": 4791 + }, + { + "epoch": 1.6886343612334802, + "grad_norm": 1.7317933799112057, + "learning_rate": 1.3002072290815571e-06, + "loss": 0.5579310655593872, + "step": 4792 + }, + { + "epoch": 1.6889867841409691, + "grad_norm": 1.9713002881427846, + "learning_rate": 1.2973357783471385e-06, + "loss": 0.6439195871353149, + "step": 4793 + }, + { + "epoch": 1.6893392070484583, + "grad_norm": 2.1068548457431437, + "learning_rate": 1.2944672818963822e-06, + "loss": 0.6213329434394836, + "step": 4794 + }, + { + "epoch": 1.6896916299559472, + "grad_norm": 2.271454462540911, + "learning_rate": 1.291601740703059e-06, + "loss": 0.5875385999679565, + "step": 4795 + }, + { + "epoch": 1.6900440528634362, + "grad_norm": 1.8170542219049632, + "learning_rate": 1.2887391557399287e-06, + "loss": 0.7071372270584106, + "step": 4796 + }, + { + "epoch": 1.690396475770925, + "grad_norm": 1.9467140791895143, + "learning_rate": 1.2858795279787517e-06, + "loss": 0.504144549369812, + "step": 4797 + }, + { + "epoch": 1.690748898678414, + "grad_norm": 1.9410537548952953, + "learning_rate": 1.2830228583902816e-06, + "loss": 0.7081021070480347, + "step": 4798 + }, + { + "epoch": 1.691101321585903, + "grad_norm": 1.679906270518348, + "learning_rate": 1.2801691479442658e-06, + "loss": 0.5843057632446289, + "step": 4799 + }, + { + "epoch": 1.691453744493392, + "grad_norm": 
2.366764710202919, + "learning_rate": 1.2773183976094571e-06, + "loss": 0.6437872052192688, + "step": 4800 + }, + { + "epoch": 1.691806167400881, + "grad_norm": 2.0784896951955125, + "learning_rate": 1.2744706083535907e-06, + "loss": 0.6945379972457886, + "step": 4801 + }, + { + "epoch": 1.69215859030837, + "grad_norm": 1.5919193491775196, + "learning_rate": 1.2716257811434019e-06, + "loss": 0.5827867984771729, + "step": 4802 + }, + { + "epoch": 1.6925110132158592, + "grad_norm": 2.6733992431993228, + "learning_rate": 1.2687839169446259e-06, + "loss": 0.5298784971237183, + "step": 4803 + }, + { + "epoch": 1.692863436123348, + "grad_norm": 2.069188268688187, + "learning_rate": 1.2659450167219834e-06, + "loss": 0.6885675191879272, + "step": 4804 + }, + { + "epoch": 1.693215859030837, + "grad_norm": 1.8639055588125417, + "learning_rate": 1.2631090814391945e-06, + "loss": 0.6902546882629395, + "step": 4805 + }, + { + "epoch": 1.693568281938326, + "grad_norm": 1.7529564321244235, + "learning_rate": 1.2602761120589713e-06, + "loss": 0.5972022414207458, + "step": 4806 + }, + { + "epoch": 1.693920704845815, + "grad_norm": 1.676709469876984, + "learning_rate": 1.2574461095430145e-06, + "loss": 0.5527150630950928, + "step": 4807 + }, + { + "epoch": 1.6942731277533039, + "grad_norm": 1.6073136228470446, + "learning_rate": 1.2546190748520294e-06, + "loss": 0.5898724794387817, + "step": 4808 + }, + { + "epoch": 1.6946255506607928, + "grad_norm": 1.7947012597219074, + "learning_rate": 1.2517950089457e-06, + "loss": 0.7023364901542664, + "step": 4809 + }, + { + "epoch": 1.694977973568282, + "grad_norm": 1.6620582383673839, + "learning_rate": 1.2489739127827083e-06, + "loss": 0.6016935706138611, + "step": 4810 + }, + { + "epoch": 1.6953303964757709, + "grad_norm": 1.8540813823422424, + "learning_rate": 1.246155787320733e-06, + "loss": 0.5724658966064453, + "step": 4811 + }, + { + "epoch": 1.69568281938326, + "grad_norm": 1.799661687922518, + "learning_rate": 
1.2433406335164389e-06, + "loss": 0.5886126160621643, + "step": 4812 + }, + { + "epoch": 1.696035242290749, + "grad_norm": 1.7467523658249466, + "learning_rate": 1.2405284523254823e-06, + "loss": 0.6656844615936279, + "step": 4813 + }, + { + "epoch": 1.696387665198238, + "grad_norm": 1.9261091270854245, + "learning_rate": 1.237719244702511e-06, + "loss": 0.6208533644676208, + "step": 4814 + }, + { + "epoch": 1.6967400881057269, + "grad_norm": 2.00988865324314, + "learning_rate": 1.234913011601162e-06, + "loss": 0.6895248889923096, + "step": 4815 + }, + { + "epoch": 1.6970925110132158, + "grad_norm": 1.755568469718746, + "learning_rate": 1.23210975397407e-06, + "loss": 0.5525833368301392, + "step": 4816 + }, + { + "epoch": 1.6974449339207047, + "grad_norm": 1.9922303788563613, + "learning_rate": 1.2293094727728471e-06, + "loss": 0.5958225727081299, + "step": 4817 + }, + { + "epoch": 1.6977973568281939, + "grad_norm": 1.8538893137799548, + "learning_rate": 1.226512168948103e-06, + "loss": 0.7570905089378357, + "step": 4818 + }, + { + "epoch": 1.6981497797356828, + "grad_norm": 1.9122257264522353, + "learning_rate": 1.2237178434494379e-06, + "loss": 0.586568295955658, + "step": 4819 + }, + { + "epoch": 1.6985022026431718, + "grad_norm": 2.0787640518450257, + "learning_rate": 1.2209264972254365e-06, + "loss": 0.574797511100769, + "step": 4820 + }, + { + "epoch": 1.698854625550661, + "grad_norm": 1.8487761386635184, + "learning_rate": 1.2181381312236751e-06, + "loss": 0.6277909278869629, + "step": 4821 + }, + { + "epoch": 1.6992070484581498, + "grad_norm": 1.6733284685909569, + "learning_rate": 1.2153527463907155e-06, + "loss": 0.6308181285858154, + "step": 4822 + }, + { + "epoch": 1.6995594713656388, + "grad_norm": 1.493809970809451, + "learning_rate": 1.2125703436721091e-06, + "loss": 0.5076426267623901, + "step": 4823 + }, + { + "epoch": 1.6999118942731277, + "grad_norm": 1.9773484387601714, + "learning_rate": 1.2097909240123994e-06, + "loss": 0.7712167501449585, + 
"step": 4824 + }, + { + "epoch": 1.7002643171806167, + "grad_norm": 2.0486043522302517, + "learning_rate": 1.2070144883551072e-06, + "loss": 0.5964892506599426, + "step": 4825 + }, + { + "epoch": 1.7006167400881056, + "grad_norm": 2.112366587786807, + "learning_rate": 1.2042410376427472e-06, + "loss": 0.7473628520965576, + "step": 4826 + }, + { + "epoch": 1.7009691629955948, + "grad_norm": 2.17212484756118, + "learning_rate": 1.2014705728168219e-06, + "loss": 0.6897937059402466, + "step": 4827 + }, + { + "epoch": 1.7013215859030837, + "grad_norm": 1.734585395626036, + "learning_rate": 1.198703094817817e-06, + "loss": 0.5955557823181152, + "step": 4828 + }, + { + "epoch": 1.7016740088105728, + "grad_norm": 1.8689895875293312, + "learning_rate": 1.195938604585205e-06, + "loss": 0.7051092386245728, + "step": 4829 + }, + { + "epoch": 1.7020264317180618, + "grad_norm": 3.0972606123503836, + "learning_rate": 1.1931771030574446e-06, + "loss": 0.584572434425354, + "step": 4830 + }, + { + "epoch": 1.7023788546255507, + "grad_norm": 1.9375690474214398, + "learning_rate": 1.1904185911719768e-06, + "loss": 0.5691804885864258, + "step": 4831 + }, + { + "epoch": 1.7027312775330397, + "grad_norm": 2.088088383810667, + "learning_rate": 1.187663069865237e-06, + "loss": 0.5539572834968567, + "step": 4832 + }, + { + "epoch": 1.7030837004405286, + "grad_norm": 2.0666580745541956, + "learning_rate": 1.1849105400726324e-06, + "loss": 0.6229352951049805, + "step": 4833 + }, + { + "epoch": 1.7034361233480175, + "grad_norm": 1.9668453557048677, + "learning_rate": 1.1821610027285613e-06, + "loss": 0.7302919626235962, + "step": 4834 + }, + { + "epoch": 1.7037885462555065, + "grad_norm": 1.6339139133298357, + "learning_rate": 1.1794144587664113e-06, + "loss": 0.6802065372467041, + "step": 4835 + }, + { + "epoch": 1.7041409691629956, + "grad_norm": 1.8504754714684983, + "learning_rate": 1.1766709091185447e-06, + "loss": 0.6448635458946228, + "step": 4836 + }, + { + "epoch": 1.7044933920704846, 
+ "grad_norm": 1.9824663488252348, + "learning_rate": 1.1739303547163138e-06, + "loss": 0.6141834259033203, + "step": 4837 + }, + { + "epoch": 1.7048458149779737, + "grad_norm": 1.8134017158002862, + "learning_rate": 1.1711927964900482e-06, + "loss": 0.5634737014770508, + "step": 4838 + }, + { + "epoch": 1.7051982378854627, + "grad_norm": 1.7474752293775022, + "learning_rate": 1.1684582353690642e-06, + "loss": 0.6786668300628662, + "step": 4839 + }, + { + "epoch": 1.7055506607929516, + "grad_norm": 1.8867560516103576, + "learning_rate": 1.1657266722816663e-06, + "loss": 0.6117962002754211, + "step": 4840 + }, + { + "epoch": 1.7059030837004405, + "grad_norm": 1.8617179395667027, + "learning_rate": 1.1629981081551278e-06, + "loss": 0.6115782260894775, + "step": 4841 + }, + { + "epoch": 1.7062555066079295, + "grad_norm": 1.676805324865966, + "learning_rate": 1.1602725439157114e-06, + "loss": 0.6526266932487488, + "step": 4842 + }, + { + "epoch": 1.7066079295154184, + "grad_norm": 1.8958276581556894, + "learning_rate": 1.1575499804886658e-06, + "loss": 0.5449249148368835, + "step": 4843 + }, + { + "epoch": 1.7069603524229073, + "grad_norm": 1.7291534323462554, + "learning_rate": 1.1548304187982152e-06, + "loss": 0.5793930292129517, + "step": 4844 + }, + { + "epoch": 1.7073127753303965, + "grad_norm": 1.9655642851245159, + "learning_rate": 1.152113859767565e-06, + "loss": 0.5133150815963745, + "step": 4845 + }, + { + "epoch": 1.7076651982378854, + "grad_norm": 2.3523116804029973, + "learning_rate": 1.1494003043189028e-06, + "loss": 0.6771460771560669, + "step": 4846 + }, + { + "epoch": 1.7080176211453746, + "grad_norm": 1.651478684492262, + "learning_rate": 1.1466897533733945e-06, + "loss": 0.5680071115493774, + "step": 4847 + }, + { + "epoch": 1.7083700440528635, + "grad_norm": 1.5749772965105935, + "learning_rate": 1.1439822078511941e-06, + "loss": 0.58878493309021, + "step": 4848 + }, + { + "epoch": 1.7087224669603525, + "grad_norm": 2.108145787297885, + 
"learning_rate": 1.14127766867142e-06, + "loss": 0.6441792249679565, + "step": 4849 + }, + { + "epoch": 1.7090748898678414, + "grad_norm": 1.98601005576605, + "learning_rate": 1.1385761367521865e-06, + "loss": 0.4910963773727417, + "step": 4850 + }, + { + "epoch": 1.7094273127753303, + "grad_norm": 1.860202988226145, + "learning_rate": 1.1358776130105765e-06, + "loss": 0.5878154635429382, + "step": 4851 + }, + { + "epoch": 1.7097797356828193, + "grad_norm": 1.8203129566458394, + "learning_rate": 1.133182098362654e-06, + "loss": 0.5835394859313965, + "step": 4852 + }, + { + "epoch": 1.7101321585903082, + "grad_norm": 2.552503327854629, + "learning_rate": 1.130489593723465e-06, + "loss": 0.6612577438354492, + "step": 4853 + }, + { + "epoch": 1.7104845814977974, + "grad_norm": 1.6914248252090902, + "learning_rate": 1.1278001000070282e-06, + "loss": 0.5892096757888794, + "step": 4854 + }, + { + "epoch": 1.7108370044052863, + "grad_norm": 1.597330243290686, + "learning_rate": 1.1251136181263433e-06, + "loss": 0.6196271181106567, + "step": 4855 + }, + { + "epoch": 1.7111894273127755, + "grad_norm": 1.7525774926334832, + "learning_rate": 1.122430148993392e-06, + "loss": 0.5365586280822754, + "step": 4856 + }, + { + "epoch": 1.7115418502202644, + "grad_norm": 1.8547440453110882, + "learning_rate": 1.119749693519121e-06, + "loss": 0.6006353497505188, + "step": 4857 + }, + { + "epoch": 1.7118942731277533, + "grad_norm": 1.7445383281871432, + "learning_rate": 1.117072252613467e-06, + "loss": 0.5645362138748169, + "step": 4858 + }, + { + "epoch": 1.7122466960352423, + "grad_norm": 1.928827114023792, + "learning_rate": 1.1143978271853362e-06, + "loss": 0.46408799290657043, + "step": 4859 + }, + { + "epoch": 1.7125991189427312, + "grad_norm": 1.9357772553695842, + "learning_rate": 1.1117264181426134e-06, + "loss": 0.7798513770103455, + "step": 4860 + }, + { + "epoch": 1.7129515418502201, + "grad_norm": 2.234058868113385, + "learning_rate": 1.109058026392158e-06, + "loss": 
0.739770770072937, + "step": 4861 + }, + { + "epoch": 1.7133039647577093, + "grad_norm": 1.8311645792398603, + "learning_rate": 1.1063926528398062e-06, + "loss": 0.567306637763977, + "step": 4862 + }, + { + "epoch": 1.7136563876651982, + "grad_norm": 1.8983872649225184, + "learning_rate": 1.1037302983903686e-06, + "loss": 0.5730962753295898, + "step": 4863 + }, + { + "epoch": 1.7140088105726872, + "grad_norm": 2.0428299761075186, + "learning_rate": 1.1010709639476335e-06, + "loss": 0.6311475038528442, + "step": 4864 + }, + { + "epoch": 1.7143612334801763, + "grad_norm": 2.074080543967214, + "learning_rate": 1.098414650414359e-06, + "loss": 0.5867577791213989, + "step": 4865 + }, + { + "epoch": 1.7147136563876653, + "grad_norm": 1.7945849101921227, + "learning_rate": 1.0957613586922844e-06, + "loss": 0.6291393637657166, + "step": 4866 + }, + { + "epoch": 1.7150660792951542, + "grad_norm": 2.221825931925125, + "learning_rate": 1.0931110896821184e-06, + "loss": 0.5811575651168823, + "step": 4867 + }, + { + "epoch": 1.7154185022026431, + "grad_norm": 1.8041589779612486, + "learning_rate": 1.0904638442835459e-06, + "loss": 0.6340835690498352, + "step": 4868 + }, + { + "epoch": 1.715770925110132, + "grad_norm": 2.1324283591729696, + "learning_rate": 1.087819623395222e-06, + "loss": 0.6543419361114502, + "step": 4869 + }, + { + "epoch": 1.716123348017621, + "grad_norm": 1.7815282855404584, + "learning_rate": 1.0851784279147793e-06, + "loss": 0.5669729709625244, + "step": 4870 + }, + { + "epoch": 1.7164757709251102, + "grad_norm": 1.7880383242870224, + "learning_rate": 1.08254025873882e-06, + "loss": 0.5422554612159729, + "step": 4871 + }, + { + "epoch": 1.716828193832599, + "grad_norm": 2.1378220532284646, + "learning_rate": 1.0799051167629215e-06, + "loss": 0.6154215335845947, + "step": 4872 + }, + { + "epoch": 1.7171806167400883, + "grad_norm": 1.6926103915620132, + "learning_rate": 1.0772730028816304e-06, + "loss": 0.6306319236755371, + "step": 4873 + }, + { + "epoch": 
1.7175330396475772, + "grad_norm": 1.8857595594505687, + "learning_rate": 1.0746439179884716e-06, + "loss": 0.6301003694534302, + "step": 4874 + }, + { + "epoch": 1.7178854625550661, + "grad_norm": 2.02854056964172, + "learning_rate": 1.0720178629759347e-06, + "loss": 0.5730071067810059, + "step": 4875 + }, + { + "epoch": 1.718237885462555, + "grad_norm": 1.911878436689674, + "learning_rate": 1.0693948387354836e-06, + "loss": 0.5330506563186646, + "step": 4876 + }, + { + "epoch": 1.718590308370044, + "grad_norm": 2.2472032788534033, + "learning_rate": 1.0667748461575544e-06, + "loss": 0.7724611759185791, + "step": 4877 + }, + { + "epoch": 1.718942731277533, + "grad_norm": 1.8072854503281317, + "learning_rate": 1.0641578861315517e-06, + "loss": 0.5415126085281372, + "step": 4878 + }, + { + "epoch": 1.7192951541850219, + "grad_norm": 1.7491717586336433, + "learning_rate": 1.0615439595458554e-06, + "loss": 0.4895828664302826, + "step": 4879 + }, + { + "epoch": 1.719647577092511, + "grad_norm": 2.1761761181791757, + "learning_rate": 1.0589330672878084e-06, + "loss": 0.6049074530601501, + "step": 4880 + }, + { + "epoch": 1.72, + "grad_norm": 1.8120989551683504, + "learning_rate": 1.056325210243726e-06, + "loss": 0.5733378529548645, + "step": 4881 + }, + { + "epoch": 1.7203524229074891, + "grad_norm": 1.842989042937231, + "learning_rate": 1.0537203892989e-06, + "loss": 0.6034674644470215, + "step": 4882 + }, + { + "epoch": 1.720704845814978, + "grad_norm": 1.873470428701205, + "learning_rate": 1.0511186053375833e-06, + "loss": 0.5282200574874878, + "step": 4883 + }, + { + "epoch": 1.721057268722467, + "grad_norm": 1.7879116674889537, + "learning_rate": 1.0485198592430001e-06, + "loss": 0.6331876516342163, + "step": 4884 + }, + { + "epoch": 1.721409691629956, + "grad_norm": 1.9365084560803385, + "learning_rate": 1.045924151897344e-06, + "loss": 0.5194844007492065, + "step": 4885 + }, + { + "epoch": 1.7217621145374449, + "grad_norm": 1.9087945355709668, + "learning_rate": 
1.0433314841817755e-06, + "loss": 0.5496135354042053, + "step": 4886 + }, + { + "epoch": 1.7221145374449338, + "grad_norm": 2.009884434911672, + "learning_rate": 1.0407418569764304e-06, + "loss": 0.7871953248977661, + "step": 4887 + }, + { + "epoch": 1.7224669603524227, + "grad_norm": 2.040889138785673, + "learning_rate": 1.0381552711604004e-06, + "loss": 0.7199628353118896, + "step": 4888 + }, + { + "epoch": 1.722819383259912, + "grad_norm": 2.248999014584043, + "learning_rate": 1.0355717276117506e-06, + "loss": 0.5876469612121582, + "step": 4889 + }, + { + "epoch": 1.7231718061674008, + "grad_norm": 2.0349261607011, + "learning_rate": 1.0329912272075181e-06, + "loss": 0.5543426275253296, + "step": 4890 + }, + { + "epoch": 1.72352422907489, + "grad_norm": 1.583533546685778, + "learning_rate": 1.0304137708236994e-06, + "loss": 0.5118540525436401, + "step": 4891 + }, + { + "epoch": 1.723876651982379, + "grad_norm": 1.82424017683773, + "learning_rate": 1.0278393593352631e-06, + "loss": 0.628477931022644, + "step": 4892 + }, + { + "epoch": 1.7242290748898679, + "grad_norm": 1.9109773060364437, + "learning_rate": 1.0252679936161392e-06, + "loss": 0.6119322776794434, + "step": 4893 + }, + { + "epoch": 1.7245814977973568, + "grad_norm": 1.8450217827392812, + "learning_rate": 1.0226996745392259e-06, + "loss": 0.7661763429641724, + "step": 4894 + }, + { + "epoch": 1.7249339207048457, + "grad_norm": 2.1201139928861394, + "learning_rate": 1.0201344029763927e-06, + "loss": 0.6431440114974976, + "step": 4895 + }, + { + "epoch": 1.7252863436123347, + "grad_norm": 2.0387248477928503, + "learning_rate": 1.0175721797984639e-06, + "loss": 0.7295387983322144, + "step": 4896 + }, + { + "epoch": 1.7256387665198238, + "grad_norm": 1.9561833203401287, + "learning_rate": 1.015013005875235e-06, + "loss": 0.58225017786026, + "step": 4897 + }, + { + "epoch": 1.7259911894273128, + "grad_norm": 1.9211243008184207, + "learning_rate": 1.0124568820754689e-06, + "loss": 0.5467473864555359, + 
"step": 4898 + }, + { + "epoch": 1.7263436123348017, + "grad_norm": 2.2453442964094967, + "learning_rate": 1.00990380926689e-06, + "loss": 0.7637814283370972, + "step": 4899 + }, + { + "epoch": 1.7266960352422909, + "grad_norm": 2.13267606796778, + "learning_rate": 1.0073537883161821e-06, + "loss": 0.5354464650154114, + "step": 4900 + }, + { + "epoch": 1.7270484581497798, + "grad_norm": 1.91187833906973, + "learning_rate": 1.0048068200890037e-06, + "loss": 0.5213606357574463, + "step": 4901 + }, + { + "epoch": 1.7274008810572687, + "grad_norm": 1.8770841550484265, + "learning_rate": 1.0022629054499678e-06, + "loss": 0.6073330640792847, + "step": 4902 + }, + { + "epoch": 1.7277533039647577, + "grad_norm": 2.1663053459498283, + "learning_rate": 9.997220452626587e-07, + "loss": 0.5711998343467712, + "step": 4903 + }, + { + "epoch": 1.7281057268722466, + "grad_norm": 1.8823259072141711, + "learning_rate": 9.971842403896137e-07, + "loss": 0.6824701428413391, + "step": 4904 + }, + { + "epoch": 1.7284581497797356, + "grad_norm": 1.844862593672041, + "learning_rate": 9.9464949169234e-07, + "loss": 0.528059184551239, + "step": 4905 + }, + { + "epoch": 1.7288105726872247, + "grad_norm": 1.7519423160504919, + "learning_rate": 9.92117800031308e-07, + "loss": 0.45617133378982544, + "step": 4906 + }, + { + "epoch": 1.7291629955947136, + "grad_norm": 2.095891000231315, + "learning_rate": 9.895891662659485e-07, + "loss": 0.6186379194259644, + "step": 4907 + }, + { + "epoch": 1.7295154185022028, + "grad_norm": 1.8933361504308706, + "learning_rate": 9.870635912546511e-07, + "loss": 0.622776985168457, + "step": 4908 + }, + { + "epoch": 1.7298678414096917, + "grad_norm": 2.1556634846751073, + "learning_rate": 9.845410758547724e-07, + "loss": 0.6322426199913025, + "step": 4909 + }, + { + "epoch": 1.7302202643171807, + "grad_norm": 1.8637079254212523, + "learning_rate": 9.82021620922624e-07, + "loss": 0.565685510635376, + "step": 4910 + }, + { + "epoch": 1.7305726872246696, + 
"grad_norm": 1.9032887733300228, + "learning_rate": 9.795052273134908e-07, + "loss": 0.670723557472229, + "step": 4911 + }, + { + "epoch": 1.7309251101321586, + "grad_norm": 1.818317953069921, + "learning_rate": 9.769918958816017e-07, + "loss": 0.627914309501648, + "step": 4912 + }, + { + "epoch": 1.7312775330396475, + "grad_norm": 1.8142433277320784, + "learning_rate": 9.74481627480156e-07, + "loss": 0.613754391670227, + "step": 4913 + }, + { + "epoch": 1.7316299559471364, + "grad_norm": 1.6146673255290158, + "learning_rate": 9.719744229613148e-07, + "loss": 0.7128336429595947, + "step": 4914 + }, + { + "epoch": 1.7319823788546256, + "grad_norm": 2.07516307915708, + "learning_rate": 9.694702831761937e-07, + "loss": 0.692448079586029, + "step": 4915 + }, + { + "epoch": 1.7323348017621145, + "grad_norm": 1.8379288210737326, + "learning_rate": 9.669692089748717e-07, + "loss": 0.5722585916519165, + "step": 4916 + }, + { + "epoch": 1.7326872246696037, + "grad_norm": 2.880722779651987, + "learning_rate": 9.64471201206385e-07, + "loss": 0.5267904996871948, + "step": 4917 + }, + { + "epoch": 1.7330396475770926, + "grad_norm": 1.8098448963152955, + "learning_rate": 9.619762607187277e-07, + "loss": 0.6290950179100037, + "step": 4918 + }, + { + "epoch": 1.7333920704845815, + "grad_norm": 1.6991585212089806, + "learning_rate": 9.594843883588588e-07, + "loss": 0.5137144327163696, + "step": 4919 + }, + { + "epoch": 1.7337444933920705, + "grad_norm": 2.0101083451482067, + "learning_rate": 9.569955849726875e-07, + "loss": 0.6110765337944031, + "step": 4920 + }, + { + "epoch": 1.7340969162995594, + "grad_norm": 1.805820390142787, + "learning_rate": 9.545098514050844e-07, + "loss": 0.5097514390945435, + "step": 4921 + }, + { + "epoch": 1.7344493392070484, + "grad_norm": 1.9204009410934093, + "learning_rate": 9.520271884998822e-07, + "loss": 0.7220968008041382, + "step": 4922 + }, + { + "epoch": 1.7348017621145373, + "grad_norm": 1.816061125504689, + "learning_rate": 
9.495475970998669e-07, + "loss": 0.4790550470352173, + "step": 4923 + }, + { + "epoch": 1.7351541850220265, + "grad_norm": 1.8878679441443287, + "learning_rate": 9.470710780467818e-07, + "loss": 0.5440540909767151, + "step": 4924 + }, + { + "epoch": 1.7355066079295154, + "grad_norm": 1.8420075371513611, + "learning_rate": 9.445976321813277e-07, + "loss": 0.6351054310798645, + "step": 4925 + }, + { + "epoch": 1.7358590308370045, + "grad_norm": 1.8685391189030902, + "learning_rate": 9.421272603431619e-07, + "loss": 0.597430944442749, + "step": 4926 + }, + { + "epoch": 1.7362114537444935, + "grad_norm": 1.8993591697635552, + "learning_rate": 9.396599633709013e-07, + "loss": 0.5826110243797302, + "step": 4927 + }, + { + "epoch": 1.7365638766519824, + "grad_norm": 1.9528322527669026, + "learning_rate": 9.371957421021116e-07, + "loss": 0.61531662940979, + "step": 4928 + }, + { + "epoch": 1.7369162995594714, + "grad_norm": 1.7976479809998938, + "learning_rate": 9.347345973733257e-07, + "loss": 0.5286549925804138, + "step": 4929 + }, + { + "epoch": 1.7372687224669603, + "grad_norm": 2.051327926584316, + "learning_rate": 9.322765300200209e-07, + "loss": 0.6923980712890625, + "step": 4930 + }, + { + "epoch": 1.7376211453744492, + "grad_norm": 1.8765754964403032, + "learning_rate": 9.298215408766376e-07, + "loss": 0.5408697128295898, + "step": 4931 + }, + { + "epoch": 1.7379735682819382, + "grad_norm": 1.9428832757254997, + "learning_rate": 9.273696307765656e-07, + "loss": 0.6360228061676025, + "step": 4932 + }, + { + "epoch": 1.7383259911894273, + "grad_norm": 1.5478222777536266, + "learning_rate": 9.249208005521538e-07, + "loss": 0.46559634804725647, + "step": 4933 + }, + { + "epoch": 1.7386784140969163, + "grad_norm": 2.0814940983294465, + "learning_rate": 9.224750510347036e-07, + "loss": 0.6065478324890137, + "step": 4934 + }, + { + "epoch": 1.7390308370044054, + "grad_norm": 2.197942688439507, + "learning_rate": 9.2003238305447e-07, + "loss": 0.6777745485305786, + 
"step": 4935 + }, + { + "epoch": 1.7393832599118944, + "grad_norm": 1.764242470379209, + "learning_rate": 9.175927974406607e-07, + "loss": 0.568982720375061, + "step": 4936 + }, + { + "epoch": 1.7397356828193833, + "grad_norm": 1.9082270198240563, + "learning_rate": 9.151562950214443e-07, + "loss": 0.6014461517333984, + "step": 4937 + }, + { + "epoch": 1.7400881057268722, + "grad_norm": 1.9463215063568118, + "learning_rate": 9.127228766239349e-07, + "loss": 0.6312133073806763, + "step": 4938 + }, + { + "epoch": 1.7404405286343612, + "grad_norm": 1.9066118382891128, + "learning_rate": 9.102925430742015e-07, + "loss": 0.5440298318862915, + "step": 4939 + }, + { + "epoch": 1.74079295154185, + "grad_norm": 1.9115402376997355, + "learning_rate": 9.078652951972688e-07, + "loss": 0.6599005460739136, + "step": 4940 + }, + { + "epoch": 1.7411453744493393, + "grad_norm": 1.8987879122247575, + "learning_rate": 9.054411338171099e-07, + "loss": 0.6719228625297546, + "step": 4941 + }, + { + "epoch": 1.7414977973568282, + "grad_norm": 1.7692389966879711, + "learning_rate": 9.030200597566529e-07, + "loss": 0.5771356821060181, + "step": 4942 + }, + { + "epoch": 1.7418502202643171, + "grad_norm": 2.0029197465912936, + "learning_rate": 9.006020738377764e-07, + "loss": 0.5066591501235962, + "step": 4943 + }, + { + "epoch": 1.7422026431718063, + "grad_norm": 1.754361693598564, + "learning_rate": 8.981871768813111e-07, + "loss": 0.5091663002967834, + "step": 4944 + }, + { + "epoch": 1.7425550660792952, + "grad_norm": 1.9092674317256029, + "learning_rate": 8.957753697070415e-07, + "loss": 0.6594514846801758, + "step": 4945 + }, + { + "epoch": 1.7429074889867842, + "grad_norm": 1.8033652679865708, + "learning_rate": 8.933666531337004e-07, + "loss": 0.5485379695892334, + "step": 4946 + }, + { + "epoch": 1.743259911894273, + "grad_norm": 2.2602019905537913, + "learning_rate": 8.909610279789716e-07, + "loss": 0.6079416871070862, + "step": 4947 + }, + { + "epoch": 1.743612334801762, + 
"grad_norm": 1.8415960205262154, + "learning_rate": 8.885584950594894e-07, + "loss": 0.4980606436729431, + "step": 4948 + }, + { + "epoch": 1.743964757709251, + "grad_norm": 1.5880176897451332, + "learning_rate": 8.861590551908405e-07, + "loss": 0.47701022028923035, + "step": 4949 + }, + { + "epoch": 1.7443171806167401, + "grad_norm": 1.7223149872435417, + "learning_rate": 8.837627091875578e-07, + "loss": 0.5041281580924988, + "step": 4950 + }, + { + "epoch": 1.744669603524229, + "grad_norm": 1.9666236461253934, + "learning_rate": 8.813694578631283e-07, + "loss": 0.5477255582809448, + "step": 4951 + }, + { + "epoch": 1.7450220264317182, + "grad_norm": 1.883766477051188, + "learning_rate": 8.78979302029983e-07, + "loss": 0.6377973556518555, + "step": 4952 + }, + { + "epoch": 1.7453744493392072, + "grad_norm": 1.940207867324299, + "learning_rate": 8.76592242499511e-07, + "loss": 0.6688166856765747, + "step": 4953 + }, + { + "epoch": 1.745726872246696, + "grad_norm": 2.0031898505950907, + "learning_rate": 8.742082800820406e-07, + "loss": 0.6236848831176758, + "step": 4954 + }, + { + "epoch": 1.746079295154185, + "grad_norm": 1.7582600318717108, + "learning_rate": 8.718274155868545e-07, + "loss": 0.653768002986908, + "step": 4955 + }, + { + "epoch": 1.746431718061674, + "grad_norm": 1.844534933556578, + "learning_rate": 8.694496498221805e-07, + "loss": 0.5647604465484619, + "step": 4956 + }, + { + "epoch": 1.746784140969163, + "grad_norm": 1.781932697931349, + "learning_rate": 8.670749835951964e-07, + "loss": 0.4960663914680481, + "step": 4957 + }, + { + "epoch": 1.7471365638766518, + "grad_norm": 1.6873484879529697, + "learning_rate": 8.647034177120317e-07, + "loss": 0.6271536350250244, + "step": 4958 + }, + { + "epoch": 1.747488986784141, + "grad_norm": 2.0059254125224757, + "learning_rate": 8.623349529777525e-07, + "loss": 0.6323459148406982, + "step": 4959 + }, + { + "epoch": 1.74784140969163, + "grad_norm": 1.9564636362517054, + "learning_rate": 
8.599695901963811e-07, + "loss": 0.6084197163581848, + "step": 4960 + }, + { + "epoch": 1.748193832599119, + "grad_norm": 1.8913653459936526, + "learning_rate": 8.576073301708876e-07, + "loss": 0.48974379897117615, + "step": 4961 + }, + { + "epoch": 1.748546255506608, + "grad_norm": 1.8735173678444992, + "learning_rate": 8.552481737031859e-07, + "loss": 0.5985081195831299, + "step": 4962 + }, + { + "epoch": 1.748898678414097, + "grad_norm": 1.6360789306706147, + "learning_rate": 8.528921215941299e-07, + "loss": 0.507872998714447, + "step": 4963 + }, + { + "epoch": 1.749251101321586, + "grad_norm": 1.5251403239052872, + "learning_rate": 8.50539174643531e-07, + "loss": 0.5772356986999512, + "step": 4964 + }, + { + "epoch": 1.7496035242290748, + "grad_norm": 2.222117569410965, + "learning_rate": 8.48189333650139e-07, + "loss": 0.675100564956665, + "step": 4965 + }, + { + "epoch": 1.7499559471365638, + "grad_norm": 1.9356078104678653, + "learning_rate": 8.458425994116582e-07, + "loss": 0.5571645498275757, + "step": 4966 + }, + { + "epoch": 1.7503083700440527, + "grad_norm": 1.807660183683072, + "learning_rate": 8.434989727247233e-07, + "loss": 0.5842185020446777, + "step": 4967 + }, + { + "epoch": 1.7506607929515419, + "grad_norm": 1.7960899956397995, + "learning_rate": 8.41158454384925e-07, + "loss": 0.5693016648292542, + "step": 4968 + }, + { + "epoch": 1.7510132158590308, + "grad_norm": 1.808037504366546, + "learning_rate": 8.388210451868006e-07, + "loss": 0.5791449546813965, + "step": 4969 + }, + { + "epoch": 1.75136563876652, + "grad_norm": 2.1439820497437516, + "learning_rate": 8.364867459238257e-07, + "loss": 0.4873960018157959, + "step": 4970 + }, + { + "epoch": 1.751718061674009, + "grad_norm": 1.6712365329059415, + "learning_rate": 8.341555573884175e-07, + "loss": 0.609403669834137, + "step": 4971 + }, + { + "epoch": 1.7520704845814978, + "grad_norm": 2.0664225342752327, + "learning_rate": 8.318274803719483e-07, + "loss": 0.5676242113113403, + "step": 4972 + 
}, + { + "epoch": 1.7524229074889868, + "grad_norm": 2.2550971825464026, + "learning_rate": 8.29502515664723e-07, + "loss": 0.7692728638648987, + "step": 4973 + }, + { + "epoch": 1.7527753303964757, + "grad_norm": 2.318073308236361, + "learning_rate": 8.27180664056001e-07, + "loss": 0.7940253019332886, + "step": 4974 + }, + { + "epoch": 1.7531277533039646, + "grad_norm": 2.021077548315, + "learning_rate": 8.24861926333973e-07, + "loss": 0.5784735083580017, + "step": 4975 + }, + { + "epoch": 1.7534801762114536, + "grad_norm": 2.106016882372918, + "learning_rate": 8.225463032857783e-07, + "loss": 0.6493539810180664, + "step": 4976 + }, + { + "epoch": 1.7538325991189427, + "grad_norm": 1.6893816606485224, + "learning_rate": 8.202337956975026e-07, + "loss": 0.615519106388092, + "step": 4977 + }, + { + "epoch": 1.7541850220264317, + "grad_norm": 2.4337358559529587, + "learning_rate": 8.179244043541678e-07, + "loss": 0.5369104146957397, + "step": 4978 + }, + { + "epoch": 1.7545374449339208, + "grad_norm": 1.8845170170566812, + "learning_rate": 8.156181300397414e-07, + "loss": 0.5527158975601196, + "step": 4979 + }, + { + "epoch": 1.7548898678414098, + "grad_norm": 2.1597753145956786, + "learning_rate": 8.133149735371316e-07, + "loss": 0.5870147943496704, + "step": 4980 + }, + { + "epoch": 1.7552422907488987, + "grad_norm": 2.0333589118991497, + "learning_rate": 8.110149356281848e-07, + "loss": 0.7235025763511658, + "step": 4981 + }, + { + "epoch": 1.7555947136563876, + "grad_norm": 1.9283097758260628, + "learning_rate": 8.087180170937004e-07, + "loss": 0.5630521774291992, + "step": 4982 + }, + { + "epoch": 1.7559471365638766, + "grad_norm": 2.015740627515862, + "learning_rate": 8.06424218713403e-07, + "loss": 0.5005021691322327, + "step": 4983 + }, + { + "epoch": 1.7562995594713655, + "grad_norm": 2.0683486617790066, + "learning_rate": 8.041335412659679e-07, + "loss": 0.7267229557037354, + "step": 4984 + }, + { + "epoch": 1.7566519823788547, + "grad_norm": 
2.2397406108409834, + "learning_rate": 8.018459855290107e-07, + "loss": 0.6494802236557007, + "step": 4985 + }, + { + "epoch": 1.7570044052863436, + "grad_norm": 1.8012009390187627, + "learning_rate": 7.995615522790845e-07, + "loss": 0.5637267827987671, + "step": 4986 + }, + { + "epoch": 1.7573568281938328, + "grad_norm": 1.807872858711751, + "learning_rate": 7.972802422916826e-07, + "loss": 0.5143958330154419, + "step": 4987 + }, + { + "epoch": 1.7577092511013217, + "grad_norm": 1.7925007157989583, + "learning_rate": 7.950020563412398e-07, + "loss": 0.607841968536377, + "step": 4988 + }, + { + "epoch": 1.7580616740088106, + "grad_norm": 1.9011698158798267, + "learning_rate": 7.927269952011285e-07, + "loss": 0.6066895723342896, + "step": 4989 + }, + { + "epoch": 1.7584140969162996, + "grad_norm": 2.293924542695718, + "learning_rate": 7.904550596436611e-07, + "loss": 0.6686232686042786, + "step": 4990 + }, + { + "epoch": 1.7587665198237885, + "grad_norm": 1.7540251789370713, + "learning_rate": 7.881862504400884e-07, + "loss": 0.589708685874939, + "step": 4991 + }, + { + "epoch": 1.7591189427312774, + "grad_norm": 1.9346002211307631, + "learning_rate": 7.859205683606008e-07, + "loss": 0.7008450031280518, + "step": 4992 + }, + { + "epoch": 1.7594713656387664, + "grad_norm": 1.5488386957340947, + "learning_rate": 7.836580141743289e-07, + "loss": 0.5754648447036743, + "step": 4993 + }, + { + "epoch": 1.7598237885462555, + "grad_norm": 1.8204543329281522, + "learning_rate": 7.81398588649338e-07, + "loss": 0.5756049156188965, + "step": 4994 + }, + { + "epoch": 1.7601762114537445, + "grad_norm": 1.8754803653843481, + "learning_rate": 7.791422925526326e-07, + "loss": 0.6143715381622314, + "step": 4995 + }, + { + "epoch": 1.7605286343612336, + "grad_norm": 1.9795958910244131, + "learning_rate": 7.768891266501544e-07, + "loss": 0.700069010257721, + "step": 4996 + }, + { + "epoch": 1.7608810572687226, + "grad_norm": 1.8030282940418303, + "learning_rate": 7.746390917067847e-07, 
+ "loss": 0.5200002193450928, + "step": 4997 + }, + { + "epoch": 1.7612334801762115, + "grad_norm": 2.0811179040330483, + "learning_rate": 7.723921884863395e-07, + "loss": 0.6963525414466858, + "step": 4998 + }, + { + "epoch": 1.7615859030837004, + "grad_norm": 1.9255908471526815, + "learning_rate": 7.701484177515717e-07, + "loss": 0.6329556703567505, + "step": 4999 + }, + { + "epoch": 1.7619383259911894, + "grad_norm": 2.0796773022688213, + "learning_rate": 7.67907780264171e-07, + "loss": 0.6980677247047424, + "step": 5000 + }, + { + "epoch": 1.7622907488986783, + "grad_norm": 1.95091452058077, + "learning_rate": 7.656702767847679e-07, + "loss": 0.5244314670562744, + "step": 5001 + }, + { + "epoch": 1.7626431718061673, + "grad_norm": 1.937585844549177, + "learning_rate": 7.634359080729215e-07, + "loss": 0.6679523587226868, + "step": 5002 + }, + { + "epoch": 1.7629955947136564, + "grad_norm": 1.7698344536731299, + "learning_rate": 7.612046748871327e-07, + "loss": 0.6168316602706909, + "step": 5003 + }, + { + "epoch": 1.7633480176211453, + "grad_norm": 1.8295319189191592, + "learning_rate": 7.589765779848346e-07, + "loss": 0.5892738699913025, + "step": 5004 + }, + { + "epoch": 1.7637004405286345, + "grad_norm": 1.8270406797726577, + "learning_rate": 7.567516181223966e-07, + "loss": 0.6714082956314087, + "step": 5005 + }, + { + "epoch": 1.7640528634361234, + "grad_norm": 1.7798086214061835, + "learning_rate": 7.545297960551245e-07, + "loss": 0.6327016353607178, + "step": 5006 + }, + { + "epoch": 1.7644052863436124, + "grad_norm": 1.8272907155681217, + "learning_rate": 7.52311112537254e-07, + "loss": 0.5114126205444336, + "step": 5007 + }, + { + "epoch": 1.7647577092511013, + "grad_norm": 1.9198067827489789, + "learning_rate": 7.500955683219646e-07, + "loss": 0.5701695084571838, + "step": 5008 + }, + { + "epoch": 1.7651101321585903, + "grad_norm": 1.7304483866926885, + "learning_rate": 7.478831641613616e-07, + "loss": 0.5966283082962036, + "step": 5009 + }, + { + 
"epoch": 1.7654625550660792, + "grad_norm": 1.7690414353003558, + "learning_rate": 7.456739008064883e-07, + "loss": 0.6219101548194885, + "step": 5010 + }, + { + "epoch": 1.7658149779735681, + "grad_norm": 2.1971226449232804, + "learning_rate": 7.434677790073197e-07, + "loss": 0.6516324877738953, + "step": 5011 + }, + { + "epoch": 1.7661674008810573, + "grad_norm": 2.0945250680543395, + "learning_rate": 7.412647995127664e-07, + "loss": 0.4623621106147766, + "step": 5012 + }, + { + "epoch": 1.7665198237885462, + "grad_norm": 1.7568345992089816, + "learning_rate": 7.390649630706703e-07, + "loss": 0.5661109685897827, + "step": 5013 + }, + { + "epoch": 1.7668722466960354, + "grad_norm": 2.0070117088967154, + "learning_rate": 7.368682704278096e-07, + "loss": 0.47063148021698, + "step": 5014 + }, + { + "epoch": 1.7672246696035243, + "grad_norm": 1.636187219475051, + "learning_rate": 7.346747223298889e-07, + "loss": 0.5684597492218018, + "step": 5015 + }, + { + "epoch": 1.7675770925110132, + "grad_norm": 1.872749765270047, + "learning_rate": 7.324843195215548e-07, + "loss": 0.5614477396011353, + "step": 5016 + }, + { + "epoch": 1.7679295154185022, + "grad_norm": 1.9944667195924293, + "learning_rate": 7.302970627463779e-07, + "loss": 0.508664608001709, + "step": 5017 + }, + { + "epoch": 1.7682819383259911, + "grad_norm": 1.9918093815103546, + "learning_rate": 7.281129527468645e-07, + "loss": 0.5348209142684937, + "step": 5018 + }, + { + "epoch": 1.76863436123348, + "grad_norm": 2.2774118234615695, + "learning_rate": 7.259319902644513e-07, + "loss": 0.6441121101379395, + "step": 5019 + }, + { + "epoch": 1.7689867841409692, + "grad_norm": 1.7776640162425583, + "learning_rate": 7.237541760395083e-07, + "loss": 0.6454842686653137, + "step": 5020 + }, + { + "epoch": 1.7693392070484582, + "grad_norm": 1.818033997112941, + "learning_rate": 7.215795108113343e-07, + "loss": 0.4822286367416382, + "step": 5021 + }, + { + "epoch": 1.769691629955947, + "grad_norm": 2.2519074742911775, 
+ "learning_rate": 7.19407995318162e-07, + "loss": 0.6078327894210815, + "step": 5022 + }, + { + "epoch": 1.7700440528634362, + "grad_norm": 1.9964867958416748, + "learning_rate": 7.172396302971507e-07, + "loss": 0.6394459009170532, + "step": 5023 + }, + { + "epoch": 1.7703964757709252, + "grad_norm": 1.919321953608054, + "learning_rate": 7.150744164843959e-07, + "loss": 0.646416425704956, + "step": 5024 + }, + { + "epoch": 1.7707488986784141, + "grad_norm": 1.743918601710363, + "learning_rate": 7.129123546149208e-07, + "loss": 0.6265356540679932, + "step": 5025 + }, + { + "epoch": 1.771101321585903, + "grad_norm": 1.717725969603381, + "learning_rate": 7.107534454226728e-07, + "loss": 0.5074717998504639, + "step": 5026 + }, + { + "epoch": 1.771453744493392, + "grad_norm": 1.9181838757933405, + "learning_rate": 7.0859768964054e-07, + "loss": 0.7036402821540833, + "step": 5027 + }, + { + "epoch": 1.771806167400881, + "grad_norm": 1.7638856276686163, + "learning_rate": 7.064450880003327e-07, + "loss": 0.6098893880844116, + "step": 5028 + }, + { + "epoch": 1.77215859030837, + "grad_norm": 2.005026773406909, + "learning_rate": 7.042956412327917e-07, + "loss": 0.582880973815918, + "step": 5029 + }, + { + "epoch": 1.772511013215859, + "grad_norm": 2.013313109536588, + "learning_rate": 7.021493500675869e-07, + "loss": 0.6003242135047913, + "step": 5030 + }, + { + "epoch": 1.7728634361233482, + "grad_norm": 1.9319887994625418, + "learning_rate": 7.000062152333165e-07, + "loss": 0.4999944865703583, + "step": 5031 + }, + { + "epoch": 1.7732158590308371, + "grad_norm": 1.8450299102376384, + "learning_rate": 6.978662374575107e-07, + "loss": 0.5569149255752563, + "step": 5032 + }, + { + "epoch": 1.773568281938326, + "grad_norm": 1.9277460192299252, + "learning_rate": 6.957294174666263e-07, + "loss": 0.5600287914276123, + "step": 5033 + }, + { + "epoch": 1.773920704845815, + "grad_norm": 1.8890013971887576, + "learning_rate": 6.935957559860418e-07, + "loss": 0.5412951707839966, + 
"step": 5034 + }, + { + "epoch": 1.774273127753304, + "grad_norm": 1.7378105888388657, + "learning_rate": 6.914652537400735e-07, + "loss": 0.5881151556968689, + "step": 5035 + }, + { + "epoch": 1.7746255506607929, + "grad_norm": 1.8829243382985155, + "learning_rate": 6.893379114519572e-07, + "loss": 0.5975406169891357, + "step": 5036 + }, + { + "epoch": 1.7749779735682818, + "grad_norm": 1.7883517993987919, + "learning_rate": 6.872137298438653e-07, + "loss": 0.6266802549362183, + "step": 5037 + }, + { + "epoch": 1.775330396475771, + "grad_norm": 2.279148556628154, + "learning_rate": 6.850927096368854e-07, + "loss": 0.6825709939002991, + "step": 5038 + }, + { + "epoch": 1.77568281938326, + "grad_norm": 1.6068572613194736, + "learning_rate": 6.829748515510381e-07, + "loss": 0.6035742163658142, + "step": 5039 + }, + { + "epoch": 1.776035242290749, + "grad_norm": 1.901514453732062, + "learning_rate": 6.808601563052742e-07, + "loss": 0.6665611267089844, + "step": 5040 + }, + { + "epoch": 1.776387665198238, + "grad_norm": 2.334324554300087, + "learning_rate": 6.787486246174657e-07, + "loss": 0.8202367424964905, + "step": 5041 + }, + { + "epoch": 1.776740088105727, + "grad_norm": 1.8080635950130315, + "learning_rate": 6.766402572044084e-07, + "loss": 0.6516656875610352, + "step": 5042 + }, + { + "epoch": 1.7770925110132159, + "grad_norm": 1.6361942373114873, + "learning_rate": 6.745350547818307e-07, + "loss": 0.663591742515564, + "step": 5043 + }, + { + "epoch": 1.7774449339207048, + "grad_norm": 2.0460511379273716, + "learning_rate": 6.724330180643824e-07, + "loss": 0.6025142669677734, + "step": 5044 + }, + { + "epoch": 1.7777973568281937, + "grad_norm": 1.6332878492082579, + "learning_rate": 6.703341477656422e-07, + "loss": 0.5704027414321899, + "step": 5045 + }, + { + "epoch": 1.7781497797356827, + "grad_norm": 2.0053343984683534, + "learning_rate": 6.682384445981071e-07, + "loss": 0.6518473625183105, + "step": 5046 + }, + { + "epoch": 1.7785022026431718, + 
"grad_norm": 1.6878153153712165, + "learning_rate": 6.661459092732037e-07, + "loss": 0.5547574758529663, + "step": 5047 + }, + { + "epoch": 1.7788546255506608, + "grad_norm": 1.8096814000573205, + "learning_rate": 6.640565425012846e-07, + "loss": 0.6248831748962402, + "step": 5048 + }, + { + "epoch": 1.77920704845815, + "grad_norm": 1.8747085080187502, + "learning_rate": 6.619703449916259e-07, + "loss": 0.5899701118469238, + "step": 5049 + }, + { + "epoch": 1.7795594713656389, + "grad_norm": 1.9253293216058311, + "learning_rate": 6.598873174524223e-07, + "loss": 0.41864174604415894, + "step": 5050 + }, + { + "epoch": 1.7799118942731278, + "grad_norm": 2.2457701854009025, + "learning_rate": 6.578074605908002e-07, + "loss": 0.7473436594009399, + "step": 5051 + }, + { + "epoch": 1.7802643171806167, + "grad_norm": 1.6599111795216646, + "learning_rate": 6.557307751128051e-07, + "loss": 0.49480879306793213, + "step": 5052 + }, + { + "epoch": 1.7806167400881057, + "grad_norm": 1.8257078701065834, + "learning_rate": 6.536572617234082e-07, + "loss": 0.5619323253631592, + "step": 5053 + }, + { + "epoch": 1.7809691629955946, + "grad_norm": 1.8566139978409217, + "learning_rate": 6.515869211265013e-07, + "loss": 0.5271984338760376, + "step": 5054 + }, + { + "epoch": 1.7813215859030835, + "grad_norm": 1.967436768949709, + "learning_rate": 6.495197540248999e-07, + "loss": 0.6544383764266968, + "step": 5055 + }, + { + "epoch": 1.7816740088105727, + "grad_norm": 2.157946298106486, + "learning_rate": 6.474557611203458e-07, + "loss": 0.6525388956069946, + "step": 5056 + }, + { + "epoch": 1.7820264317180616, + "grad_norm": 2.0314482863762735, + "learning_rate": 6.453949431134987e-07, + "loss": 0.5509910583496094, + "step": 5057 + }, + { + "epoch": 1.7823788546255508, + "grad_norm": 1.6067790596532618, + "learning_rate": 6.433373007039412e-07, + "loss": 0.5030776262283325, + "step": 5058 + }, + { + "epoch": 1.7827312775330397, + "grad_norm": 1.875686429811456, + "learning_rate": 
6.412828345901811e-07, + "loss": 0.6743696331977844, + "step": 5059 + }, + { + "epoch": 1.7830837004405287, + "grad_norm": 1.9399780429001139, + "learning_rate": 6.392315454696452e-07, + "loss": 0.5395437479019165, + "step": 5060 + }, + { + "epoch": 1.7834361233480176, + "grad_norm": 1.7657846282567238, + "learning_rate": 6.371834340386807e-07, + "loss": 0.5773402452468872, + "step": 5061 + }, + { + "epoch": 1.7837885462555065, + "grad_norm": 1.920136830142019, + "learning_rate": 6.351385009925582e-07, + "loss": 0.6014268398284912, + "step": 5062 + }, + { + "epoch": 1.7841409691629955, + "grad_norm": 1.9465884411051106, + "learning_rate": 6.33096747025469e-07, + "loss": 0.5519139170646667, + "step": 5063 + }, + { + "epoch": 1.7844933920704846, + "grad_norm": 3.0085962631929752, + "learning_rate": 6.310581728305254e-07, + "loss": 0.5407502055168152, + "step": 5064 + }, + { + "epoch": 1.7848458149779736, + "grad_norm": 1.5371833099084395, + "learning_rate": 6.290227790997605e-07, + "loss": 0.61688232421875, + "step": 5065 + }, + { + "epoch": 1.7851982378854625, + "grad_norm": 2.002396471657761, + "learning_rate": 6.269905665241271e-07, + "loss": 0.5212849974632263, + "step": 5066 + }, + { + "epoch": 1.7855506607929517, + "grad_norm": 1.7684490871986807, + "learning_rate": 6.249615357934968e-07, + "loss": 0.6827710866928101, + "step": 5067 + }, + { + "epoch": 1.7859030837004406, + "grad_norm": 2.016669351586175, + "learning_rate": 6.22935687596663e-07, + "loss": 0.6907633543014526, + "step": 5068 + }, + { + "epoch": 1.7862555066079295, + "grad_norm": 2.045834595721204, + "learning_rate": 6.209130226213378e-07, + "loss": 0.5707769989967346, + "step": 5069 + }, + { + "epoch": 1.7866079295154185, + "grad_norm": 1.9432188628486171, + "learning_rate": 6.188935415541541e-07, + "loss": 0.6062690019607544, + "step": 5070 + }, + { + "epoch": 1.7869603524229074, + "grad_norm": 1.8744219034756735, + "learning_rate": 6.168772450806604e-07, + "loss": 0.5291163921356201, + "step": 
5071 + }, + { + "epoch": 1.7873127753303963, + "grad_norm": 1.8892054954511246, + "learning_rate": 6.148641338853301e-07, + "loss": 0.6324198246002197, + "step": 5072 + }, + { + "epoch": 1.7876651982378855, + "grad_norm": 1.7030219876612867, + "learning_rate": 6.128542086515499e-07, + "loss": 0.5516111850738525, + "step": 5073 + }, + { + "epoch": 1.7880176211453744, + "grad_norm": 2.1800478368143232, + "learning_rate": 6.108474700616263e-07, + "loss": 0.6384079456329346, + "step": 5074 + }, + { + "epoch": 1.7883700440528636, + "grad_norm": 1.777234944410244, + "learning_rate": 6.088439187967865e-07, + "loss": 0.5699876546859741, + "step": 5075 + }, + { + "epoch": 1.7887224669603525, + "grad_norm": 2.081274535023766, + "learning_rate": 6.06843555537171e-07, + "loss": 0.6068697571754456, + "step": 5076 + }, + { + "epoch": 1.7890748898678415, + "grad_norm": 2.1233392160842066, + "learning_rate": 6.048463809618444e-07, + "loss": 0.6254304647445679, + "step": 5077 + }, + { + "epoch": 1.7894273127753304, + "grad_norm": 2.0059926594667914, + "learning_rate": 6.02852395748782e-07, + "loss": 0.6779477596282959, + "step": 5078 + }, + { + "epoch": 1.7897797356828193, + "grad_norm": 1.8024145072939486, + "learning_rate": 6.008616005748802e-07, + "loss": 0.6139817833900452, + "step": 5079 + }, + { + "epoch": 1.7901321585903083, + "grad_norm": 2.042935872875493, + "learning_rate": 5.988739961159539e-07, + "loss": 0.553310215473175, + "step": 5080 + }, + { + "epoch": 1.7904845814977972, + "grad_norm": 1.9543566497010472, + "learning_rate": 5.968895830467325e-07, + "loss": 0.6093542575836182, + "step": 5081 + }, + { + "epoch": 1.7908370044052864, + "grad_norm": 1.8231021161772492, + "learning_rate": 5.949083620408614e-07, + "loss": 0.6224432587623596, + "step": 5082 + }, + { + "epoch": 1.7911894273127753, + "grad_norm": 1.881995664144807, + "learning_rate": 5.929303337709047e-07, + "loss": 0.6155597567558289, + "step": 5083 + }, + { + "epoch": 1.7915418502202645, + "grad_norm": 
1.7127795559170356, + "learning_rate": 5.909554989083411e-07, + "loss": 0.5742098093032837, + "step": 5084 + }, + { + "epoch": 1.7918942731277534, + "grad_norm": 2.1579790645115886, + "learning_rate": 5.889838581235641e-07, + "loss": 0.7427949905395508, + "step": 5085 + }, + { + "epoch": 1.7922466960352423, + "grad_norm": 1.8686834683482023, + "learning_rate": 5.870154120858851e-07, + "loss": 0.48208528757095337, + "step": 5086 + }, + { + "epoch": 1.7925991189427313, + "grad_norm": 2.103622298674757, + "learning_rate": 5.850501614635318e-07, + "loss": 0.48402148485183716, + "step": 5087 + }, + { + "epoch": 1.7929515418502202, + "grad_norm": 1.9085757415865392, + "learning_rate": 5.83088106923646e-07, + "loss": 0.6808921694755554, + "step": 5088 + }, + { + "epoch": 1.7933039647577091, + "grad_norm": 1.4851842618773352, + "learning_rate": 5.811292491322795e-07, + "loss": 0.48358428478240967, + "step": 5089 + }, + { + "epoch": 1.793656387665198, + "grad_norm": 1.801328000774117, + "learning_rate": 5.791735887544081e-07, + "loss": 0.6492827534675598, + "step": 5090 + }, + { + "epoch": 1.7940088105726872, + "grad_norm": 1.542873674028149, + "learning_rate": 5.772211264539162e-07, + "loss": 0.5453791618347168, + "step": 5091 + }, + { + "epoch": 1.7943612334801762, + "grad_norm": 1.780642500081645, + "learning_rate": 5.75271862893605e-07, + "loss": 0.5901151895523071, + "step": 5092 + }, + { + "epoch": 1.7947136563876653, + "grad_norm": 2.0888993209852664, + "learning_rate": 5.73325798735187e-07, + "loss": 0.616302490234375, + "step": 5093 + }, + { + "epoch": 1.7950660792951543, + "grad_norm": 1.7666548150635142, + "learning_rate": 5.713829346392907e-07, + "loss": 0.616886556148529, + "step": 5094 + }, + { + "epoch": 1.7954185022026432, + "grad_norm": 2.1253066780397725, + "learning_rate": 5.694432712654597e-07, + "loss": 0.5552375316619873, + "step": 5095 + }, + { + "epoch": 1.7957709251101321, + "grad_norm": 1.9305053090727797, + "learning_rate": 5.675068092721491e-07, 
+ "loss": 0.5956143736839294, + "step": 5096 + }, + { + "epoch": 1.796123348017621, + "grad_norm": 2.0198097994194675, + "learning_rate": 5.655735493167247e-07, + "loss": 0.5870288610458374, + "step": 5097 + }, + { + "epoch": 1.79647577092511, + "grad_norm": 1.737470684820577, + "learning_rate": 5.636434920554701e-07, + "loss": 0.5325669646263123, + "step": 5098 + }, + { + "epoch": 1.7968281938325992, + "grad_norm": 1.9881595702868853, + "learning_rate": 5.617166381435813e-07, + "loss": 0.5931425094604492, + "step": 5099 + }, + { + "epoch": 1.797180616740088, + "grad_norm": 1.9607916445612916, + "learning_rate": 5.597929882351627e-07, + "loss": 0.5755603313446045, + "step": 5100 + }, + { + "epoch": 1.797533039647577, + "grad_norm": 2.000480246693455, + "learning_rate": 5.578725429832344e-07, + "loss": 0.5780980587005615, + "step": 5101 + }, + { + "epoch": 1.7978854625550662, + "grad_norm": 1.9982279321373282, + "learning_rate": 5.559553030397258e-07, + "loss": 0.5863890647888184, + "step": 5102 + }, + { + "epoch": 1.7982378854625551, + "grad_norm": 1.8196971349794717, + "learning_rate": 5.540412690554842e-07, + "loss": 0.5577390789985657, + "step": 5103 + }, + { + "epoch": 1.798590308370044, + "grad_norm": 1.773628551628446, + "learning_rate": 5.521304416802642e-07, + "loss": 0.5994857549667358, + "step": 5104 + }, + { + "epoch": 1.798942731277533, + "grad_norm": 1.8364843823531443, + "learning_rate": 5.502228215627281e-07, + "loss": 0.6065348982810974, + "step": 5105 + }, + { + "epoch": 1.799295154185022, + "grad_norm": 1.9447341697044171, + "learning_rate": 5.483184093504568e-07, + "loss": 0.5390498638153076, + "step": 5106 + }, + { + "epoch": 1.7996475770925109, + "grad_norm": 1.9731136151561257, + "learning_rate": 5.464172056899364e-07, + "loss": 0.5826783180236816, + "step": 5107 + }, + { + "epoch": 1.8, + "grad_norm": 1.7733740837200977, + "learning_rate": 5.445192112265718e-07, + "loss": 0.5429874658584595, + "step": 5108 + }, + { + "epoch": 
1.800352422907489, + "grad_norm": 1.8521585290179927, + "learning_rate": 5.426244266046676e-07, + "loss": 0.5591466426849365, + "step": 5109 + }, + { + "epoch": 1.8007048458149781, + "grad_norm": 1.6996794293630604, + "learning_rate": 5.407328524674449e-07, + "loss": 0.5351911187171936, + "step": 5110 + }, + { + "epoch": 1.801057268722467, + "grad_norm": 1.9525068150093072, + "learning_rate": 5.388444894570378e-07, + "loss": 0.6095720529556274, + "step": 5111 + }, + { + "epoch": 1.801409691629956, + "grad_norm": 1.9048124225268466, + "learning_rate": 5.369593382144844e-07, + "loss": 0.6278849840164185, + "step": 5112 + }, + { + "epoch": 1.801762114537445, + "grad_norm": 1.932605893192458, + "learning_rate": 5.350773993797332e-07, + "loss": 0.6787056922912598, + "step": 5113 + }, + { + "epoch": 1.8021145374449339, + "grad_norm": 1.7901749162387552, + "learning_rate": 5.331986735916461e-07, + "loss": 0.6054684519767761, + "step": 5114 + }, + { + "epoch": 1.8024669603524228, + "grad_norm": 1.9918768270140568, + "learning_rate": 5.31323161487991e-07, + "loss": 0.5039973855018616, + "step": 5115 + }, + { + "epoch": 1.8028193832599118, + "grad_norm": 2.1203502988203207, + "learning_rate": 5.294508637054474e-07, + "loss": 0.6306504011154175, + "step": 5116 + }, + { + "epoch": 1.803171806167401, + "grad_norm": 1.5433818431075417, + "learning_rate": 5.275817808796013e-07, + "loss": 0.5654761791229248, + "step": 5117 + }, + { + "epoch": 1.8035242290748899, + "grad_norm": 1.84553610812893, + "learning_rate": 5.257159136449452e-07, + "loss": 0.5801905989646912, + "step": 5118 + }, + { + "epoch": 1.803876651982379, + "grad_norm": 1.9190330109285871, + "learning_rate": 5.238532626348891e-07, + "loss": 0.6565619707107544, + "step": 5119 + }, + { + "epoch": 1.804229074889868, + "grad_norm": 2.043183915925982, + "learning_rate": 5.219938284817416e-07, + "loss": 0.5923253297805786, + "step": 5120 + }, + { + "epoch": 1.8045814977973569, + "grad_norm": 2.0522176560055647, + 
"learning_rate": 5.2013761181672e-07, + "loss": 0.6697949171066284, + "step": 5121 + }, + { + "epoch": 1.8049339207048458, + "grad_norm": 1.5694231089682613, + "learning_rate": 5.182846132699571e-07, + "loss": 0.5146230459213257, + "step": 5122 + }, + { + "epoch": 1.8052863436123348, + "grad_norm": 1.8882278421308176, + "learning_rate": 5.16434833470485e-07, + "loss": 0.5928882360458374, + "step": 5123 + }, + { + "epoch": 1.8056387665198237, + "grad_norm": 1.8209325836560148, + "learning_rate": 5.145882730462481e-07, + "loss": 0.6114771366119385, + "step": 5124 + }, + { + "epoch": 1.8059911894273126, + "grad_norm": 2.0596769025893122, + "learning_rate": 5.127449326240952e-07, + "loss": 0.6624642014503479, + "step": 5125 + }, + { + "epoch": 1.8063436123348018, + "grad_norm": 1.6177669824438379, + "learning_rate": 5.109048128297822e-07, + "loss": 0.6277980208396912, + "step": 5126 + }, + { + "epoch": 1.8066960352422907, + "grad_norm": 1.8432956331440709, + "learning_rate": 5.090679142879751e-07, + "loss": 0.6470246911048889, + "step": 5127 + }, + { + "epoch": 1.8070484581497799, + "grad_norm": 1.9361376318593135, + "learning_rate": 5.072342376222438e-07, + "loss": 0.6418337821960449, + "step": 5128 + }, + { + "epoch": 1.8074008810572688, + "grad_norm": 1.7303831881097942, + "learning_rate": 5.054037834550596e-07, + "loss": 0.6013847589492798, + "step": 5129 + }, + { + "epoch": 1.8077533039647578, + "grad_norm": 2.0870369514809086, + "learning_rate": 5.035765524078095e-07, + "loss": 0.5354605913162231, + "step": 5130 + }, + { + "epoch": 1.8081057268722467, + "grad_norm": 1.7245482885328716, + "learning_rate": 5.01752545100781e-07, + "loss": 0.6017459034919739, + "step": 5131 + }, + { + "epoch": 1.8084581497797356, + "grad_norm": 2.1853671040659335, + "learning_rate": 4.999317621531663e-07, + "loss": 0.5929696559906006, + "step": 5132 + }, + { + "epoch": 1.8088105726872246, + "grad_norm": 2.1106102623060723, + "learning_rate": 4.981142041830645e-07, + "loss": 
0.6444251537322998, + "step": 5133 + }, + { + "epoch": 1.8091629955947135, + "grad_norm": 1.9231094224982612, + "learning_rate": 4.962998718074807e-07, + "loss": 0.5854116678237915, + "step": 5134 + }, + { + "epoch": 1.8095154185022027, + "grad_norm": 1.674252446757184, + "learning_rate": 4.944887656423248e-07, + "loss": 0.5145394206047058, + "step": 5135 + }, + { + "epoch": 1.8098678414096916, + "grad_norm": 1.9221197947181823, + "learning_rate": 4.926808863024102e-07, + "loss": 0.5733104348182678, + "step": 5136 + }, + { + "epoch": 1.8102202643171807, + "grad_norm": 1.955048282910108, + "learning_rate": 4.908762344014573e-07, + "loss": 0.5925072431564331, + "step": 5137 + }, + { + "epoch": 1.8105726872246697, + "grad_norm": 1.8754640994406597, + "learning_rate": 4.890748105520859e-07, + "loss": 0.5346912145614624, + "step": 5138 + }, + { + "epoch": 1.8109251101321586, + "grad_norm": 1.636475505756285, + "learning_rate": 4.87276615365827e-07, + "loss": 0.6206755638122559, + "step": 5139 + }, + { + "epoch": 1.8112775330396476, + "grad_norm": 2.0734228349073076, + "learning_rate": 4.854816494531089e-07, + "loss": 0.5998660326004028, + "step": 5140 + }, + { + "epoch": 1.8116299559471365, + "grad_norm": 2.10222956499389, + "learning_rate": 4.836899134232687e-07, + "loss": 0.44545644521713257, + "step": 5141 + }, + { + "epoch": 1.8119823788546254, + "grad_norm": 1.904050289597462, + "learning_rate": 4.81901407884543e-07, + "loss": 0.701204776763916, + "step": 5142 + }, + { + "epoch": 1.8123348017621146, + "grad_norm": 1.8707530799436762, + "learning_rate": 4.801161334440762e-07, + "loss": 0.6103897094726562, + "step": 5143 + }, + { + "epoch": 1.8126872246696035, + "grad_norm": 1.7727850982789193, + "learning_rate": 4.783340907079126e-07, + "loss": 0.5864719152450562, + "step": 5144 + }, + { + "epoch": 1.8130396475770925, + "grad_norm": 1.436946543481978, + "learning_rate": 4.7655528028099916e-07, + "loss": 0.46949082612991333, + "step": 5145 + }, + { + "epoch": 
1.8133920704845816, + "grad_norm": 1.9729708472080463, + "learning_rate": 4.7477970276718855e-07, + "loss": 0.6371885538101196, + "step": 5146 + }, + { + "epoch": 1.8137444933920706, + "grad_norm": 2.043577546107911, + "learning_rate": 4.730073587692319e-07, + "loss": 0.6819220781326294, + "step": 5147 + }, + { + "epoch": 1.8140969162995595, + "grad_norm": 1.7501541102560871, + "learning_rate": 4.712382488887868e-07, + "loss": 0.5230735540390015, + "step": 5148 + }, + { + "epoch": 1.8144493392070484, + "grad_norm": 1.6629154647812032, + "learning_rate": 4.6947237372640954e-07, + "loss": 0.5194997787475586, + "step": 5149 + }, + { + "epoch": 1.8148017621145374, + "grad_norm": 2.6396803493511842, + "learning_rate": 4.677097338815595e-07, + "loss": 0.6025055050849915, + "step": 5150 + }, + { + "epoch": 1.8151541850220263, + "grad_norm": 1.9158428969793393, + "learning_rate": 4.6595032995260135e-07, + "loss": 0.649467945098877, + "step": 5151 + }, + { + "epoch": 1.8155066079295155, + "grad_norm": 1.8951471308172565, + "learning_rate": 4.641941625367918e-07, + "loss": 0.5216347575187683, + "step": 5152 + }, + { + "epoch": 1.8158590308370044, + "grad_norm": 2.264572307408149, + "learning_rate": 4.6244123223030177e-07, + "loss": 0.5135647058486938, + "step": 5153 + }, + { + "epoch": 1.8162114537444936, + "grad_norm": 1.8178771999892822, + "learning_rate": 4.6069153962819193e-07, + "loss": 0.5526058673858643, + "step": 5154 + }, + { + "epoch": 1.8165638766519825, + "grad_norm": 2.050533288883353, + "learning_rate": 4.589450853244315e-07, + "loss": 0.5897486209869385, + "step": 5155 + }, + { + "epoch": 1.8169162995594714, + "grad_norm": 1.8009014119109743, + "learning_rate": 4.5720186991188517e-07, + "loss": 0.5698407888412476, + "step": 5156 + }, + { + "epoch": 1.8172687224669604, + "grad_norm": 1.7954864355128493, + "learning_rate": 4.5546189398232075e-07, + "loss": 0.579573392868042, + "step": 5157 + }, + { + "epoch": 1.8176211453744493, + "grad_norm": 
1.7473651992455344, + "learning_rate": 4.5372515812640573e-07, + "loss": 0.41852182149887085, + "step": 5158 + }, + { + "epoch": 1.8179735682819382, + "grad_norm": 1.7056493552996725, + "learning_rate": 4.519916629337107e-07, + "loss": 0.6081204414367676, + "step": 5159 + }, + { + "epoch": 1.8183259911894272, + "grad_norm": 2.046109798166009, + "learning_rate": 4.502614089926982e-07, + "loss": 0.5725652575492859, + "step": 5160 + }, + { + "epoch": 1.8186784140969163, + "grad_norm": 1.7147916989755474, + "learning_rate": 4.4853439689073965e-07, + "loss": 0.5109303593635559, + "step": 5161 + }, + { + "epoch": 1.8190308370044053, + "grad_norm": 1.8721629996812361, + "learning_rate": 4.468106272141004e-07, + "loss": 0.5647833347320557, + "step": 5162 + }, + { + "epoch": 1.8193832599118944, + "grad_norm": 1.8784402680779348, + "learning_rate": 4.450901005479469e-07, + "loss": 0.6074738502502441, + "step": 5163 + }, + { + "epoch": 1.8197356828193834, + "grad_norm": 1.9135972387212516, + "learning_rate": 4.433728174763452e-07, + "loss": 0.647289514541626, + "step": 5164 + }, + { + "epoch": 1.8200881057268723, + "grad_norm": 2.08976454113542, + "learning_rate": 4.416587785822568e-07, + "loss": 0.5817590951919556, + "step": 5165 + }, + { + "epoch": 1.8204405286343612, + "grad_norm": 2.105714289057314, + "learning_rate": 4.399479844475485e-07, + "loss": 0.6483672857284546, + "step": 5166 + }, + { + "epoch": 1.8207929515418502, + "grad_norm": 1.9562649517319024, + "learning_rate": 4.382404356529801e-07, + "loss": 0.5439441204071045, + "step": 5167 + }, + { + "epoch": 1.821145374449339, + "grad_norm": 1.8467126365486348, + "learning_rate": 4.3653613277820804e-07, + "loss": 0.5835710167884827, + "step": 5168 + }, + { + "epoch": 1.821497797356828, + "grad_norm": 1.9450074521030982, + "learning_rate": 4.3483507640179503e-07, + "loss": 0.7024152874946594, + "step": 5169 + }, + { + "epoch": 1.8218502202643172, + "grad_norm": 1.880332916659811, + "learning_rate": 
4.331372671011935e-07, + "loss": 0.5223513841629028, + "step": 5170 + }, + { + "epoch": 1.8222026431718061, + "grad_norm": 2.771814545513559, + "learning_rate": 4.3144270545275814e-07, + "loss": 0.5975688099861145, + "step": 5171 + }, + { + "epoch": 1.8225550660792953, + "grad_norm": 1.5329834705964882, + "learning_rate": 4.2975139203173977e-07, + "loss": 0.5459109544754028, + "step": 5172 + }, + { + "epoch": 1.8229074889867842, + "grad_norm": 1.8202354421886453, + "learning_rate": 4.2806332741228586e-07, + "loss": 0.6155862808227539, + "step": 5173 + }, + { + "epoch": 1.8232599118942732, + "grad_norm": 2.2226946714753644, + "learning_rate": 4.263785121674435e-07, + "loss": 0.6505374908447266, + "step": 5174 + }, + { + "epoch": 1.823612334801762, + "grad_norm": 1.9153455724722082, + "learning_rate": 4.246969468691553e-07, + "loss": 0.5243734121322632, + "step": 5175 + }, + { + "epoch": 1.823964757709251, + "grad_norm": 1.8732488601912396, + "learning_rate": 4.2301863208825676e-07, + "loss": 0.6931817531585693, + "step": 5176 + }, + { + "epoch": 1.82431718061674, + "grad_norm": 1.969859922329015, + "learning_rate": 4.2134356839448665e-07, + "loss": 0.5312765836715698, + "step": 5177 + }, + { + "epoch": 1.824669603524229, + "grad_norm": 1.9404158745446412, + "learning_rate": 4.1967175635647674e-07, + "loss": 0.598992109298706, + "step": 5178 + }, + { + "epoch": 1.825022026431718, + "grad_norm": 1.7631344780586065, + "learning_rate": 4.1800319654175413e-07, + "loss": 0.5844708681106567, + "step": 5179 + }, + { + "epoch": 1.825374449339207, + "grad_norm": 1.9995354508958225, + "learning_rate": 4.1633788951674357e-07, + "loss": 0.5884612798690796, + "step": 5180 + }, + { + "epoch": 1.8257268722466962, + "grad_norm": 1.72810410086028, + "learning_rate": 4.1467583584676395e-07, + "loss": 0.6038404107093811, + "step": 5181 + }, + { + "epoch": 1.826079295154185, + "grad_norm": 2.339259211755874, + "learning_rate": 4.130170360960317e-07, + "loss": 0.6511296033859253, + 
"step": 5182 + }, + { + "epoch": 1.826431718061674, + "grad_norm": 1.925197944351106, + "learning_rate": 4.113614908276609e-07, + "loss": 0.5884404182434082, + "step": 5183 + }, + { + "epoch": 1.826784140969163, + "grad_norm": 1.731239361884253, + "learning_rate": 4.097092006036507e-07, + "loss": 0.5549901723861694, + "step": 5184 + }, + { + "epoch": 1.827136563876652, + "grad_norm": 1.994782951411243, + "learning_rate": 4.0806016598490707e-07, + "loss": 0.561951756477356, + "step": 5185 + }, + { + "epoch": 1.8274889867841408, + "grad_norm": 1.869408348764558, + "learning_rate": 4.064143875312254e-07, + "loss": 0.6412413120269775, + "step": 5186 + }, + { + "epoch": 1.82784140969163, + "grad_norm": 1.6798143654231001, + "learning_rate": 4.0477186580129447e-07, + "loss": 0.6295674443244934, + "step": 5187 + }, + { + "epoch": 1.828193832599119, + "grad_norm": 1.6293958799120483, + "learning_rate": 4.031326013527015e-07, + "loss": 0.6700723767280579, + "step": 5188 + }, + { + "epoch": 1.8285462555066079, + "grad_norm": 1.8215522719850648, + "learning_rate": 4.014965947419236e-07, + "loss": 0.5758254528045654, + "step": 5189 + }, + { + "epoch": 1.828898678414097, + "grad_norm": 1.9932829475641192, + "learning_rate": 3.9986384652433654e-07, + "loss": 0.6663509607315063, + "step": 5190 + }, + { + "epoch": 1.829251101321586, + "grad_norm": 1.9935453293677252, + "learning_rate": 3.982343572542069e-07, + "loss": 0.6459337472915649, + "step": 5191 + }, + { + "epoch": 1.829603524229075, + "grad_norm": 1.854876606446137, + "learning_rate": 3.9660812748469336e-07, + "loss": 0.6411766409873962, + "step": 5192 + }, + { + "epoch": 1.8299559471365638, + "grad_norm": 2.1651745240120976, + "learning_rate": 3.9498515776785207e-07, + "loss": 0.711888313293457, + "step": 5193 + }, + { + "epoch": 1.8303083700440528, + "grad_norm": 2.2389356684810284, + "learning_rate": 3.933654486546312e-07, + "loss": 0.63288813829422, + "step": 5194 + }, + { + "epoch": 1.8306607929515417, + "grad_norm": 
1.9048245223498055, + "learning_rate": 3.9174900069486985e-07, + "loss": 0.6330822706222534, + "step": 5195 + }, + { + "epoch": 1.8310132158590309, + "grad_norm": 2.0831179708663154, + "learning_rate": 3.901358144373035e-07, + "loss": 0.7242149114608765, + "step": 5196 + }, + { + "epoch": 1.8313656387665198, + "grad_norm": 1.8790323108631095, + "learning_rate": 3.885258904295575e-07, + "loss": 0.6741703748703003, + "step": 5197 + }, + { + "epoch": 1.831718061674009, + "grad_norm": 1.9200909143991698, + "learning_rate": 3.8691922921815226e-07, + "loss": 0.625057578086853, + "step": 5198 + }, + { + "epoch": 1.832070484581498, + "grad_norm": 2.457846968244059, + "learning_rate": 3.853158313484995e-07, + "loss": 0.673669159412384, + "step": 5199 + }, + { + "epoch": 1.8324229074889868, + "grad_norm": 1.7310768756301407, + "learning_rate": 3.837156973648992e-07, + "loss": 0.5981203317642212, + "step": 5200 + }, + { + "epoch": 1.8327753303964758, + "grad_norm": 2.2560941225086992, + "learning_rate": 3.821188278105514e-07, + "loss": 0.6577199697494507, + "step": 5201 + }, + { + "epoch": 1.8331277533039647, + "grad_norm": 1.8570769012933126, + "learning_rate": 3.805252232275414e-07, + "loss": 0.6951043605804443, + "step": 5202 + }, + { + "epoch": 1.8334801762114536, + "grad_norm": 1.874325920944958, + "learning_rate": 3.7893488415684964e-07, + "loss": 0.572435200214386, + "step": 5203 + }, + { + "epoch": 1.8338325991189426, + "grad_norm": 1.7906206085216059, + "learning_rate": 3.773478111383455e-07, + "loss": 0.5849496126174927, + "step": 5204 + }, + { + "epoch": 1.8341850220264317, + "grad_norm": 1.9908368337543014, + "learning_rate": 3.7576400471079023e-07, + "loss": 0.5380967855453491, + "step": 5205 + }, + { + "epoch": 1.8345374449339207, + "grad_norm": 1.7322293442190257, + "learning_rate": 3.7418346541183923e-07, + "loss": 0.5681222677230835, + "step": 5206 + }, + { + "epoch": 1.8348898678414098, + "grad_norm": 1.7551676131968534, + "learning_rate": 
3.7260619377803677e-07, + "loss": 0.5012099146842957, + "step": 5207 + }, + { + "epoch": 1.8352422907488988, + "grad_norm": 1.9889231090545432, + "learning_rate": 3.710321903448133e-07, + "loss": 0.6175205707550049, + "step": 5208 + }, + { + "epoch": 1.8355947136563877, + "grad_norm": 2.0658320822662137, + "learning_rate": 3.6946145564649817e-07, + "loss": 0.6190954446792603, + "step": 5209 + }, + { + "epoch": 1.8359471365638766, + "grad_norm": 2.067936609981899, + "learning_rate": 3.678939902163048e-07, + "loss": 0.6820691823959351, + "step": 5210 + }, + { + "epoch": 1.8362995594713656, + "grad_norm": 1.6116358163190896, + "learning_rate": 3.6632979458633867e-07, + "loss": 0.5309683084487915, + "step": 5211 + }, + { + "epoch": 1.8366519823788545, + "grad_norm": 1.7416007879814253, + "learning_rate": 3.6476886928759726e-07, + "loss": 0.5110820531845093, + "step": 5212 + }, + { + "epoch": 1.8370044052863435, + "grad_norm": 1.723221372899004, + "learning_rate": 3.6321121484996447e-07, + "loss": 0.6226333975791931, + "step": 5213 + }, + { + "epoch": 1.8373568281938326, + "grad_norm": 2.234178040191492, + "learning_rate": 3.6165683180221735e-07, + "loss": 0.6287777423858643, + "step": 5214 + }, + { + "epoch": 1.8377092511013216, + "grad_norm": 1.9295755553308827, + "learning_rate": 3.601057206720182e-07, + "loss": 0.7033661603927612, + "step": 5215 + }, + { + "epoch": 1.8380616740088107, + "grad_norm": 2.3805238150126473, + "learning_rate": 3.5855788198592257e-07, + "loss": 0.5841168165206909, + "step": 5216 + }, + { + "epoch": 1.8384140969162996, + "grad_norm": 1.9475866760038651, + "learning_rate": 3.570133162693734e-07, + "loss": 0.6797176599502563, + "step": 5217 + }, + { + "epoch": 1.8387665198237886, + "grad_norm": 1.8282916435885754, + "learning_rate": 3.5547202404670246e-07, + "loss": 0.4317880868911743, + "step": 5218 + }, + { + "epoch": 1.8391189427312775, + "grad_norm": 1.8334146730463823, + "learning_rate": 3.5393400584113004e-07, + "loss": 
0.4757443368434906, + "step": 5219 + }, + { + "epoch": 1.8394713656387665, + "grad_norm": 1.907804753373484, + "learning_rate": 3.5239926217476627e-07, + "loss": 0.6341856718063354, + "step": 5220 + }, + { + "epoch": 1.8398237885462554, + "grad_norm": 1.8320811149781473, + "learning_rate": 3.5086779356860777e-07, + "loss": 0.5401504039764404, + "step": 5221 + }, + { + "epoch": 1.8401762114537445, + "grad_norm": 1.9485378653698677, + "learning_rate": 3.4933960054254314e-07, + "loss": 0.507185697555542, + "step": 5222 + }, + { + "epoch": 1.8405286343612335, + "grad_norm": 1.8475072625751607, + "learning_rate": 3.478146836153418e-07, + "loss": 0.544599175453186, + "step": 5223 + }, + { + "epoch": 1.8408810572687224, + "grad_norm": 1.7516560167770228, + "learning_rate": 3.4629304330466964e-07, + "loss": 0.5231183767318726, + "step": 5224 + }, + { + "epoch": 1.8412334801762116, + "grad_norm": 1.9594972590005177, + "learning_rate": 3.447746801270746e-07, + "loss": 0.5505118370056152, + "step": 5225 + }, + { + "epoch": 1.8415859030837005, + "grad_norm": 1.8779318369867126, + "learning_rate": 3.432595945979944e-07, + "loss": 0.6056097149848938, + "step": 5226 + }, + { + "epoch": 1.8419383259911895, + "grad_norm": 2.1828814894071806, + "learning_rate": 3.4174778723175204e-07, + "loss": 0.6292518377304077, + "step": 5227 + }, + { + "epoch": 1.8422907488986784, + "grad_norm": 2.121254282924953, + "learning_rate": 3.4023925854156035e-07, + "loss": 0.6821235418319702, + "step": 5228 + }, + { + "epoch": 1.8426431718061673, + "grad_norm": 1.8646887822875091, + "learning_rate": 3.3873400903951636e-07, + "loss": 0.6663388013839722, + "step": 5229 + }, + { + "epoch": 1.8429955947136563, + "grad_norm": 1.7699721471254064, + "learning_rate": 3.3723203923660795e-07, + "loss": 0.5283368825912476, + "step": 5230 + }, + { + "epoch": 1.8433480176211454, + "grad_norm": 1.8757843861417383, + "learning_rate": 3.35733349642704e-07, + "loss": 0.6193508505821228, + "step": 5231 + }, + { + 
"epoch": 1.8437004405286344, + "grad_norm": 1.8277200643148488, + "learning_rate": 3.3423794076656635e-07, + "loss": 0.5790667533874512, + "step": 5232 + }, + { + "epoch": 1.8440528634361235, + "grad_norm": 1.8773326611638317, + "learning_rate": 3.3274581311583786e-07, + "loss": 0.5774649381637573, + "step": 5233 + }, + { + "epoch": 1.8444052863436124, + "grad_norm": 1.8907427086265292, + "learning_rate": 3.312569671970489e-07, + "loss": 0.7818938493728638, + "step": 5234 + }, + { + "epoch": 1.8447577092511014, + "grad_norm": 1.9327729742836703, + "learning_rate": 3.297714035156174e-07, + "loss": 0.7140024900436401, + "step": 5235 + }, + { + "epoch": 1.8451101321585903, + "grad_norm": 1.8813227413168874, + "learning_rate": 3.2828912257584664e-07, + "loss": 0.526549220085144, + "step": 5236 + }, + { + "epoch": 1.8454625550660793, + "grad_norm": 1.7801884231788352, + "learning_rate": 3.268101248809219e-07, + "loss": 0.5497986078262329, + "step": 5237 + }, + { + "epoch": 1.8458149779735682, + "grad_norm": 1.8669723447216968, + "learning_rate": 3.2533441093292153e-07, + "loss": 0.587260901927948, + "step": 5238 + }, + { + "epoch": 1.8461674008810571, + "grad_norm": 1.7543011465942289, + "learning_rate": 3.238619812327992e-07, + "loss": 0.6064329147338867, + "step": 5239 + }, + { + "epoch": 1.8465198237885463, + "grad_norm": 1.6866654405083865, + "learning_rate": 3.22392836280403e-07, + "loss": 0.5427783727645874, + "step": 5240 + }, + { + "epoch": 1.8468722466960352, + "grad_norm": 2.007154381007414, + "learning_rate": 3.209269765744605e-07, + "loss": 0.6315155029296875, + "step": 5241 + }, + { + "epoch": 1.8472246696035244, + "grad_norm": 1.8683798567232428, + "learning_rate": 3.194644026125848e-07, + "loss": 0.47614991664886475, + "step": 5242 + }, + { + "epoch": 1.8475770925110133, + "grad_norm": 1.7870378472192856, + "learning_rate": 3.1800511489127553e-07, + "loss": 0.4671345353126526, + "step": 5243 + }, + { + "epoch": 1.8479295154185023, + "grad_norm": 
2.1401583736619774, + "learning_rate": 3.1654911390591404e-07, + "loss": 0.5751510262489319, + "step": 5244 + }, + { + "epoch": 1.8482819383259912, + "grad_norm": 1.8052174793154305, + "learning_rate": 3.1509640015076946e-07, + "loss": 0.41024816036224365, + "step": 5245 + }, + { + "epoch": 1.8486343612334801, + "grad_norm": 1.731551636677765, + "learning_rate": 3.136469741189918e-07, + "loss": 0.5401195287704468, + "step": 5246 + }, + { + "epoch": 1.848986784140969, + "grad_norm": 1.653370854405324, + "learning_rate": 3.1220083630261413e-07, + "loss": 0.526515007019043, + "step": 5247 + }, + { + "epoch": 1.849339207048458, + "grad_norm": 1.8913718815401968, + "learning_rate": 3.1075798719255813e-07, + "loss": 0.5476140975952148, + "step": 5248 + }, + { + "epoch": 1.8496916299559472, + "grad_norm": 1.8985078398075201, + "learning_rate": 3.093184272786254e-07, + "loss": 0.5542911291122437, + "step": 5249 + }, + { + "epoch": 1.850044052863436, + "grad_norm": 1.880723497688654, + "learning_rate": 3.078821570495005e-07, + "loss": 0.5147569179534912, + "step": 5250 + }, + { + "epoch": 1.8503964757709253, + "grad_norm": 1.982026450369604, + "learning_rate": 3.0644917699275355e-07, + "loss": 0.5774611830711365, + "step": 5251 + }, + { + "epoch": 1.8507488986784142, + "grad_norm": 1.7200421440570042, + "learning_rate": 3.0501948759483646e-07, + "loss": 0.6516300439834595, + "step": 5252 + }, + { + "epoch": 1.8511013215859031, + "grad_norm": 2.0195950340864495, + "learning_rate": 3.0359308934108435e-07, + "loss": 0.7598013877868652, + "step": 5253 + }, + { + "epoch": 1.851453744493392, + "grad_norm": 2.0638022912417506, + "learning_rate": 3.0216998271571653e-07, + "loss": 0.5605336427688599, + "step": 5254 + }, + { + "epoch": 1.851806167400881, + "grad_norm": 2.028778763216705, + "learning_rate": 3.007501682018288e-07, + "loss": 0.6549514532089233, + "step": 5255 + }, + { + "epoch": 1.85215859030837, + "grad_norm": 2.059939172990393, + "learning_rate": 
2.993336462814089e-07, + "loss": 0.5390901565551758, + "step": 5256 + }, + { + "epoch": 1.8525110132158589, + "grad_norm": 1.812559235788011, + "learning_rate": 2.979204174353201e-07, + "loss": 0.5039275884628296, + "step": 5257 + }, + { + "epoch": 1.852863436123348, + "grad_norm": 1.6793203683546194, + "learning_rate": 2.9651048214330956e-07, + "loss": 0.4715292453765869, + "step": 5258 + }, + { + "epoch": 1.853215859030837, + "grad_norm": 1.5445048853459802, + "learning_rate": 2.951038408840068e-07, + "loss": 0.4593687653541565, + "step": 5259 + }, + { + "epoch": 1.8535682819383261, + "grad_norm": 2.427211613937901, + "learning_rate": 2.9370049413492084e-07, + "loss": 0.8451346158981323, + "step": 5260 + }, + { + "epoch": 1.853920704845815, + "grad_norm": 1.796887553027914, + "learning_rate": 2.923004423724474e-07, + "loss": 0.5567130446434021, + "step": 5261 + }, + { + "epoch": 1.854273127753304, + "grad_norm": 1.6019285108338794, + "learning_rate": 2.909036860718595e-07, + "loss": 0.4740293622016907, + "step": 5262 + }, + { + "epoch": 1.854625550660793, + "grad_norm": 1.566732286884799, + "learning_rate": 2.895102257073101e-07, + "loss": 0.5279378294944763, + "step": 5263 + }, + { + "epoch": 1.8549779735682819, + "grad_norm": 2.0699049521167923, + "learning_rate": 2.881200617518387e-07, + "loss": 0.5977471470832825, + "step": 5264 + }, + { + "epoch": 1.8553303964757708, + "grad_norm": 2.147594228172352, + "learning_rate": 2.8673319467736104e-07, + "loss": 0.5385996699333191, + "step": 5265 + }, + { + "epoch": 1.85568281938326, + "grad_norm": 2.011382389323699, + "learning_rate": 2.85349624954675e-07, + "loss": 0.5702279806137085, + "step": 5266 + }, + { + "epoch": 1.856035242290749, + "grad_norm": 1.875774247263156, + "learning_rate": 2.839693530534604e-07, + "loss": 0.584097146987915, + "step": 5267 + }, + { + "epoch": 1.8563876651982378, + "grad_norm": 1.9561416110933127, + "learning_rate": 2.825923794422758e-07, + "loss": 0.6205782890319824, + "step": 5268 + 
}, + { + "epoch": 1.856740088105727, + "grad_norm": 1.8766933117628495, + "learning_rate": 2.8121870458856284e-07, + "loss": 0.5626852512359619, + "step": 5269 + }, + { + "epoch": 1.857092511013216, + "grad_norm": 1.826792073608219, + "learning_rate": 2.798483289586396e-07, + "loss": 0.6052513122558594, + "step": 5270 + }, + { + "epoch": 1.8574449339207049, + "grad_norm": 2.051566447554152, + "learning_rate": 2.7848125301770504e-07, + "loss": 0.5074095726013184, + "step": 5271 + }, + { + "epoch": 1.8577973568281938, + "grad_norm": 2.3608926664844705, + "learning_rate": 2.7711747722984127e-07, + "loss": 0.8006119728088379, + "step": 5272 + }, + { + "epoch": 1.8581497797356827, + "grad_norm": 1.939365874771501, + "learning_rate": 2.7575700205800694e-07, + "loss": 0.6437188982963562, + "step": 5273 + }, + { + "epoch": 1.8585022026431717, + "grad_norm": 2.070323156152843, + "learning_rate": 2.743998279640403e-07, + "loss": 0.6610177755355835, + "step": 5274 + }, + { + "epoch": 1.8588546255506608, + "grad_norm": 2.242727394045801, + "learning_rate": 2.7304595540865953e-07, + "loss": 0.6041977405548096, + "step": 5275 + }, + { + "epoch": 1.8592070484581498, + "grad_norm": 2.296252009493085, + "learning_rate": 2.716953848514625e-07, + "loss": 0.5684002041816711, + "step": 5276 + }, + { + "epoch": 1.859559471365639, + "grad_norm": 2.108426771462305, + "learning_rate": 2.703481167509281e-07, + "loss": 0.7256498336791992, + "step": 5277 + }, + { + "epoch": 1.8599118942731279, + "grad_norm": 1.959590007863519, + "learning_rate": 2.690041515644093e-07, + "loss": 0.7264266014099121, + "step": 5278 + }, + { + "epoch": 1.8602643171806168, + "grad_norm": 2.0027244373685047, + "learning_rate": 2.6766348974813895e-07, + "loss": 0.5427879095077515, + "step": 5279 + }, + { + "epoch": 1.8606167400881057, + "grad_norm": 1.679848534564951, + "learning_rate": 2.663261317572341e-07, + "loss": 0.5970745086669922, + "step": 5280 + }, + { + "epoch": 1.8609691629955947, + "grad_norm": 
1.9989999209106484, + "learning_rate": 2.6499207804568495e-07, + "loss": 0.5796299576759338, + "step": 5281 + }, + { + "epoch": 1.8613215859030836, + "grad_norm": 1.6433355014728201, + "learning_rate": 2.6366132906635923e-07, + "loss": 0.4900246262550354, + "step": 5282 + }, + { + "epoch": 1.8616740088105725, + "grad_norm": 1.8937189873731617, + "learning_rate": 2.6233388527100777e-07, + "loss": 0.6052582263946533, + "step": 5283 + }, + { + "epoch": 1.8620264317180617, + "grad_norm": 2.1632344831004127, + "learning_rate": 2.610097471102524e-07, + "loss": 0.6908484697341919, + "step": 5284 + }, + { + "epoch": 1.8623788546255506, + "grad_norm": 1.9493448159947622, + "learning_rate": 2.596889150336024e-07, + "loss": 0.6353795528411865, + "step": 5285 + }, + { + "epoch": 1.8627312775330398, + "grad_norm": 2.019445353702499, + "learning_rate": 2.5837138948943354e-07, + "loss": 0.803575873374939, + "step": 5286 + }, + { + "epoch": 1.8630837004405287, + "grad_norm": 1.9882041113358364, + "learning_rate": 2.5705717092500694e-07, + "loss": 0.5551957488059998, + "step": 5287 + }, + { + "epoch": 1.8634361233480177, + "grad_norm": 1.9987103830633048, + "learning_rate": 2.5574625978646017e-07, + "loss": 0.6247879266738892, + "step": 5288 + }, + { + "epoch": 1.8637885462555066, + "grad_norm": 2.072117287811421, + "learning_rate": 2.544386565188062e-07, + "loss": 0.6029977798461914, + "step": 5289 + }, + { + "epoch": 1.8641409691629955, + "grad_norm": 2.101747258049668, + "learning_rate": 2.531343615659343e-07, + "loss": 0.611297070980072, + "step": 5290 + }, + { + "epoch": 1.8644933920704845, + "grad_norm": 2.1168170865355616, + "learning_rate": 2.518333753706137e-07, + "loss": 0.5290260314941406, + "step": 5291 + }, + { + "epoch": 1.8648458149779734, + "grad_norm": 1.88270236786552, + "learning_rate": 2.5053569837448664e-07, + "loss": 0.5988795757293701, + "step": 5292 + }, + { + "epoch": 1.8651982378854626, + "grad_norm": 2.1933893236783613, + "learning_rate": 
2.4924133101807636e-07, + "loss": 0.671028733253479, + "step": 5293 + }, + { + "epoch": 1.8655506607929515, + "grad_norm": 2.195163128107634, + "learning_rate": 2.4795027374077905e-07, + "loss": 0.5741167664527893, + "step": 5294 + }, + { + "epoch": 1.8659030837004407, + "grad_norm": 1.8793688638635475, + "learning_rate": 2.4666252698086867e-07, + "loss": 0.47447216510772705, + "step": 5295 + }, + { + "epoch": 1.8662555066079296, + "grad_norm": 1.813537542020307, + "learning_rate": 2.453780911754955e-07, + "loss": 0.6535651087760925, + "step": 5296 + }, + { + "epoch": 1.8666079295154185, + "grad_norm": 1.830958965071389, + "learning_rate": 2.4409696676068517e-07, + "loss": 0.5928847193717957, + "step": 5297 + }, + { + "epoch": 1.8669603524229075, + "grad_norm": 2.1016696944101363, + "learning_rate": 2.428191541713387e-07, + "loss": 0.5928774476051331, + "step": 5298 + }, + { + "epoch": 1.8673127753303964, + "grad_norm": 1.8181831294339377, + "learning_rate": 2.415446538412358e-07, + "loss": 0.5798670053482056, + "step": 5299 + }, + { + "epoch": 1.8676651982378853, + "grad_norm": 1.8162014512536164, + "learning_rate": 2.4027346620302707e-07, + "loss": 0.6222843527793884, + "step": 5300 + }, + { + "epoch": 1.8680176211453743, + "grad_norm": 1.9183032685045331, + "learning_rate": 2.39005591688245e-07, + "loss": 0.5501612424850464, + "step": 5301 + }, + { + "epoch": 1.8683700440528634, + "grad_norm": 1.7621857286720093, + "learning_rate": 2.377410307272887e-07, + "loss": 0.5266422033309937, + "step": 5302 + }, + { + "epoch": 1.8687224669603524, + "grad_norm": 1.9926692528436012, + "learning_rate": 2.3647978374944037e-07, + "loss": 0.7145729064941406, + "step": 5303 + }, + { + "epoch": 1.8690748898678415, + "grad_norm": 1.8939089473542137, + "learning_rate": 2.3522185118285411e-07, + "loss": 0.6505781412124634, + "step": 5304 + }, + { + "epoch": 1.8694273127753305, + "grad_norm": 2.0817226286854607, + "learning_rate": 2.3396723345455728e-07, + "loss": 
0.6278528571128845, + "step": 5305 + }, + { + "epoch": 1.8697797356828194, + "grad_norm": 1.790557343760165, + "learning_rate": 2.3271593099045475e-07, + "loss": 0.5650503039360046, + "step": 5306 + }, + { + "epoch": 1.8701321585903083, + "grad_norm": 1.6157546701422072, + "learning_rate": 2.314679442153256e-07, + "loss": 0.6267939209938049, + "step": 5307 + }, + { + "epoch": 1.8704845814977973, + "grad_norm": 1.874302486649101, + "learning_rate": 2.302232735528187e-07, + "loss": 0.45913875102996826, + "step": 5308 + }, + { + "epoch": 1.8708370044052862, + "grad_norm": 1.7607480001908633, + "learning_rate": 2.289819194254661e-07, + "loss": 0.6122059226036072, + "step": 5309 + }, + { + "epoch": 1.8711894273127754, + "grad_norm": 1.803806841150382, + "learning_rate": 2.2774388225466514e-07, + "loss": 0.6479405164718628, + "step": 5310 + }, + { + "epoch": 1.8715418502202643, + "grad_norm": 1.8546829656575279, + "learning_rate": 2.26509162460693e-07, + "loss": 0.5013849139213562, + "step": 5311 + }, + { + "epoch": 1.8718942731277532, + "grad_norm": 1.749663744266161, + "learning_rate": 2.2527776046269767e-07, + "loss": 0.6431373357772827, + "step": 5312 + }, + { + "epoch": 1.8722466960352424, + "grad_norm": 1.669095711801791, + "learning_rate": 2.2404967667870147e-07, + "loss": 0.6447317004203796, + "step": 5313 + }, + { + "epoch": 1.8725991189427313, + "grad_norm": 2.405218866271529, + "learning_rate": 2.2282491152560203e-07, + "loss": 0.5784682631492615, + "step": 5314 + }, + { + "epoch": 1.8729515418502203, + "grad_norm": 1.7544004376252713, + "learning_rate": 2.2160346541916677e-07, + "loss": 0.560835599899292, + "step": 5315 + }, + { + "epoch": 1.8733039647577092, + "grad_norm": 1.7162975954294335, + "learning_rate": 2.2038533877404066e-07, + "loss": 0.5930913686752319, + "step": 5316 + }, + { + "epoch": 1.8736563876651982, + "grad_norm": 1.9892540663354406, + "learning_rate": 2.1917053200374073e-07, + "loss": 0.7221095561981201, + "step": 5317 + }, + { + "epoch": 
1.874008810572687, + "grad_norm": 1.9380281400359725, + "learning_rate": 2.179590455206515e-07, + "loss": 0.6307567358016968, + "step": 5318 + }, + { + "epoch": 1.8743612334801762, + "grad_norm": 2.0190052317760814, + "learning_rate": 2.167508797360396e-07, + "loss": 0.6158597469329834, + "step": 5319 + }, + { + "epoch": 1.8747136563876652, + "grad_norm": 1.7468326387459954, + "learning_rate": 2.1554603506003802e-07, + "loss": 0.5778557062149048, + "step": 5320 + }, + { + "epoch": 1.8750660792951543, + "grad_norm": 1.497372593580549, + "learning_rate": 2.1434451190165294e-07, + "loss": 0.5213632583618164, + "step": 5321 + }, + { + "epoch": 1.8754185022026433, + "grad_norm": 1.8555907678767487, + "learning_rate": 2.131463106687659e-07, + "loss": 0.6633203029632568, + "step": 5322 + }, + { + "epoch": 1.8757709251101322, + "grad_norm": 1.9991798348617227, + "learning_rate": 2.1195143176812817e-07, + "loss": 0.6586780548095703, + "step": 5323 + }, + { + "epoch": 1.8761233480176212, + "grad_norm": 1.991978810673319, + "learning_rate": 2.1075987560536305e-07, + "loss": 0.4946047067642212, + "step": 5324 + }, + { + "epoch": 1.87647577092511, + "grad_norm": 1.6744690075916624, + "learning_rate": 2.0957164258497031e-07, + "loss": 0.5689302682876587, + "step": 5325 + }, + { + "epoch": 1.876828193832599, + "grad_norm": 1.9550201402383367, + "learning_rate": 2.0838673311031287e-07, + "loss": 0.5761843323707581, + "step": 5326 + }, + { + "epoch": 1.877180616740088, + "grad_norm": 1.6070623974889393, + "learning_rate": 2.0720514758363343e-07, + "loss": 0.5714447498321533, + "step": 5327 + }, + { + "epoch": 1.8775330396475771, + "grad_norm": 1.7537019465709125, + "learning_rate": 2.0602688640604441e-07, + "loss": 0.4566301107406616, + "step": 5328 + }, + { + "epoch": 1.877885462555066, + "grad_norm": 2.110089760102471, + "learning_rate": 2.04851949977527e-07, + "loss": 0.6326137781143188, + "step": 5329 + }, + { + "epoch": 1.8782378854625552, + "grad_norm": 1.8775980517302555, + 
"learning_rate": 2.036803386969355e-07, + "loss": 0.6342206001281738, + "step": 5330 + }, + { + "epoch": 1.8785903083700441, + "grad_norm": 1.9958405881870251, + "learning_rate": 2.0251205296199616e-07, + "loss": 0.5525872707366943, + "step": 5331 + }, + { + "epoch": 1.878942731277533, + "grad_norm": 1.6965395036886874, + "learning_rate": 2.0134709316930733e-07, + "loss": 0.4932950735092163, + "step": 5332 + }, + { + "epoch": 1.879295154185022, + "grad_norm": 1.7918605717870588, + "learning_rate": 2.001854597143349e-07, + "loss": 0.6526485681533813, + "step": 5333 + }, + { + "epoch": 1.879647577092511, + "grad_norm": 1.8862781919579625, + "learning_rate": 1.990271529914156e-07, + "loss": 0.6256940960884094, + "step": 5334 + }, + { + "epoch": 1.88, + "grad_norm": 2.361417623387243, + "learning_rate": 1.9787217339376053e-07, + "loss": 0.6406987905502319, + "step": 5335 + }, + { + "epoch": 1.8803524229074888, + "grad_norm": 1.812802653812012, + "learning_rate": 1.9672052131345043e-07, + "loss": 0.6141321659088135, + "step": 5336 + }, + { + "epoch": 1.880704845814978, + "grad_norm": 2.025004487176686, + "learning_rate": 1.955721971414326e-07, + "loss": 0.558428943157196, + "step": 5337 + }, + { + "epoch": 1.881057268722467, + "grad_norm": 1.973943138705469, + "learning_rate": 1.9442720126752968e-07, + "loss": 0.5995065569877625, + "step": 5338 + }, + { + "epoch": 1.881409691629956, + "grad_norm": 1.6822565518265986, + "learning_rate": 1.932855340804296e-07, + "loss": 0.5109822750091553, + "step": 5339 + }, + { + "epoch": 1.881762114537445, + "grad_norm": 1.941646392245956, + "learning_rate": 1.921471959676957e-07, + "loss": 0.6695220470428467, + "step": 5340 + }, + { + "epoch": 1.882114537444934, + "grad_norm": 1.8857636319654494, + "learning_rate": 1.9101218731575777e-07, + "loss": 0.6982283592224121, + "step": 5341 + }, + { + "epoch": 1.882466960352423, + "grad_norm": 1.8944501787373655, + "learning_rate": 1.8988050850991314e-07, + "loss": 0.6475410461425781, + 
"step": 5342 + }, + { + "epoch": 1.8828193832599118, + "grad_norm": 1.7449353446414906, + "learning_rate": 1.8875215993433448e-07, + "loss": 0.57706218957901, + "step": 5343 + }, + { + "epoch": 1.8831718061674008, + "grad_norm": 1.708696671712054, + "learning_rate": 1.8762714197205988e-07, + "loss": 0.5243045091629028, + "step": 5344 + }, + { + "epoch": 1.88352422907489, + "grad_norm": 1.797956034726921, + "learning_rate": 1.865054550049994e-07, + "loss": 0.6208887100219727, + "step": 5345 + }, + { + "epoch": 1.8838766519823789, + "grad_norm": 1.9048581772706628, + "learning_rate": 1.853870994139284e-07, + "loss": 0.5572443008422852, + "step": 5346 + }, + { + "epoch": 1.8842290748898678, + "grad_norm": 1.7939928987370566, + "learning_rate": 1.8427207557849436e-07, + "loss": 0.5673031806945801, + "step": 5347 + }, + { + "epoch": 1.884581497797357, + "grad_norm": 1.6894216214789064, + "learning_rate": 1.8316038387721558e-07, + "loss": 0.5085422992706299, + "step": 5348 + }, + { + "epoch": 1.8849339207048459, + "grad_norm": 1.7455381888238348, + "learning_rate": 1.8205202468747463e-07, + "loss": 0.5480824708938599, + "step": 5349 + }, + { + "epoch": 1.8852863436123348, + "grad_norm": 1.7848642016680003, + "learning_rate": 1.8094699838552387e-07, + "loss": 0.6236293911933899, + "step": 5350 + }, + { + "epoch": 1.8856387665198238, + "grad_norm": 1.7626474829765526, + "learning_rate": 1.798453053464888e-07, + "loss": 0.541741132736206, + "step": 5351 + }, + { + "epoch": 1.8859911894273127, + "grad_norm": 1.7289887528200605, + "learning_rate": 1.7874694594435692e-07, + "loss": 0.5309538245201111, + "step": 5352 + }, + { + "epoch": 1.8863436123348016, + "grad_norm": 1.944311199542912, + "learning_rate": 1.7765192055198888e-07, + "loss": 0.5886228084564209, + "step": 5353 + }, + { + "epoch": 1.8866960352422908, + "grad_norm": 1.6415851491633797, + "learning_rate": 1.7656022954111064e-07, + "loss": 0.6216265559196472, + "step": 5354 + }, + { + "epoch": 1.8870484581497797, + 
"grad_norm": 1.6922081510439257, + "learning_rate": 1.7547187328231575e-07, + "loss": 0.5393999814987183, + "step": 5355 + }, + { + "epoch": 1.8874008810572689, + "grad_norm": 1.7167987260272457, + "learning_rate": 1.74386852145072e-07, + "loss": 0.583373486995697, + "step": 5356 + }, + { + "epoch": 1.8877533039647578, + "grad_norm": 2.361225928566298, + "learning_rate": 1.73305166497707e-07, + "loss": 0.6403313875198364, + "step": 5357 + }, + { + "epoch": 1.8881057268722468, + "grad_norm": 1.771396849548527, + "learning_rate": 1.7222681670741814e-07, + "loss": 0.5780963897705078, + "step": 5358 + }, + { + "epoch": 1.8884581497797357, + "grad_norm": 1.59802053134679, + "learning_rate": 1.711518031402748e-07, + "loss": 0.6046397686004639, + "step": 5359 + }, + { + "epoch": 1.8888105726872246, + "grad_norm": 1.5504259730519754, + "learning_rate": 1.700801261612084e-07, + "loss": 0.5582219362258911, + "step": 5360 + }, + { + "epoch": 1.8891629955947136, + "grad_norm": 1.962329345083699, + "learning_rate": 1.6901178613402125e-07, + "loss": 0.4880410432815552, + "step": 5361 + }, + { + "epoch": 1.8895154185022025, + "grad_norm": 2.055990524297856, + "learning_rate": 1.6794678342138105e-07, + "loss": 0.7417550086975098, + "step": 5362 + }, + { + "epoch": 1.8898678414096917, + "grad_norm": 1.8316934396355506, + "learning_rate": 1.668851183848219e-07, + "loss": 0.4616948962211609, + "step": 5363 + }, + { + "epoch": 1.8902202643171806, + "grad_norm": 1.6177478399502592, + "learning_rate": 1.658267913847489e-07, + "loss": 0.5595716834068298, + "step": 5364 + }, + { + "epoch": 1.8905726872246698, + "grad_norm": 1.9610306002643032, + "learning_rate": 1.6477180278042793e-07, + "loss": 0.72450852394104, + "step": 5365 + }, + { + "epoch": 1.8909251101321587, + "grad_norm": 1.8036541582694667, + "learning_rate": 1.637201529299959e-07, + "loss": 0.6261592507362366, + "step": 5366 + }, + { + "epoch": 1.8912775330396476, + "grad_norm": 2.1024939179342823, + "learning_rate": 
1.6267184219045607e-07, + "loss": 0.5023064613342285, + "step": 5367 + }, + { + "epoch": 1.8916299559471366, + "grad_norm": 1.9210322300280602, + "learning_rate": 1.6162687091767714e-07, + "loss": 0.7113457918167114, + "step": 5368 + }, + { + "epoch": 1.8919823788546255, + "grad_norm": 1.9212954550271457, + "learning_rate": 1.6058523946639426e-07, + "loss": 0.5376787185668945, + "step": 5369 + }, + { + "epoch": 1.8923348017621144, + "grad_norm": 1.86817536856008, + "learning_rate": 1.5954694819020788e-07, + "loss": 0.6523979902267456, + "step": 5370 + }, + { + "epoch": 1.8926872246696034, + "grad_norm": 1.841265437549123, + "learning_rate": 1.5851199744158607e-07, + "loss": 0.6610705852508545, + "step": 5371 + }, + { + "epoch": 1.8930396475770925, + "grad_norm": 2.0967966308369053, + "learning_rate": 1.5748038757186445e-07, + "loss": 0.657126247882843, + "step": 5372 + }, + { + "epoch": 1.8933920704845815, + "grad_norm": 2.3300722251609893, + "learning_rate": 1.5645211893123846e-07, + "loss": 0.7247096300125122, + "step": 5373 + }, + { + "epoch": 1.8937444933920706, + "grad_norm": 1.5063549897958597, + "learning_rate": 1.5542719186877553e-07, + "loss": 0.5392117500305176, + "step": 5374 + }, + { + "epoch": 1.8940969162995596, + "grad_norm": 1.706529406386883, + "learning_rate": 1.5440560673240735e-07, + "loss": 0.5038361549377441, + "step": 5375 + }, + { + "epoch": 1.8944493392070485, + "grad_norm": 1.9403637299706042, + "learning_rate": 1.5338736386892982e-07, + "loss": 0.4768316447734833, + "step": 5376 + }, + { + "epoch": 1.8948017621145374, + "grad_norm": 1.7917263966392405, + "learning_rate": 1.5237246362400316e-07, + "loss": 0.5925793051719666, + "step": 5377 + }, + { + "epoch": 1.8951541850220264, + "grad_norm": 2.029166285154972, + "learning_rate": 1.5136090634215616e-07, + "loss": 0.47840988636016846, + "step": 5378 + }, + { + "epoch": 1.8955066079295153, + "grad_norm": 1.9172034216887006, + "learning_rate": 1.5035269236677974e-07, + "loss": 
0.6365169882774353, + "step": 5379 + }, + { + "epoch": 1.8958590308370042, + "grad_norm": 1.789950493711397, + "learning_rate": 1.4934782204013344e-07, + "loss": 0.6287797689437866, + "step": 5380 + }, + { + "epoch": 1.8962114537444934, + "grad_norm": 1.8420293657892082, + "learning_rate": 1.4834629570333548e-07, + "loss": 0.6859137415885925, + "step": 5381 + }, + { + "epoch": 1.8965638766519823, + "grad_norm": 1.9365437650034845, + "learning_rate": 1.4734811369637725e-07, + "loss": 0.5545040369033813, + "step": 5382 + }, + { + "epoch": 1.8969162995594715, + "grad_norm": 1.6857031681916985, + "learning_rate": 1.463532763581077e-07, + "loss": 0.6418923139572144, + "step": 5383 + }, + { + "epoch": 1.8972687224669604, + "grad_norm": 4.115242480246632, + "learning_rate": 1.4536178402624334e-07, + "loss": 0.7618488669395447, + "step": 5384 + }, + { + "epoch": 1.8976211453744494, + "grad_norm": 1.7790399709296727, + "learning_rate": 1.4437363703736718e-07, + "loss": 0.6178286671638489, + "step": 5385 + }, + { + "epoch": 1.8979735682819383, + "grad_norm": 2.33955789440919, + "learning_rate": 1.4338883572692087e-07, + "loss": 0.6800570487976074, + "step": 5386 + }, + { + "epoch": 1.8983259911894272, + "grad_norm": 1.9056441030293936, + "learning_rate": 1.4240738042921588e-07, + "loss": 0.6063584089279175, + "step": 5387 + }, + { + "epoch": 1.8986784140969162, + "grad_norm": 1.857878498727731, + "learning_rate": 1.4142927147742792e-07, + "loss": 0.5631873607635498, + "step": 5388 + }, + { + "epoch": 1.8990308370044053, + "grad_norm": 1.6999145603505723, + "learning_rate": 1.4045450920358917e-07, + "loss": 0.5346484184265137, + "step": 5389 + }, + { + "epoch": 1.8993832599118943, + "grad_norm": 1.660876208730021, + "learning_rate": 1.3948309393860605e-07, + "loss": 0.5043535232543945, + "step": 5390 + }, + { + "epoch": 1.8997356828193832, + "grad_norm": 1.9091498065078292, + "learning_rate": 1.3851502601224032e-07, + "loss": 0.6591805219650269, + "step": 5391 + }, + { + 
"epoch": 1.9000881057268724, + "grad_norm": 1.777554153966534, + "learning_rate": 1.3755030575312355e-07, + "loss": 0.6831244826316833, + "step": 5392 + }, + { + "epoch": 1.9004405286343613, + "grad_norm": 1.744983267268657, + "learning_rate": 1.3658893348874714e-07, + "loss": 0.6572617292404175, + "step": 5393 + }, + { + "epoch": 1.9007929515418502, + "grad_norm": 2.007956379457216, + "learning_rate": 1.3563090954546555e-07, + "loss": 0.5834530591964722, + "step": 5394 + }, + { + "epoch": 1.9011453744493392, + "grad_norm": 1.8405418946212868, + "learning_rate": 1.3467623424850084e-07, + "loss": 0.5810972452163696, + "step": 5395 + }, + { + "epoch": 1.9014977973568281, + "grad_norm": 1.8342670520255937, + "learning_rate": 1.3372490792193493e-07, + "loss": 0.6338596940040588, + "step": 5396 + }, + { + "epoch": 1.901850220264317, + "grad_norm": 2.4739742581402946, + "learning_rate": 1.327769308887117e-07, + "loss": 0.5274045467376709, + "step": 5397 + }, + { + "epoch": 1.9022026431718062, + "grad_norm": 2.13415646905843, + "learning_rate": 1.3183230347064147e-07, + "loss": 0.5416278839111328, + "step": 5398 + }, + { + "epoch": 1.9025550660792951, + "grad_norm": 1.8878260396672215, + "learning_rate": 1.3089102598839442e-07, + "loss": 0.4818935692310333, + "step": 5399 + }, + { + "epoch": 1.9029074889867843, + "grad_norm": 1.6383283062285148, + "learning_rate": 1.299530987615072e-07, + "loss": 0.4553770124912262, + "step": 5400 + }, + { + "epoch": 1.9032599118942732, + "grad_norm": 1.7060011862412936, + "learning_rate": 1.2901852210837507e-07, + "loss": 0.5663920640945435, + "step": 5401 + }, + { + "epoch": 1.9036123348017622, + "grad_norm": 1.975611905778012, + "learning_rate": 1.2808729634625872e-07, + "loss": 0.5654638409614563, + "step": 5402 + }, + { + "epoch": 1.903964757709251, + "grad_norm": 2.0012288604540136, + "learning_rate": 1.271594217912797e-07, + "loss": 0.8061939477920532, + "step": 5403 + }, + { + "epoch": 1.90431718061674, + "grad_norm": 
2.149695499003911, + "learning_rate": 1.2623489875842276e-07, + "loss": 0.5832188129425049, + "step": 5404 + }, + { + "epoch": 1.904669603524229, + "grad_norm": 1.8966385092802618, + "learning_rate": 1.2531372756153458e-07, + "loss": 0.6112633943557739, + "step": 5405 + }, + { + "epoch": 1.905022026431718, + "grad_norm": 2.3113031929819106, + "learning_rate": 1.2439590851332394e-07, + "loss": 0.7083494663238525, + "step": 5406 + }, + { + "epoch": 1.905374449339207, + "grad_norm": 1.9110441437452201, + "learning_rate": 1.2348144192536272e-07, + "loss": 0.5319055318832397, + "step": 5407 + }, + { + "epoch": 1.905726872246696, + "grad_norm": 1.9724655581165889, + "learning_rate": 1.2257032810808256e-07, + "loss": 0.6199945211410522, + "step": 5408 + }, + { + "epoch": 1.9060792951541852, + "grad_norm": 2.3233890606574503, + "learning_rate": 1.2166256737077942e-07, + "loss": 0.6596004962921143, + "step": 5409 + }, + { + "epoch": 1.906431718061674, + "grad_norm": 1.9040617554840082, + "learning_rate": 1.20758160021609e-07, + "loss": 0.553988516330719, + "step": 5410 + }, + { + "epoch": 1.906784140969163, + "grad_norm": 2.329855084255152, + "learning_rate": 1.1985710636759128e-07, + "loss": 0.6295895576477051, + "step": 5411 + }, + { + "epoch": 1.907136563876652, + "grad_norm": 2.035449496855298, + "learning_rate": 1.1895940671460271e-07, + "loss": 0.6555598378181458, + "step": 5412 + }, + { + "epoch": 1.907488986784141, + "grad_norm": 1.8252966820746244, + "learning_rate": 1.1806506136738616e-07, + "loss": 0.48203831911087036, + "step": 5413 + }, + { + "epoch": 1.9078414096916299, + "grad_norm": 2.0052153848511045, + "learning_rate": 1.1717407062954434e-07, + "loss": 0.6632858514785767, + "step": 5414 + }, + { + "epoch": 1.9081938325991188, + "grad_norm": 1.913108464706502, + "learning_rate": 1.1628643480354085e-07, + "loss": 0.6058870553970337, + "step": 5415 + }, + { + "epoch": 1.908546255506608, + "grad_norm": 1.6689328390033278, + "learning_rate": 
1.1540215419070022e-07, + "loss": 0.5106638073921204, + "step": 5416 + }, + { + "epoch": 1.9088986784140969, + "grad_norm": 1.965112171139023, + "learning_rate": 1.1452122909120788e-07, + "loss": 0.6641250848770142, + "step": 5417 + }, + { + "epoch": 1.909251101321586, + "grad_norm": 1.7797017689691026, + "learning_rate": 1.1364365980411019e-07, + "loss": 0.4823518395423889, + "step": 5418 + }, + { + "epoch": 1.909603524229075, + "grad_norm": 1.7374946519813605, + "learning_rate": 1.127694466273166e-07, + "loss": 0.5770869255065918, + "step": 5419 + }, + { + "epoch": 1.909955947136564, + "grad_norm": 1.8439547121423094, + "learning_rate": 1.1189858985759306e-07, + "loss": 0.5120491981506348, + "step": 5420 + }, + { + "epoch": 1.9103083700440529, + "grad_norm": 1.998054444662161, + "learning_rate": 1.1103108979056865e-07, + "loss": 0.6742277145385742, + "step": 5421 + }, + { + "epoch": 1.9106607929515418, + "grad_norm": 1.7361045655014782, + "learning_rate": 1.1016694672073336e-07, + "loss": 0.6053510904312134, + "step": 5422 + }, + { + "epoch": 1.9110132158590307, + "grad_norm": 2.276872906150792, + "learning_rate": 1.0930616094143698e-07, + "loss": 0.5598228573799133, + "step": 5423 + }, + { + "epoch": 1.9113656387665197, + "grad_norm": 1.7689371613585823, + "learning_rate": 1.0844873274488799e-07, + "loss": 0.599521279335022, + "step": 5424 + }, + { + "epoch": 1.9117180616740088, + "grad_norm": 2.270274631303626, + "learning_rate": 1.075946624221591e-07, + "loss": 0.5986596345901489, + "step": 5425 + }, + { + "epoch": 1.9120704845814978, + "grad_norm": 2.0819173495219054, + "learning_rate": 1.067439502631773e-07, + "loss": 0.5657980442047119, + "step": 5426 + }, + { + "epoch": 1.912422907488987, + "grad_norm": 2.498725021517388, + "learning_rate": 1.0589659655673712e-07, + "loss": 0.5561040639877319, + "step": 5427 + }, + { + "epoch": 1.9127753303964758, + "grad_norm": 1.6241033411576455, + "learning_rate": 1.0505260159048513e-07, + "loss": 0.5088320970535278, + 
"step": 5428 + }, + { + "epoch": 1.9131277533039648, + "grad_norm": 2.1207031706665407, + "learning_rate": 1.0421196565093217e-07, + "loss": 0.5679075717926025, + "step": 5429 + }, + { + "epoch": 1.9134801762114537, + "grad_norm": 1.8775486377310404, + "learning_rate": 1.0337468902344994e-07, + "loss": 0.6701461672782898, + "step": 5430 + }, + { + "epoch": 1.9138325991189427, + "grad_norm": 1.7839638341554918, + "learning_rate": 1.0254077199226553e-07, + "loss": 0.6172112822532654, + "step": 5431 + }, + { + "epoch": 1.9141850220264316, + "grad_norm": 1.904067212081221, + "learning_rate": 1.0171021484046806e-07, + "loss": 0.5926263332366943, + "step": 5432 + }, + { + "epoch": 1.9145374449339208, + "grad_norm": 1.7190787727179386, + "learning_rate": 1.0088301785000754e-07, + "loss": 0.6142431497573853, + "step": 5433 + }, + { + "epoch": 1.9148898678414097, + "grad_norm": 1.7095738070807496, + "learning_rate": 1.0005918130168934e-07, + "loss": 0.5367780923843384, + "step": 5434 + }, + { + "epoch": 1.9152422907488986, + "grad_norm": 1.8769142431022592, + "learning_rate": 9.923870547518311e-08, + "loss": 0.5241641998291016, + "step": 5435 + }, + { + "epoch": 1.9155947136563878, + "grad_norm": 1.7765958549274539, + "learning_rate": 9.842159064901157e-08, + "loss": 0.5906308889389038, + "step": 5436 + }, + { + "epoch": 1.9159471365638767, + "grad_norm": 2.1275572555046613, + "learning_rate": 9.760783710056176e-08, + "loss": 0.5411181449890137, + "step": 5437 + }, + { + "epoch": 1.9162995594713657, + "grad_norm": 1.9001328464490854, + "learning_rate": 9.679744510607825e-08, + "loss": 0.6313618421554565, + "step": 5438 + }, + { + "epoch": 1.9166519823788546, + "grad_norm": 2.0658646856716336, + "learning_rate": 9.599041494066208e-08, + "loss": 0.6330033540725708, + "step": 5439 + }, + { + "epoch": 1.9170044052863435, + "grad_norm": 1.9617429681187768, + "learning_rate": 9.518674687827634e-08, + "loss": 0.5859507322311401, + "step": 5440 + }, + { + "epoch": 
1.9173568281938325, + "grad_norm": 1.9233196169731877, + "learning_rate": 9.438644119174057e-08, + "loss": 0.571119487285614, + "step": 5441 + }, + { + "epoch": 1.9177092511013216, + "grad_norm": 1.683294616332208, + "learning_rate": 9.3589498152733e-08, + "loss": 0.6114518046379089, + "step": 5442 + }, + { + "epoch": 1.9180616740088106, + "grad_norm": 2.0948221060814407, + "learning_rate": 9.279591803179277e-08, + "loss": 0.5762027502059937, + "step": 5443 + }, + { + "epoch": 1.9184140969162997, + "grad_norm": 1.973540736612678, + "learning_rate": 9.200570109831441e-08, + "loss": 0.6081440448760986, + "step": 5444 + }, + { + "epoch": 1.9187665198237887, + "grad_norm": 1.9242540837021294, + "learning_rate": 9.121884762055222e-08, + "loss": 0.5682440996170044, + "step": 5445 + }, + { + "epoch": 1.9191189427312776, + "grad_norm": 1.642224199268087, + "learning_rate": 9.043535786561919e-08, + "loss": 0.5290100574493408, + "step": 5446 + }, + { + "epoch": 1.9194713656387665, + "grad_norm": 1.8013641871034827, + "learning_rate": 8.965523209948367e-08, + "loss": 0.5743255019187927, + "step": 5447 + }, + { + "epoch": 1.9198237885462555, + "grad_norm": 1.6357977481393366, + "learning_rate": 8.887847058697718e-08, + "loss": 0.5955618023872375, + "step": 5448 + }, + { + "epoch": 1.9201762114537444, + "grad_norm": 1.9706217525454803, + "learning_rate": 8.810507359178322e-08, + "loss": 0.4732915759086609, + "step": 5449 + }, + { + "epoch": 1.9205286343612333, + "grad_norm": 3.2730228664607797, + "learning_rate": 8.733504137644621e-08, + "loss": 0.6712108850479126, + "step": 5450 + }, + { + "epoch": 1.9208810572687225, + "grad_norm": 1.997966446518774, + "learning_rate": 8.656837420237152e-08, + "loss": 0.5169811248779297, + "step": 5451 + }, + { + "epoch": 1.9212334801762114, + "grad_norm": 1.9146732631772796, + "learning_rate": 8.580507232981428e-08, + "loss": 0.6117082238197327, + "step": 5452 + }, + { + "epoch": 1.9215859030837006, + "grad_norm": 1.7690878518096709, + 
"learning_rate": 8.504513601789388e-08, + "loss": 0.7020283937454224, + "step": 5453 + }, + { + "epoch": 1.9219383259911895, + "grad_norm": 1.806111695783304, + "learning_rate": 8.42885655245862e-08, + "loss": 0.5489979386329651, + "step": 5454 + }, + { + "epoch": 1.9222907488986785, + "grad_norm": 1.8218906131330599, + "learning_rate": 8.353536110672133e-08, + "loss": 0.5361644625663757, + "step": 5455 + }, + { + "epoch": 1.9226431718061674, + "grad_norm": 1.8728336665856926, + "learning_rate": 8.278552301998921e-08, + "loss": 0.6470010280609131, + "step": 5456 + }, + { + "epoch": 1.9229955947136563, + "grad_norm": 1.5338046694887773, + "learning_rate": 8.203905151893731e-08, + "loss": 0.4642202854156494, + "step": 5457 + }, + { + "epoch": 1.9233480176211453, + "grad_norm": 2.1878989180883357, + "learning_rate": 8.129594685696852e-08, + "loss": 0.6817516088485718, + "step": 5458 + }, + { + "epoch": 1.9237004405286342, + "grad_norm": 1.7544221338170298, + "learning_rate": 8.055620928634433e-08, + "loss": 0.5748617649078369, + "step": 5459 + }, + { + "epoch": 1.9240528634361234, + "grad_norm": 1.9928156109239001, + "learning_rate": 7.981983905818281e-08, + "loss": 0.6730939149856567, + "step": 5460 + }, + { + "epoch": 1.9244052863436123, + "grad_norm": 1.665760800669473, + "learning_rate": 7.90868364224584e-08, + "loss": 0.46469685435295105, + "step": 5461 + }, + { + "epoch": 1.9247577092511015, + "grad_norm": 2.0844638903136907, + "learning_rate": 7.835720162800209e-08, + "loss": 0.5633926391601562, + "step": 5462 + }, + { + "epoch": 1.9251101321585904, + "grad_norm": 2.034693536740542, + "learning_rate": 7.76309349225035e-08, + "loss": 0.5813394784927368, + "step": 5463 + }, + { + "epoch": 1.9254625550660793, + "grad_norm": 1.4118750743542163, + "learning_rate": 7.690803655250656e-08, + "loss": 0.39959418773651123, + "step": 5464 + }, + { + "epoch": 1.9258149779735683, + "grad_norm": 1.7685280750016403, + "learning_rate": 7.618850676341383e-08, + "loss": 
0.6136372089385986, + "step": 5465 + }, + { + "epoch": 1.9261674008810572, + "grad_norm": 1.7393751984149959, + "learning_rate": 7.547234579948104e-08, + "loss": 0.6664354801177979, + "step": 5466 + }, + { + "epoch": 1.9265198237885461, + "grad_norm": 1.8827898065352628, + "learning_rate": 7.475955390382483e-08, + "loss": 0.6009566783905029, + "step": 5467 + }, + { + "epoch": 1.9268722466960353, + "grad_norm": 1.7872694267120686, + "learning_rate": 7.405013131841499e-08, + "loss": 0.7307299375534058, + "step": 5468 + }, + { + "epoch": 1.9272246696035242, + "grad_norm": 1.8234703336391604, + "learning_rate": 7.334407828407885e-08, + "loss": 0.5459531545639038, + "step": 5469 + }, + { + "epoch": 1.9275770925110132, + "grad_norm": 2.1252744976115583, + "learning_rate": 7.264139504049916e-08, + "loss": 0.6230820417404175, + "step": 5470 + }, + { + "epoch": 1.9279295154185023, + "grad_norm": 1.6781926619362313, + "learning_rate": 7.194208182621509e-08, + "loss": 0.5282379984855652, + "step": 5471 + }, + { + "epoch": 1.9282819383259913, + "grad_norm": 2.1980396503246604, + "learning_rate": 7.12461388786212e-08, + "loss": 0.626023530960083, + "step": 5472 + }, + { + "epoch": 1.9286343612334802, + "grad_norm": 2.1608211937841197, + "learning_rate": 7.055356643396849e-08, + "loss": 0.6897492408752441, + "step": 5473 + }, + { + "epoch": 1.9289867841409691, + "grad_norm": 1.7214187213722456, + "learning_rate": 6.986436472736447e-08, + "loss": 0.583849310874939, + "step": 5474 + }, + { + "epoch": 1.929339207048458, + "grad_norm": 1.7492909983006562, + "learning_rate": 6.917853399277197e-08, + "loss": 0.6056735515594482, + "step": 5475 + }, + { + "epoch": 1.929691629955947, + "grad_norm": 1.8166317563571888, + "learning_rate": 6.849607446300699e-08, + "loss": 0.52838134765625, + "step": 5476 + }, + { + "epoch": 1.9300440528634362, + "grad_norm": 2.0425025849187954, + "learning_rate": 6.781698636974532e-08, + "loss": 0.6466653943061829, + "step": 5477 + }, + { + "epoch": 
1.930396475770925, + "grad_norm": 1.9593462888477349, + "learning_rate": 6.714126994351589e-08, + "loss": 0.6570286750793457, + "step": 5478 + }, + { + "epoch": 1.9307488986784143, + "grad_norm": 2.4867358577799576, + "learning_rate": 6.646892541370409e-08, + "loss": 0.7303042411804199, + "step": 5479 + }, + { + "epoch": 1.9311013215859032, + "grad_norm": 1.7938376915708092, + "learning_rate": 6.579995300854846e-08, + "loss": 0.5556488037109375, + "step": 5480 + }, + { + "epoch": 1.9314537444933921, + "grad_norm": 1.9624740523274589, + "learning_rate": 6.513435295514404e-08, + "loss": 0.6673456430435181, + "step": 5481 + }, + { + "epoch": 1.931806167400881, + "grad_norm": 1.9681067241776358, + "learning_rate": 6.447212547944448e-08, + "loss": 0.5605199337005615, + "step": 5482 + }, + { + "epoch": 1.93215859030837, + "grad_norm": 2.1935053480556785, + "learning_rate": 6.381327080625111e-08, + "loss": 0.5455278158187866, + "step": 5483 + }, + { + "epoch": 1.932511013215859, + "grad_norm": 1.8919678372461928, + "learning_rate": 6.315778915922722e-08, + "loss": 0.5371166467666626, + "step": 5484 + }, + { + "epoch": 1.9328634361233479, + "grad_norm": 1.9114985069981878, + "learning_rate": 6.250568076088814e-08, + "loss": 0.5873486399650574, + "step": 5485 + }, + { + "epoch": 1.933215859030837, + "grad_norm": 1.706006640351556, + "learning_rate": 6.18569458326046e-08, + "loss": 0.4187420606613159, + "step": 5486 + }, + { + "epoch": 1.933568281938326, + "grad_norm": 1.900919435061996, + "learning_rate": 6.121158459460042e-08, + "loss": 0.6006373167037964, + "step": 5487 + }, + { + "epoch": 1.9339207048458151, + "grad_norm": 1.819026585986156, + "learning_rate": 6.056959726595702e-08, + "loss": 0.6022043228149414, + "step": 5488 + }, + { + "epoch": 1.934273127753304, + "grad_norm": 2.037720704211898, + "learning_rate": 5.993098406460895e-08, + "loss": 0.6324778199195862, + "step": 5489 + }, + { + "epoch": 1.934625550660793, + "grad_norm": 2.0263189254585026, + 
"learning_rate": 5.929574520734505e-08, + "loss": 0.545529305934906, + "step": 5490 + }, + { + "epoch": 1.934977973568282, + "grad_norm": 1.9957592171950855, + "learning_rate": 5.8663880909809454e-08, + "loss": 0.623627781867981, + "step": 5491 + }, + { + "epoch": 1.9353303964757709, + "grad_norm": 1.9773130682504432, + "learning_rate": 5.80353913865006e-08, + "loss": 0.529983639717102, + "step": 5492 + }, + { + "epoch": 1.9356828193832598, + "grad_norm": 1.8301905692374867, + "learning_rate": 5.7410276850770055e-08, + "loss": 0.638504147529602, + "step": 5493 + }, + { + "epoch": 1.9360352422907487, + "grad_norm": 1.7706026455559263, + "learning_rate": 5.678853751482694e-08, + "loss": 0.6822696924209595, + "step": 5494 + }, + { + "epoch": 1.936387665198238, + "grad_norm": 1.6924491917110376, + "learning_rate": 5.6170173589730204e-08, + "loss": 0.5454750061035156, + "step": 5495 + }, + { + "epoch": 1.9367400881057268, + "grad_norm": 2.1428203564618915, + "learning_rate": 5.555518528539638e-08, + "loss": 0.5301260948181152, + "step": 5496 + }, + { + "epoch": 1.937092511013216, + "grad_norm": 1.965552985899495, + "learning_rate": 5.4943572810594035e-08, + "loss": 0.697251558303833, + "step": 5497 + }, + { + "epoch": 1.937444933920705, + "grad_norm": 1.8589631146352448, + "learning_rate": 5.433533637294819e-08, + "loss": 0.5171586871147156, + "step": 5498 + }, + { + "epoch": 1.9377973568281939, + "grad_norm": 1.974708525019113, + "learning_rate": 5.373047617893479e-08, + "loss": 0.6006083488464355, + "step": 5499 + }, + { + "epoch": 1.9381497797356828, + "grad_norm": 1.8914658578007237, + "learning_rate": 5.312899243388403e-08, + "loss": 0.6083849668502808, + "step": 5500 + }, + { + "epoch": 1.9385022026431717, + "grad_norm": 2.189863186886587, + "learning_rate": 5.2530885341982586e-08, + "loss": 0.6572569608688354, + "step": 5501 + }, + { + "epoch": 1.9388546255506607, + "grad_norm": 1.9316409138269541, + "learning_rate": 5.1936155106269146e-08, + "loss": 
0.497112512588501, + "step": 5502 + }, + { + "epoch": 1.9392070484581496, + "grad_norm": 1.9380736027791932, + "learning_rate": 5.1344801928636664e-08, + "loss": 0.5804885625839233, + "step": 5503 + }, + { + "epoch": 1.9395594713656388, + "grad_norm": 2.415405306864913, + "learning_rate": 5.075682600982901e-08, + "loss": 0.6225712299346924, + "step": 5504 + }, + { + "epoch": 1.9399118942731277, + "grad_norm": 1.896345547525062, + "learning_rate": 5.017222754944651e-08, + "loss": 0.6100028157234192, + "step": 5505 + }, + { + "epoch": 1.9402643171806169, + "grad_norm": 1.47523556471349, + "learning_rate": 4.959100674594486e-08, + "loss": 0.549712061882019, + "step": 5506 + }, + { + "epoch": 1.9406167400881058, + "grad_norm": 1.4736978929928604, + "learning_rate": 4.901316379662624e-08, + "loss": 0.5327162146568298, + "step": 5507 + }, + { + "epoch": 1.9409691629955947, + "grad_norm": 2.3670974688739697, + "learning_rate": 4.8438698897652626e-08, + "loss": 0.7408417463302612, + "step": 5508 + }, + { + "epoch": 1.9413215859030837, + "grad_norm": 1.8644826998816841, + "learning_rate": 4.7867612244036906e-08, + "loss": 0.6126288175582886, + "step": 5509 + }, + { + "epoch": 1.9416740088105726, + "grad_norm": 1.9600730866036664, + "learning_rate": 4.729990402964402e-08, + "loss": 0.542537271976471, + "step": 5510 + }, + { + "epoch": 1.9420264317180616, + "grad_norm": 1.9121979922913575, + "learning_rate": 4.6735574447195345e-08, + "loss": 0.5429843664169312, + "step": 5511 + }, + { + "epoch": 1.9423788546255507, + "grad_norm": 1.8002113296979507, + "learning_rate": 4.617462368826098e-08, + "loss": 0.6103960275650024, + "step": 5512 + }, + { + "epoch": 1.9427312775330396, + "grad_norm": 1.7389238607151303, + "learning_rate": 4.561705194326749e-08, + "loss": 0.43702462315559387, + "step": 5513 + }, + { + "epoch": 1.9430837004405286, + "grad_norm": 1.7641081174281446, + "learning_rate": 4.506285940149457e-08, + "loss": 0.5313314199447632, + "step": 5514 + }, + { + "epoch": 
1.9434361233480177, + "grad_norm": 1.7069377243686814, + "learning_rate": 4.451204625107064e-08, + "loss": 0.568792462348938, + "step": 5515 + }, + { + "epoch": 1.9437885462555067, + "grad_norm": 2.1007223606906185, + "learning_rate": 4.3964612678979446e-08, + "loss": 0.6055475473403931, + "step": 5516 + }, + { + "epoch": 1.9441409691629956, + "grad_norm": 1.9436769148628141, + "learning_rate": 4.3420558871060116e-08, + "loss": 0.6203786730766296, + "step": 5517 + }, + { + "epoch": 1.9444933920704845, + "grad_norm": 1.788437156743959, + "learning_rate": 4.287988501200047e-08, + "loss": 0.5914345979690552, + "step": 5518 + }, + { + "epoch": 1.9448458149779735, + "grad_norm": 1.8745063002086186, + "learning_rate": 4.2342591285343684e-08, + "loss": 0.5650739669799805, + "step": 5519 + }, + { + "epoch": 1.9451982378854624, + "grad_norm": 1.4561818985326163, + "learning_rate": 4.180867787348164e-08, + "loss": 0.5589660406112671, + "step": 5520 + }, + { + "epoch": 1.9455506607929516, + "grad_norm": 1.9465775114906616, + "learning_rate": 4.12781449576638e-08, + "loss": 0.5683336853981018, + "step": 5521 + }, + { + "epoch": 1.9459030837004405, + "grad_norm": 1.7869041316521455, + "learning_rate": 4.075099271798943e-08, + "loss": 0.5388365983963013, + "step": 5522 + }, + { + "epoch": 1.9462555066079297, + "grad_norm": 2.3465100615160757, + "learning_rate": 4.0227221333408726e-08, + "loss": 0.575006365776062, + "step": 5523 + }, + { + "epoch": 1.9466079295154186, + "grad_norm": 1.6872132733494793, + "learning_rate": 3.970683098172723e-08, + "loss": 0.49638503789901733, + "step": 5524 + }, + { + "epoch": 1.9469603524229075, + "grad_norm": 2.095719754969683, + "learning_rate": 3.9189821839600294e-08, + "loss": 0.6484041213989258, + "step": 5525 + }, + { + "epoch": 1.9473127753303965, + "grad_norm": 1.7587272240429226, + "learning_rate": 3.8676194082537535e-08, + "loss": 0.5522493124008179, + "step": 5526 + }, + { + "epoch": 1.9476651982378854, + "grad_norm": 
1.8834504959770908, + "learning_rate": 3.8165947884898356e-08, + "loss": 0.5875294208526611, + "step": 5527 + }, + { + "epoch": 1.9480176211453744, + "grad_norm": 1.8990167388470667, + "learning_rate": 3.765908341989644e-08, + "loss": 0.5725122690200806, + "step": 5528 + }, + { + "epoch": 1.9483700440528633, + "grad_norm": 1.7744908913216453, + "learning_rate": 3.7155600859595243e-08, + "loss": 0.5198935866355896, + "step": 5529 + }, + { + "epoch": 1.9487224669603525, + "grad_norm": 1.8236927705658619, + "learning_rate": 3.665550037491361e-08, + "loss": 0.6396631598472595, + "step": 5530 + }, + { + "epoch": 1.9490748898678414, + "grad_norm": 1.8879612013695581, + "learning_rate": 3.6158782135617965e-08, + "loss": 0.666089653968811, + "step": 5531 + }, + { + "epoch": 1.9494273127753305, + "grad_norm": 1.9912413735248546, + "learning_rate": 3.5665446310330087e-08, + "loss": 0.6818836331367493, + "step": 5532 + }, + { + "epoch": 1.9497797356828195, + "grad_norm": 2.04266783813749, + "learning_rate": 3.517549306652157e-08, + "loss": 0.533860981464386, + "step": 5533 + }, + { + "epoch": 1.9501321585903084, + "grad_norm": 2.011493253926506, + "learning_rate": 3.468892257051493e-08, + "loss": 0.6174973249435425, + "step": 5534 + }, + { + "epoch": 1.9504845814977974, + "grad_norm": 2.07102768257305, + "learning_rate": 3.4205734987488027e-08, + "loss": 0.6010403037071228, + "step": 5535 + }, + { + "epoch": 1.9508370044052863, + "grad_norm": 1.8654722728182422, + "learning_rate": 3.372593048146744e-08, + "loss": 0.6475502252578735, + "step": 5536 + }, + { + "epoch": 1.9511894273127752, + "grad_norm": 2.080853183455891, + "learning_rate": 3.3249509215330653e-08, + "loss": 0.5625165700912476, + "step": 5537 + }, + { + "epoch": 1.9515418502202642, + "grad_norm": 2.0303262611818336, + "learning_rate": 3.277647135080941e-08, + "loss": 0.6504719257354736, + "step": 5538 + }, + { + "epoch": 1.9518942731277533, + "grad_norm": 1.7964243534988884, + "learning_rate": 
3.230681704848415e-08, + "loss": 0.6217454671859741, + "step": 5539 + }, + { + "epoch": 1.9522466960352423, + "grad_norm": 1.975881803401868, + "learning_rate": 3.1840546467788445e-08, + "loss": 0.5804678201675415, + "step": 5540 + }, + { + "epoch": 1.9525991189427314, + "grad_norm": 1.7644690968017507, + "learning_rate": 3.1377659767006795e-08, + "loss": 0.6133759617805481, + "step": 5541 + }, + { + "epoch": 1.9529515418502204, + "grad_norm": 1.736020484111057, + "learning_rate": 3.0918157103273506e-08, + "loss": 0.508539080619812, + "step": 5542 + }, + { + "epoch": 1.9533039647577093, + "grad_norm": 2.115379893074018, + "learning_rate": 3.0462038632577126e-08, + "loss": 0.5682996511459351, + "step": 5543 + }, + { + "epoch": 1.9536563876651982, + "grad_norm": 2.0360556708735276, + "learning_rate": 3.000930450975603e-08, + "loss": 0.7072808742523193, + "step": 5544 + }, + { + "epoch": 1.9540088105726872, + "grad_norm": 2.092981328238059, + "learning_rate": 2.9559954888497278e-08, + "loss": 0.5948976278305054, + "step": 5545 + }, + { + "epoch": 1.954361233480176, + "grad_norm": 1.827038503098094, + "learning_rate": 2.911398992134218e-08, + "loss": 0.5111032128334045, + "step": 5546 + }, + { + "epoch": 1.954713656387665, + "grad_norm": 1.8278152391313893, + "learning_rate": 2.8671409759681858e-08, + "loss": 0.553802490234375, + "step": 5547 + }, + { + "epoch": 1.9550660792951542, + "grad_norm": 1.685843539181356, + "learning_rate": 2.8232214553759462e-08, + "loss": 0.5091711282730103, + "step": 5548 + }, + { + "epoch": 1.9554185022026431, + "grad_norm": 1.4871983076237012, + "learning_rate": 2.7796404452666847e-08, + "loss": 0.47025251388549805, + "step": 5549 + }, + { + "epoch": 1.9557709251101323, + "grad_norm": 2.031516899140332, + "learning_rate": 2.7363979604349e-08, + "loss": 0.6174348592758179, + "step": 5550 + }, + { + "epoch": 1.9561233480176212, + "grad_norm": 1.8902471541583934, + "learning_rate": 2.69349401555985e-08, + "loss": 0.5516685247421265, + 
"step": 5551 + }, + { + "epoch": 1.9564757709251102, + "grad_norm": 2.1329834880360563, + "learning_rate": 2.6509286252063282e-08, + "loss": 0.6272131204605103, + "step": 5552 + }, + { + "epoch": 1.956828193832599, + "grad_norm": 1.796045915873636, + "learning_rate": 2.6087018038239987e-08, + "loss": 0.5913189649581909, + "step": 5553 + }, + { + "epoch": 1.957180616740088, + "grad_norm": 1.8863252927172953, + "learning_rate": 2.5668135657472835e-08, + "loss": 0.6802668571472168, + "step": 5554 + }, + { + "epoch": 1.957533039647577, + "grad_norm": 1.9442650959080303, + "learning_rate": 2.525263925196142e-08, + "loss": 0.5829865336418152, + "step": 5555 + }, + { + "epoch": 1.9578854625550661, + "grad_norm": 2.0474932427098627, + "learning_rate": 2.4840528962752907e-08, + "loss": 0.6400870680809021, + "step": 5556 + }, + { + "epoch": 1.958237885462555, + "grad_norm": 2.043080792800152, + "learning_rate": 2.4431804929746506e-08, + "loss": 0.48432302474975586, + "step": 5557 + }, + { + "epoch": 1.958590308370044, + "grad_norm": 1.9639599818265998, + "learning_rate": 2.4026467291691223e-08, + "loss": 0.5494402647018433, + "step": 5558 + }, + { + "epoch": 1.9589427312775332, + "grad_norm": 1.800709765694371, + "learning_rate": 2.3624516186186996e-08, + "loss": 0.5393223762512207, + "step": 5559 + }, + { + "epoch": 1.959295154185022, + "grad_norm": 1.868096905678952, + "learning_rate": 2.322595174968245e-08, + "loss": 0.5500867962837219, + "step": 5560 + }, + { + "epoch": 1.959647577092511, + "grad_norm": 1.986290631971783, + "learning_rate": 2.283077411747825e-08, + "loss": 0.5618818998336792, + "step": 5561 + }, + { + "epoch": 1.96, + "grad_norm": 2.0174876429391526, + "learning_rate": 2.243898342372597e-08, + "loss": 0.5681769251823425, + "step": 5562 + }, + { + "epoch": 1.960352422907489, + "grad_norm": 2.160298007931608, + "learning_rate": 2.2050579801424777e-08, + "loss": 0.8009706139564514, + "step": 5563 + }, + { + "epoch": 1.9607048458149778, + "grad_norm": 
2.2076681264311517, + "learning_rate": 2.1665563382426978e-08, + "loss": 0.5609455704689026, + "step": 5564 + }, + { + "epoch": 1.961057268722467, + "grad_norm": 1.6584397285315808, + "learning_rate": 2.1283934297432472e-08, + "loss": 0.5615163445472717, + "step": 5565 + }, + { + "epoch": 1.961409691629956, + "grad_norm": 2.4819954064616265, + "learning_rate": 2.0905692675993182e-08, + "loss": 0.4442581832408905, + "step": 5566 + }, + { + "epoch": 1.961762114537445, + "grad_norm": 2.0037139303731344, + "learning_rate": 2.0530838646510842e-08, + "loss": 0.6557266116142273, + "step": 5567 + }, + { + "epoch": 1.962114537444934, + "grad_norm": 1.851215643338071, + "learning_rate": 2.0159372336235884e-08, + "loss": 0.5911799669265747, + "step": 5568 + }, + { + "epoch": 1.962466960352423, + "grad_norm": 2.0920087166052057, + "learning_rate": 1.9791293871269656e-08, + "loss": 0.5480202436447144, + "step": 5569 + }, + { + "epoch": 1.962819383259912, + "grad_norm": 2.0350633249337795, + "learning_rate": 1.9426603376563325e-08, + "loss": 0.6489467620849609, + "step": 5570 + }, + { + "epoch": 1.9631718061674008, + "grad_norm": 1.8480180634522771, + "learning_rate": 1.9065300975917856e-08, + "loss": 0.4699944853782654, + "step": 5571 + }, + { + "epoch": 1.9635242290748898, + "grad_norm": 1.8923901172350763, + "learning_rate": 1.8707386791985137e-08, + "loss": 0.6684885025024414, + "step": 5572 + }, + { + "epoch": 1.9638766519823787, + "grad_norm": 2.2169126358939413, + "learning_rate": 1.835286094626576e-08, + "loss": 0.5847122073173523, + "step": 5573 + }, + { + "epoch": 1.9642290748898679, + "grad_norm": 1.801041360244202, + "learning_rate": 1.8001723559109007e-08, + "loss": 0.5427859425544739, + "step": 5574 + }, + { + "epoch": 1.9645814977973568, + "grad_norm": 2.032431019918, + "learning_rate": 1.7653974749715087e-08, + "loss": 0.6545590758323669, + "step": 5575 + }, + { + "epoch": 1.964933920704846, + "grad_norm": 1.785624619961358, + "learning_rate": 
1.730961463613512e-08, + "loss": 0.6369475722312927, + "step": 5576 + }, + { + "epoch": 1.965286343612335, + "grad_norm": 1.989892215094852, + "learning_rate": 1.696864333526893e-08, + "loss": 0.5165325403213501, + "step": 5577 + }, + { + "epoch": 1.9656387665198238, + "grad_norm": 2.147184198038496, + "learning_rate": 1.6631060962863933e-08, + "loss": 0.5651812553405762, + "step": 5578 + }, + { + "epoch": 1.9659911894273128, + "grad_norm": 1.6839108762220567, + "learning_rate": 1.6296867633519563e-08, + "loss": 0.5249905586242676, + "step": 5579 + }, + { + "epoch": 1.9663436123348017, + "grad_norm": 1.8723453129570697, + "learning_rate": 1.5966063460683967e-08, + "loss": 0.6748663783073425, + "step": 5580 + }, + { + "epoch": 1.9666960352422906, + "grad_norm": 1.654472064493344, + "learning_rate": 1.5638648556656198e-08, + "loss": 0.5276468992233276, + "step": 5581 + }, + { + "epoch": 1.9670484581497796, + "grad_norm": 1.7910399914217132, + "learning_rate": 1.5314623032581798e-08, + "loss": 0.5778729319572449, + "step": 5582 + }, + { + "epoch": 1.9674008810572687, + "grad_norm": 1.8564203677999862, + "learning_rate": 1.4993986998457223e-08, + "loss": 0.5805479288101196, + "step": 5583 + }, + { + "epoch": 1.9677533039647577, + "grad_norm": 1.9817945876697571, + "learning_rate": 1.4676740563129843e-08, + "loss": 0.6213263273239136, + "step": 5584 + }, + { + "epoch": 1.9681057268722468, + "grad_norm": 1.8037978918771924, + "learning_rate": 1.4362883834294627e-08, + "loss": 0.5081031322479248, + "step": 5585 + }, + { + "epoch": 1.9684581497797358, + "grad_norm": 1.71465121106617, + "learning_rate": 1.4052416918495237e-08, + "loss": 0.5605350136756897, + "step": 5586 + }, + { + "epoch": 1.9688105726872247, + "grad_norm": 2.1182297496689877, + "learning_rate": 1.3745339921126255e-08, + "loss": 0.701635479927063, + "step": 5587 + }, + { + "epoch": 1.9691629955947136, + "grad_norm": 4.768978361346767, + "learning_rate": 1.344165294642985e-08, + "loss": 0.5537668466567993, 
+ "step": 5588 + }, + { + "epoch": 1.9695154185022026, + "grad_norm": 1.9636754875619487, + "learning_rate": 1.3141356097500225e-08, + "loss": 0.6395033597946167, + "step": 5589 + }, + { + "epoch": 1.9698678414096915, + "grad_norm": 2.0129419054377355, + "learning_rate": 1.2844449476276943e-08, + "loss": 0.549985408782959, + "step": 5590 + }, + { + "epoch": 1.9702202643171807, + "grad_norm": 1.5684457658919975, + "learning_rate": 1.2550933183550496e-08, + "loss": 0.4503220021724701, + "step": 5591 + }, + { + "epoch": 1.9705726872246696, + "grad_norm": 1.842567825609057, + "learning_rate": 1.2260807318962286e-08, + "loss": 0.6369946599006653, + "step": 5592 + }, + { + "epoch": 1.9709251101321585, + "grad_norm": 1.9389149649481725, + "learning_rate": 1.197407198099909e-08, + "loss": 0.547295093536377, + "step": 5593 + }, + { + "epoch": 1.9712775330396477, + "grad_norm": 1.9379429852476115, + "learning_rate": 1.1690727267000823e-08, + "loss": 0.578770101070404, + "step": 5594 + }, + { + "epoch": 1.9716299559471366, + "grad_norm": 1.7979041690440398, + "learning_rate": 1.1410773273151654e-08, + "loss": 0.5992920398712158, + "step": 5595 + }, + { + "epoch": 1.9719823788546256, + "grad_norm": 2.0358089708846503, + "learning_rate": 1.1134210094488896e-08, + "loss": 0.5912446975708008, + "step": 5596 + }, + { + "epoch": 1.9723348017621145, + "grad_norm": 1.9956728807231137, + "learning_rate": 1.0861037824896337e-08, + "loss": 0.6539223194122314, + "step": 5597 + }, + { + "epoch": 1.9726872246696034, + "grad_norm": 1.6995757910859364, + "learning_rate": 1.0591256557108686e-08, + "loss": 0.6487923860549927, + "step": 5598 + }, + { + "epoch": 1.9730396475770924, + "grad_norm": 2.0265831695223384, + "learning_rate": 1.0324866382707133e-08, + "loss": 0.7950254678726196, + "step": 5599 + }, + { + "epoch": 1.9733920704845815, + "grad_norm": 1.7028165277673737, + "learning_rate": 1.006186739212267e-08, + "loss": 0.4941173195838928, + "step": 5600 + }, + { + "epoch": 
1.9737444933920705, + "grad_norm": 1.8542643380709567, + "learning_rate": 9.802259674637215e-09, + "loss": 0.6733928322792053, + "step": 5601 + }, + { + "epoch": 1.9740969162995596, + "grad_norm": 1.7591584352828642, + "learning_rate": 9.546043318376941e-09, + "loss": 0.5084437131881714, + "step": 5602 + }, + { + "epoch": 1.9744493392070486, + "grad_norm": 1.7908335232844454, + "learning_rate": 9.293218410320049e-09, + "loss": 0.4499536156654358, + "step": 5603 + }, + { + "epoch": 1.9748017621145375, + "grad_norm": 1.8930332249062705, + "learning_rate": 9.04378503629344e-09, + "loss": 0.557701826095581, + "step": 5604 + }, + { + "epoch": 1.9751541850220264, + "grad_norm": 1.8335406987256675, + "learning_rate": 8.797743280972715e-09, + "loss": 0.6110183000564575, + "step": 5605 + }, + { + "epoch": 1.9755066079295154, + "grad_norm": 1.7153205510430745, + "learning_rate": 8.555093227878842e-09, + "loss": 0.5877780318260193, + "step": 5606 + }, + { + "epoch": 1.9758590308370043, + "grad_norm": 1.679841288677745, + "learning_rate": 8.315834959385927e-09, + "loss": 0.48567962646484375, + "step": 5607 + }, + { + "epoch": 1.9762114537444933, + "grad_norm": 1.8860795135087454, + "learning_rate": 8.079968556714557e-09, + "loss": 0.5536524653434753, + "step": 5608 + }, + { + "epoch": 1.9765638766519824, + "grad_norm": 1.8444573855158568, + "learning_rate": 7.847494099934017e-09, + "loss": 0.7685257196426392, + "step": 5609 + }, + { + "epoch": 1.9769162995594713, + "grad_norm": 1.6340535068378546, + "learning_rate": 7.618411667961179e-09, + "loss": 0.5442079305648804, + "step": 5610 + }, + { + "epoch": 1.9772687224669605, + "grad_norm": 1.6630624172257082, + "learning_rate": 7.392721338563835e-09, + "loss": 0.5034504532814026, + "step": 5611 + }, + { + "epoch": 1.9776211453744494, + "grad_norm": 1.8322008815729305, + "learning_rate": 7.1704231883551465e-09, + "loss": 0.4912964701652527, + "step": 5612 + }, + { + "epoch": 1.9779735682819384, + "grad_norm": 2.134310001014161, + 
"learning_rate": 6.951517292800303e-09, + "loss": 0.6034345626831055, + "step": 5613 + }, + { + "epoch": 1.9783259911894273, + "grad_norm": 1.7043803614532214, + "learning_rate": 6.736003726209861e-09, + "loss": 0.5379009246826172, + "step": 5614 + }, + { + "epoch": 1.9786784140969162, + "grad_norm": 1.8487957346880508, + "learning_rate": 6.523882561744188e-09, + "loss": 0.6571087837219238, + "step": 5615 + }, + { + "epoch": 1.9790308370044052, + "grad_norm": 1.9626526350308011, + "learning_rate": 6.315153871411239e-09, + "loss": 0.6473923921585083, + "step": 5616 + }, + { + "epoch": 1.9793832599118941, + "grad_norm": 1.908714917292982, + "learning_rate": 6.1098177260687786e-09, + "loss": 0.5877989530563354, + "step": 5617 + }, + { + "epoch": 1.9797356828193833, + "grad_norm": 2.11970631488856, + "learning_rate": 5.907874195422159e-09, + "loss": 0.5058172941207886, + "step": 5618 + }, + { + "epoch": 1.9800881057268722, + "grad_norm": 2.0101537086675143, + "learning_rate": 5.70932334802432e-09, + "loss": 0.6471046805381775, + "step": 5619 + }, + { + "epoch": 1.9804405286343614, + "grad_norm": 1.9865988373367267, + "learning_rate": 5.514165251276904e-09, + "loss": 0.6687172651290894, + "step": 5620 + }, + { + "epoch": 1.9807929515418503, + "grad_norm": 1.8269412184920357, + "learning_rate": 5.322399971431358e-09, + "loss": 0.5726118087768555, + "step": 5621 + }, + { + "epoch": 1.9811453744493392, + "grad_norm": 2.247520087423374, + "learning_rate": 5.134027573584499e-09, + "loss": 0.6534412503242493, + "step": 5622 + }, + { + "epoch": 1.9814977973568282, + "grad_norm": 1.9962084272761849, + "learning_rate": 4.949048121682953e-09, + "loss": 0.5972425937652588, + "step": 5623 + }, + { + "epoch": 1.9818502202643171, + "grad_norm": 2.13007133485212, + "learning_rate": 4.767461678522045e-09, + "loss": 0.6420427560806274, + "step": 5624 + }, + { + "epoch": 1.982202643171806, + "grad_norm": 2.0162772178768513, + "learning_rate": 4.589268305745798e-09, + "loss": 
0.5912461876869202, + "step": 5625 + }, + { + "epoch": 1.982555066079295, + "grad_norm": 1.8724213041645918, + "learning_rate": 4.414468063843602e-09, + "loss": 0.5942744016647339, + "step": 5626 + }, + { + "epoch": 1.9829074889867842, + "grad_norm": 2.156440148804406, + "learning_rate": 4.243061012154659e-09, + "loss": 0.6618138551712036, + "step": 5627 + }, + { + "epoch": 1.983259911894273, + "grad_norm": 1.949825469014612, + "learning_rate": 4.075047208867977e-09, + "loss": 0.6046779155731201, + "step": 5628 + }, + { + "epoch": 1.9836123348017622, + "grad_norm": 1.8484056273808063, + "learning_rate": 3.9104267110168235e-09, + "loss": 0.6797989010810852, + "step": 5629 + }, + { + "epoch": 1.9839647577092512, + "grad_norm": 1.5948446953630264, + "learning_rate": 3.749199574486495e-09, + "loss": 0.4882436692714691, + "step": 5630 + }, + { + "epoch": 1.9843171806167401, + "grad_norm": 2.0513829030138324, + "learning_rate": 3.591365854008766e-09, + "loss": 0.6694678068161011, + "step": 5631 + }, + { + "epoch": 1.984669603524229, + "grad_norm": 1.9290678063690434, + "learning_rate": 3.436925603161889e-09, + "loss": 0.6015830039978027, + "step": 5632 + }, + { + "epoch": 1.985022026431718, + "grad_norm": 1.6208579673883046, + "learning_rate": 3.2858788743739267e-09, + "loss": 0.5627756118774414, + "step": 5633 + }, + { + "epoch": 1.985374449339207, + "grad_norm": 2.0314486190919836, + "learning_rate": 3.138225718920529e-09, + "loss": 0.6069298386573792, + "step": 5634 + }, + { + "epoch": 1.985726872246696, + "grad_norm": 1.742509198855919, + "learning_rate": 2.993966186926045e-09, + "loss": 0.5779693722724915, + "step": 5635 + }, + { + "epoch": 1.986079295154185, + "grad_norm": 2.1341372247717914, + "learning_rate": 2.8531003273624126e-09, + "loss": 0.6706609725952148, + "step": 5636 + }, + { + "epoch": 1.986431718061674, + "grad_norm": 1.8945369145507158, + "learning_rate": 2.715628188046937e-09, + "loss": 0.6361640691757202, + "step": 5637 + }, + { + "epoch": 
1.9867841409691631, + "grad_norm": 1.5755870938483592, + "learning_rate": 2.581549815648954e-09, + "loss": 0.557577908039093, + "step": 5638 + }, + { + "epoch": 1.987136563876652, + "grad_norm": 1.9370315563575715, + "learning_rate": 2.450865255684276e-09, + "loss": 0.7735704183578491, + "step": 5639 + }, + { + "epoch": 1.987488986784141, + "grad_norm": 1.7912792802398185, + "learning_rate": 2.3235745525151956e-09, + "loss": 0.5836409330368042, + "step": 5640 + }, + { + "epoch": 1.98784140969163, + "grad_norm": 2.1774628342963616, + "learning_rate": 2.1996777493527023e-09, + "loss": 0.5824601650238037, + "step": 5641 + }, + { + "epoch": 1.9881938325991189, + "grad_norm": 1.9237771552425715, + "learning_rate": 2.0791748882575958e-09, + "loss": 0.6183140873908997, + "step": 5642 + }, + { + "epoch": 1.9885462555066078, + "grad_norm": 2.303117222732587, + "learning_rate": 1.9620660101349333e-09, + "loss": 0.6071987748146057, + "step": 5643 + }, + { + "epoch": 1.988898678414097, + "grad_norm": 1.884829146752756, + "learning_rate": 1.8483511547406907e-09, + "loss": 0.655383825302124, + "step": 5644 + }, + { + "epoch": 1.989251101321586, + "grad_norm": 2.0011243386948117, + "learning_rate": 1.738030360677323e-09, + "loss": 0.6328674554824829, + "step": 5645 + }, + { + "epoch": 1.989603524229075, + "grad_norm": 1.7353302673505981, + "learning_rate": 1.631103665394873e-09, + "loss": 0.5012212991714478, + "step": 5646 + }, + { + "epoch": 1.989955947136564, + "grad_norm": 1.911618533436513, + "learning_rate": 1.5275711051909724e-09, + "loss": 0.6202536821365356, + "step": 5647 + }, + { + "epoch": 1.990308370044053, + "grad_norm": 1.7599160794894961, + "learning_rate": 1.427432715214172e-09, + "loss": 0.4922720789909363, + "step": 5648 + }, + { + "epoch": 1.9906607929515419, + "grad_norm": 2.21375034225685, + "learning_rate": 1.33068852945617e-09, + "loss": 0.6591637134552002, + "step": 5649 + }, + { + "epoch": 1.9910132158590308, + "grad_norm": 2.0468816346516165, + 
"learning_rate": 1.2373385807584736e-09, + "loss": 0.5481886863708496, + "step": 5650 + }, + { + "epoch": 1.9913656387665197, + "grad_norm": 1.8773578553009866, + "learning_rate": 1.1473829008123994e-09, + "loss": 0.5642685890197754, + "step": 5651 + }, + { + "epoch": 1.9917180616740087, + "grad_norm": 2.25781450338385, + "learning_rate": 1.060821520153521e-09, + "loss": 0.6736876368522644, + "step": 5652 + }, + { + "epoch": 1.9920704845814978, + "grad_norm": 1.9047917173058142, + "learning_rate": 9.776544681672218e-10, + "loss": 0.6823733448982239, + "step": 5653 + }, + { + "epoch": 1.9924229074889868, + "grad_norm": 1.9405145779165673, + "learning_rate": 8.978817730864731e-10, + "loss": 0.4686351716518402, + "step": 5654 + }, + { + "epoch": 1.992775330396476, + "grad_norm": 1.8195643517733058, + "learning_rate": 8.215034619907247e-10, + "loss": 0.5027543306350708, + "step": 5655 + }, + { + "epoch": 1.9931277533039649, + "grad_norm": 1.8439113550188309, + "learning_rate": 7.485195608081253e-10, + "loss": 0.6217285394668579, + "step": 5656 + }, + { + "epoch": 1.9934801762114538, + "grad_norm": 1.6691057045729332, + "learning_rate": 6.78930094315522e-10, + "loss": 0.5942907929420471, + "step": 5657 + }, + { + "epoch": 1.9938325991189427, + "grad_norm": 2.096767040436613, + "learning_rate": 6.127350861351299e-10, + "loss": 0.7282885313034058, + "step": 5658 + }, + { + "epoch": 1.9941850220264317, + "grad_norm": 1.7781855320052453, + "learning_rate": 5.499345587389737e-10, + "loss": 0.653915286064148, + "step": 5659 + }, + { + "epoch": 1.9945374449339206, + "grad_norm": 2.223388921265014, + "learning_rate": 4.905285334455556e-10, + "loss": 0.6993501782417297, + "step": 5660 + }, + { + "epoch": 1.9948898678414095, + "grad_norm": 1.6333458377223884, + "learning_rate": 4.3451703042207694e-10, + "loss": 0.712554931640625, + "step": 5661 + }, + { + "epoch": 1.9952422907488987, + "grad_norm": 2.0241768111751686, + "learning_rate": 3.81900068681107e-10, + "loss": 
0.7523812055587769, + "step": 5662 + }, + { + "epoch": 1.9955947136563876, + "grad_norm": 1.787839297148447, + "learning_rate": 3.3267766608502395e-10, + "loss": 0.5138256549835205, + "step": 5663 + }, + { + "epoch": 1.9959471365638768, + "grad_norm": 1.9006089009269762, + "learning_rate": 2.8684983934490486e-10, + "loss": 0.6154034733772278, + "step": 5664 + }, + { + "epoch": 1.9962995594713657, + "grad_norm": 2.0640894330103623, + "learning_rate": 2.4441660401608447e-10, + "loss": 0.5790190696716309, + "step": 5665 + }, + { + "epoch": 1.9966519823788547, + "grad_norm": 2.000808545557318, + "learning_rate": 2.0537797450370657e-10, + "loss": 0.6873353719711304, + "step": 5666 + }, + { + "epoch": 1.9970044052863436, + "grad_norm": 2.2263273319791583, + "learning_rate": 1.6973396405939312e-10, + "loss": 0.5764753222465515, + "step": 5667 + }, + { + "epoch": 1.9973568281938325, + "grad_norm": 1.9150199338130098, + "learning_rate": 1.374845847856854e-10, + "loss": 0.5144297480583191, + "step": 5668 + }, + { + "epoch": 1.9977092511013215, + "grad_norm": 2.1042696631217415, + "learning_rate": 1.0862984762716189e-10, + "loss": 0.5934832692146301, + "step": 5669 + }, + { + "epoch": 1.9980616740088104, + "grad_norm": 1.7802660124503475, + "learning_rate": 8.316976238154084e-11, + "loss": 0.4544188976287842, + "step": 5670 + }, + { + "epoch": 1.9984140969162996, + "grad_norm": 1.88553614744617, + "learning_rate": 6.110433769079827e-11, + "loss": 0.44844698905944824, + "step": 5671 + }, + { + "epoch": 1.9987665198237885, + "grad_norm": 1.7574547722102742, + "learning_rate": 4.2433581045608905e-11, + "loss": 0.5272520780563354, + "step": 5672 + }, + { + "epoch": 1.9991189427312777, + "grad_norm": 1.8300265717895403, + "learning_rate": 2.715749878312579e-11, + "loss": 0.5003396272659302, + "step": 5673 + }, + { + "epoch": 1.9994713656387666, + "grad_norm": 2.047162707278085, + "learning_rate": 1.5276096090310887e-11, + "loss": 0.561710000038147, + "step": 5674 + }, + { + 
"epoch": 1.9998237885462555, + "grad_norm": 1.8995125757935345, + "learning_rate": 6.789377000604447e-12, + "loss": 0.666955292224884, + "step": 5675 + }, + { + "epoch": 2.0, + "grad_norm": 4.048422061559424, + "learning_rate": 1.6973443939249934e-12, + "loss": 0.7278814911842346, + "step": 5676 + } + ], + "logging_steps": 1, + "max_steps": 5676, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1754791774076928.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-5676/training_args.bin b/checkpoint-5676/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3d16cae9f8126645d9b722fd466525457b2f8a90 --- /dev/null +++ b/checkpoint-5676/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db5e3f64fea9062d775ce1214f2b31fbf79ffdfb10af7998752ce02faa3d3dd5 +size 6968 diff --git a/checkpoint-5676/zero_to_fp32.py b/checkpoint-5676/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..5995d6e6f04e43b989587aa9022a3aef0c66d694 --- /dev/null +++ b/checkpoint-5676/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . 
output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = 
sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, 
ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." 
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
def zero3_partitioned_param_info(unpartitioned_numel, world_size):
    """
    Work out how a parameter with ``unpartitioned_numel`` elements is split over ``world_size`` ranks.

    Args:
        - ``unpartitioned_numel``: total number of elements of the full parameter
        - ``world_size``: number of ranks the parameter is partitioned across

    Returns:
        - ``(partitioned_numel, padding_numel)``: the per-rank element count (rounded up) and the
          number of padding elements needed to make the total a multiple of ``world_size``
    """
    # divmod stays in exact integer arithmetic; the float division used by math.ceil(a / b)
    # can round to the wrong integer once the element count no longer fits a double exactly.
    full_chunks, remainder = divmod(unpartitioned_numel, world_size)
    if remainder:
        return full_chunks + 1, world_size - remainder
    return full_chunks, 0
class GatheredTensor:
    """
    A pseudo tensor that collects partitioned weights.
    It is more memory efficient when there are multiple groups.

    The object only records where a parameter lives inside the per-rank flat groups; the actual
    tensor is assembled when ``contiguous()`` is called.
    """

    def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape):
        """
        Args:
            - ``flat_groups``: flat tensors indexed as ``flat_groups[rank][group]``
            - ``flat_groups_offset``: cumulative group boundaries; group ``g`` covers
              ``[flat_groups_offset[g], flat_groups_offset[g + 1])``
            - ``offset``: start of this parameter's partition, in the same coordinates as
              ``flat_groups_offset``
            - ``partitioned_numel``: number of elements every rank holds for this parameter
            - ``shape``: full shape of the parameter; must provide ``numel()``
        """
        self.flat_groups = flat_groups
        self.flat_groups_offset = flat_groups_offset
        self.offset = offset
        self.partitioned_numel = partitioned_numel
        self.shape = shape
        self.dtype = self.flat_groups[0][0].dtype

    def contiguous(self):
        """
        Merge partitioned weights from flat_groups into a single tensor.

        Returns:
            - a contiguous tensor of shape ``self.shape``; trailing padding elements are dropped

        Raises:
            - ``ValueError``: if the partition ``[offset, offset + partitioned_numel)`` cannot be
              located inside ``flat_groups_offset``
        """
        end_idx = self.offset + self.partitioned_numel
        boundaries = self.flat_groups_offset

        # The groups touched by this parameter depend only on the offsets, not on the rank,
        # so they are resolved once instead of once per rank.
        start_group_id = None
        end_group_id = None
        for group_id in range(len(boundaries) - 1):
            group_start, group_end = boundaries[group_id], boundaries[group_id + 1]
            if group_start <= self.offset < group_end:
                start_group_id = group_id
            if group_start < end_idx <= group_end:
                end_group_id = group_id
                break
        if start_group_id is None or end_group_id is None:
            raise ValueError(f"partition [{self.offset}, {end_idx}) is not covered by the flat group offsets")

        pad_flat_param_chunks = []
        for flat_groups_at_rank_i in self.flat_groups:
            # for each rank, we need to collect weights from related group/groups
            for group_id in range(start_group_id, end_group_id + 1):
                group_start = boundaries[group_id]
                flat_tensor = flat_groups_at_rank_i[group_id]
                # Clamp both ends to the current group. Without the clamp on the start, every group
                # after the first one would get a negative start index and slice the wrong elements.
                start_offset = max(self.offset, group_start) - group_start
                end_offset = min(end_idx, boundaries[group_id + 1]) - group_start
                pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset])

        # collect weights from all ranks
        pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0)
        param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous()
        return param
def to_torch_tensor(state_dict, return_empty_tensor=False):
    """
    Build a plain ``dict`` of torch tensors from a state_dict of (pseudo) tensors.

    Entries that refer to the very same source object end up sharing one converted tensor.
    With ``return_empty_tensor`` set, uninitialised tensors of matching shape and dtype are
    allocated instead of calling ``.contiguous()`` on the source.
    """
    materialized = {}
    built_by_identity = {}  # id(source object) -> tensor already produced for it
    for key, source in state_dict.items():
        identity = id(source)
        if identity not in built_by_identity:
            if return_empty_tensor:
                built = torch.empty(source.shape, dtype=source.dtype)
            else:
                built = source.contiguous()
            built_by_identity[identity] = built
        materialized[key] = built_by_identity[identity]
    return materialized
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. + Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir,
                                               output_dir,
                                               max_shard_size="5GB",
                                               safe_serialization=False,
                                               tag=None,
                                               exclude_frozen_parameters=False):
    """
    Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be
    loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed.

    The weights are written into ``output_dir`` as one file per shard; when the result is sharded
    an additional ``*.index.json`` file mapping weight names to shard files is written as well.

    Args:
        - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``)
        - ``output_dir``: directory to the pytorch fp32 state_dict output files
        - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB.
          Pass ``None`` to write everything into a single file.
        - ``safe_serialization``:  whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).
        - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14``
        - ``exclude_frozen_parameters``: exclude frozen parameters

    Raises:
        - ``ImportError``: re-raised when ``safetensors`` (for ``safe_serialization``) or
          ``huggingface_hub`` (for ``max_shard_size``) is requested but not installed
    """

    # Dependency pre-check
    # The optional packages are imported up front so a missing dependency is reported
    # before the checkpoint is parsed.
    if safe_serialization:
        try:
            from safetensors.torch import save_file
        except ImportError:
            print('If you want to use `safe_serialization`, please `pip install safetensors`')
            raise
    if max_shard_size is not None:
        try:
            from huggingface_hub import split_torch_state_dict_into_shards
        except ImportError:
            print('If you want to use `max_shard_size`, please `pip install huggingface_hub`')
            raise

    # Convert zero checkpoint to state_dict
    # lazy_mode=True skips the to_torch_tensor() conversion, so entries are only
    # materialised shard by shard in the save loop below.
    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir,
                                                          tag,
                                                          exclude_frozen_parameters,
                                                          lazy_mode=True)

    # Shard the model if it is too big.
    weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin"
    if max_shard_size is not None:
        filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors")
        # a memory-efficient approach for sharding: the split is planned on empty tensors that only
        # carry shape and dtype, not on the real weights
        empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True)
        state_dict_split = split_torch_state_dict_into_shards(empty_state_dict,
                                                              filename_pattern=filename_pattern,
                                                              max_shard_size=max_shard_size)
    else:
        # No sharding requested: build a minimal stand-in exposing the two attributes used below.
        from collections import namedtuple
        StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"])
        state_dict_split = StateDictSplit(is_sharded=False,
                                          filename_to_tensors={weights_name: list(state_dict.keys())})

    # Save the model by shard
    os.makedirs(output_dir, exist_ok=True)
    filename_to_tensors = state_dict_split.filename_to_tensors.items()
    for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"):
        shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors}
        # Materialise only the tensors that belong to the current shard.
        shard_state_dict = to_torch_tensor(shard_state_dict)
        output_path = os.path.join(output_dir, shard_file)
        if safe_serialization:
            save_file(shard_state_dict, output_path, metadata={"format": "pt"})
        else:
            torch.save(shard_state_dict, output_path)
        # release the memory of current shard
        # (iterate over a copy of the keys because entries are deleted while looping)
        for tensor_name in list(shard_state_dict.keys()):
            del state_dict[tensor_name]
            del shard_state_dict[tensor_name]
        del shard_state_dict
        gc.collect()

    # Save index if sharded
    if state_dict_split.is_sharded:
        index = {
            "metadata": state_dict_split.metadata,
            "weight_map": state_dict_split.tensor_to_filename,
        }
        save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json"
        save_index_file = os.path.join(output_dir, save_index_file)
        with open(save_index_file, "w", encoding="utf-8") as f:
            content = json.dumps(index, indent=2, sort_keys=True) + "\n"
            f.write(content)
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and convert the checkpoint in ``checkpoint_dir``
    # into fp32 weight files under ``output_dir``.
    parser = argparse.ArgumentParser()
    parser.add_argument("checkpoint_dir",
                        type=str,
                        help="path to the desired checkpoint folder, e.g., path/checkpoint-12")
    # NOTE: adjacent string literals are concatenated verbatim, so each continued fragment
    # must end with an explicit space.
    parser.add_argument("output_dir",
                        type=str,
                        help="directory to the pytorch fp32 state_dict output files "
                        "(e.g. path/checkpoint-12-output/)")
    parser.add_argument(
        "--max_shard_size",
        type=str,
        default="5GB",
        help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size "
        "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`). "
        "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances "
        "without CPU OOM issues.")
    parser.add_argument(
        "--safe_serialization",
        default=False,
        action='store_true',
        help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).")
    parser.add_argument("-t",
                        "--tag",
                        type=str,
                        default=None,
                        help="checkpoint tag used as a unique identifier for checkpoint. e.g., global_step1")
    parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters")
    parser.add_argument("-d", "--debug", action='store_true', help="enable debug")
    args = parser.parse_args()

    # Module-level flag read by the merge helpers to enable their verbose prints.
    debug = args.debug

    convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir,
                                               args.output_dir,
                                               max_shard_size=args.max_shard_size,
                                               safe_serialization=args.safe_serialization,
                                               tag=args.tag,
                                               exclude_frozen_parameters=args.exclude_frozen_parameters)
"Qwen2VLVideoProcessor" + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3d979498e22845874be30ee7b489effa2a917acc --- /dev/null +++ b/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 2.0, + "total_flos": 1754791774076928.0, + "train_loss": 0.6725200974527508, + "train_runtime": 30148.3569, + "train_samples_per_second": 0.753, + "train_steps_per_second": 0.188 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4a4b9b285ea621fd74cc20d40ddd918863ae421d --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,5677 @@ +{"current_steps": 1, 
"total_steps": 5676, "loss": 1.493973731994629, "lr": 0.0, "epoch": 0.0003524229074889868, "percentage": 0.02, "elapsed_time": "0:00:09", "remaining_time": "15:42:00"} +{"current_steps": 2, "total_steps": 5676, "loss": 1.3692013025283813, "lr": 7.042253521126761e-08, "epoch": 0.0007048458149779736, "percentage": 0.04, "elapsed_time": "0:00:14", "remaining_time": "11:22:23"} +{"current_steps": 3, "total_steps": 5676, "loss": 1.3996260166168213, "lr": 1.4084507042253522e-07, "epoch": 0.0010572687224669603, "percentage": 0.05, "elapsed_time": "0:00:20", "remaining_time": "10:51:58"} +{"current_steps": 4, "total_steps": 5676, "loss": 1.3011515140533447, "lr": 2.1126760563380284e-07, "epoch": 0.0014096916299559472, "percentage": 0.07, "elapsed_time": "0:00:24", "remaining_time": "9:45:53"} +{"current_steps": 5, "total_steps": 5676, "loss": 1.3736083507537842, "lr": 2.8169014084507043e-07, "epoch": 0.001762114537444934, "percentage": 0.09, "elapsed_time": "0:00:29", "remaining_time": "9:26:49"} +{"current_steps": 6, "total_steps": 5676, "loss": 1.051241159439087, "lr": 3.521126760563381e-07, "epoch": 0.0021145374449339205, "percentage": 0.11, "elapsed_time": "0:00:34", "remaining_time": "9:07:06"} +{"current_steps": 7, "total_steps": 5676, "loss": 1.2594621181488037, "lr": 4.225352112676057e-07, "epoch": 0.0024669603524229075, "percentage": 0.12, "elapsed_time": "0:00:40", "remaining_time": "9:04:53"} +{"current_steps": 8, "total_steps": 5676, "loss": 1.0498416423797607, "lr": 4.929577464788733e-07, "epoch": 0.0028193832599118945, "percentage": 0.14, "elapsed_time": "0:00:45", "remaining_time": "9:02:53"} +{"current_steps": 9, "total_steps": 5676, "loss": 1.3313459157943726, "lr": 5.633802816901409e-07, "epoch": 0.003171806167400881, "percentage": 0.16, "elapsed_time": "0:00:51", "remaining_time": "8:58:37"} +{"current_steps": 10, "total_steps": 5676, "loss": 1.2484922409057617, "lr": 6.338028169014085e-07, "epoch": 0.003524229074889868, "percentage": 0.18, 
"elapsed_time": "0:00:56", "remaining_time": "8:50:56"} +{"current_steps": 11, "total_steps": 5676, "loss": 1.097194790840149, "lr": 7.042253521126762e-07, "epoch": 0.0038766519823788545, "percentage": 0.19, "elapsed_time": "0:01:02", "remaining_time": "8:53:34"} +{"current_steps": 12, "total_steps": 5676, "loss": 1.3065136671066284, "lr": 7.746478873239437e-07, "epoch": 0.004229074889867841, "percentage": 0.21, "elapsed_time": "0:01:07", "remaining_time": "8:49:27"} +{"current_steps": 13, "total_steps": 5676, "loss": 1.1574026346206665, "lr": 8.450704225352114e-07, "epoch": 0.0045814977973568285, "percentage": 0.23, "elapsed_time": "0:01:11", "remaining_time": "8:42:10"} +{"current_steps": 14, "total_steps": 5676, "loss": 1.1509445905685425, "lr": 9.154929577464789e-07, "epoch": 0.004933920704845815, "percentage": 0.25, "elapsed_time": "0:01:16", "remaining_time": "8:35:43"} +{"current_steps": 15, "total_steps": 5676, "loss": 1.069403886795044, "lr": 9.859154929577465e-07, "epoch": 0.0052863436123348016, "percentage": 0.26, "elapsed_time": "0:01:23", "remaining_time": "8:44:37"} +{"current_steps": 16, "total_steps": 5676, "loss": 1.1731287240982056, "lr": 1.0563380281690142e-06, "epoch": 0.005638766519823789, "percentage": 0.28, "elapsed_time": "0:01:28", "remaining_time": "8:41:49"} +{"current_steps": 17, "total_steps": 5676, "loss": 0.9314254522323608, "lr": 1.1267605633802817e-06, "epoch": 0.0059911894273127755, "percentage": 0.3, "elapsed_time": "0:01:33", "remaining_time": "8:39:28"} +{"current_steps": 18, "total_steps": 5676, "loss": 1.2915008068084717, "lr": 1.1971830985915492e-06, "epoch": 0.006343612334801762, "percentage": 0.32, "elapsed_time": "0:01:38", "remaining_time": "8:37:57"} +{"current_steps": 19, "total_steps": 5676, "loss": 1.1088309288024902, "lr": 1.267605633802817e-06, "epoch": 0.006696035242290749, "percentage": 0.33, "elapsed_time": "0:01:43", "remaining_time": "8:34:10"} +{"current_steps": 20, "total_steps": 5676, "loss": 
1.21511709690094, "lr": 1.3380281690140844e-06, "epoch": 0.007048458149779736, "percentage": 0.35, "elapsed_time": "0:01:49", "remaining_time": "8:34:28"} +{"current_steps": 21, "total_steps": 5676, "loss": 1.241409420967102, "lr": 1.4084507042253523e-06, "epoch": 0.0074008810572687225, "percentage": 0.37, "elapsed_time": "0:01:54", "remaining_time": "8:34:55"} +{"current_steps": 22, "total_steps": 5676, "loss": 1.2170014381408691, "lr": 1.4788732394366198e-06, "epoch": 0.007753303964757709, "percentage": 0.39, "elapsed_time": "0:01:59", "remaining_time": "8:30:58"} +{"current_steps": 23, "total_steps": 5676, "loss": 1.1405870914459229, "lr": 1.5492957746478873e-06, "epoch": 0.008105726872246696, "percentage": 0.41, "elapsed_time": "0:02:04", "remaining_time": "8:31:30"} +{"current_steps": 24, "total_steps": 5676, "loss": 1.122542381286621, "lr": 1.6197183098591552e-06, "epoch": 0.008458149779735682, "percentage": 0.42, "elapsed_time": "0:02:11", "remaining_time": "8:35:41"} +{"current_steps": 25, "total_steps": 5676, "loss": 1.1686937808990479, "lr": 1.6901408450704227e-06, "epoch": 0.00881057268722467, "percentage": 0.44, "elapsed_time": "0:02:16", "remaining_time": "8:36:02"} +{"current_steps": 26, "total_steps": 5676, "loss": 1.215955376625061, "lr": 1.7605633802816902e-06, "epoch": 0.009162995594713657, "percentage": 0.46, "elapsed_time": "0:02:21", "remaining_time": "8:31:46"} +{"current_steps": 27, "total_steps": 5676, "loss": 1.075179100036621, "lr": 1.8309859154929579e-06, "epoch": 0.009515418502202643, "percentage": 0.48, "elapsed_time": "0:02:26", "remaining_time": "8:31:03"} +{"current_steps": 28, "total_steps": 5676, "loss": 1.198237419128418, "lr": 1.9014084507042254e-06, "epoch": 0.00986784140969163, "percentage": 0.49, "elapsed_time": "0:02:32", "remaining_time": "8:32:05"} +{"current_steps": 29, "total_steps": 5676, "loss": 1.2356700897216797, "lr": 1.971830985915493e-06, "epoch": 0.010220264317180617, "percentage": 0.51, "elapsed_time": "0:02:38", 
"remaining_time": "8:32:57"} +{"current_steps": 30, "total_steps": 5676, "loss": 1.2373592853546143, "lr": 2.0422535211267608e-06, "epoch": 0.010572687224669603, "percentage": 0.53, "elapsed_time": "0:02:43", "remaining_time": "8:32:12"} +{"current_steps": 31, "total_steps": 5676, "loss": 1.1857718229293823, "lr": 2.1126760563380285e-06, "epoch": 0.01092511013215859, "percentage": 0.55, "elapsed_time": "0:02:49", "remaining_time": "8:33:15"} +{"current_steps": 32, "total_steps": 5676, "loss": 1.1844017505645752, "lr": 2.1830985915492958e-06, "epoch": 0.011277533039647578, "percentage": 0.56, "elapsed_time": "0:02:53", "remaining_time": "8:30:37"} +{"current_steps": 33, "total_steps": 5676, "loss": 1.275226354598999, "lr": 2.2535211267605635e-06, "epoch": 0.011629955947136564, "percentage": 0.58, "elapsed_time": "0:02:58", "remaining_time": "8:29:24"} +{"current_steps": 34, "total_steps": 5676, "loss": 1.169473648071289, "lr": 2.323943661971831e-06, "epoch": 0.011982378854625551, "percentage": 0.6, "elapsed_time": "0:03:04", "remaining_time": "8:31:08"} +{"current_steps": 35, "total_steps": 5676, "loss": 1.2182841300964355, "lr": 2.3943661971830984e-06, "epoch": 0.012334801762114538, "percentage": 0.62, "elapsed_time": "0:03:09", "remaining_time": "8:30:03"} +{"current_steps": 36, "total_steps": 5676, "loss": 1.2110469341278076, "lr": 2.4647887323943666e-06, "epoch": 0.012687224669603524, "percentage": 0.63, "elapsed_time": "0:03:15", "remaining_time": "8:30:31"} +{"current_steps": 37, "total_steps": 5676, "loss": 1.2886571884155273, "lr": 2.535211267605634e-06, "epoch": 0.01303964757709251, "percentage": 0.65, "elapsed_time": "0:03:20", "remaining_time": "8:30:17"} +{"current_steps": 38, "total_steps": 5676, "loss": 1.0740901231765747, "lr": 2.6056338028169015e-06, "epoch": 0.013392070484581497, "percentage": 0.67, "elapsed_time": "0:03:24", "remaining_time": "8:26:27"} +{"current_steps": 39, "total_steps": 5676, "loss": 1.0119279623031616, "lr": 
2.676056338028169e-06, "epoch": 0.013744493392070485, "percentage": 0.69, "elapsed_time": "0:03:31", "remaining_time": "8:29:12"} +{"current_steps": 40, "total_steps": 5676, "loss": 1.0752044916152954, "lr": 2.746478873239437e-06, "epoch": 0.014096916299559472, "percentage": 0.7, "elapsed_time": "0:03:37", "remaining_time": "8:31:08"} +{"current_steps": 41, "total_steps": 5676, "loss": 1.345343828201294, "lr": 2.8169014084507046e-06, "epoch": 0.014449339207048459, "percentage": 0.72, "elapsed_time": "0:03:41", "remaining_time": "8:27:22"} +{"current_steps": 42, "total_steps": 5676, "loss": 1.102332353591919, "lr": 2.887323943661972e-06, "epoch": 0.014801762114537445, "percentage": 0.74, "elapsed_time": "0:03:46", "remaining_time": "8:26:57"} +{"current_steps": 43, "total_steps": 5676, "loss": 1.006919264793396, "lr": 2.9577464788732396e-06, "epoch": 0.015154185022026432, "percentage": 0.76, "elapsed_time": "0:03:51", "remaining_time": "8:25:27"} +{"current_steps": 44, "total_steps": 5676, "loss": 1.104026436805725, "lr": 3.0281690140845073e-06, "epoch": 0.015506607929515418, "percentage": 0.78, "elapsed_time": "0:03:56", "remaining_time": "8:24:32"} +{"current_steps": 45, "total_steps": 5676, "loss": 1.299152135848999, "lr": 3.0985915492957746e-06, "epoch": 0.015859030837004406, "percentage": 0.79, "elapsed_time": "0:04:00", "remaining_time": "8:21:25"} +{"current_steps": 46, "total_steps": 5676, "loss": 1.1075072288513184, "lr": 3.1690140845070427e-06, "epoch": 0.01621145374449339, "percentage": 0.81, "elapsed_time": "0:04:05", "remaining_time": "8:20:29"} +{"current_steps": 47, "total_steps": 5676, "loss": 1.0296107530593872, "lr": 3.2394366197183104e-06, "epoch": 0.01656387665198238, "percentage": 0.83, "elapsed_time": "0:04:10", "remaining_time": "8:19:49"} +{"current_steps": 48, "total_steps": 5676, "loss": 1.1097803115844727, "lr": 3.3098591549295777e-06, "epoch": 0.016916299559471364, "percentage": 0.85, "elapsed_time": "0:04:15", "remaining_time": 
"8:19:15"} +{"current_steps": 49, "total_steps": 5676, "loss": 0.945678174495697, "lr": 3.3802816901408454e-06, "epoch": 0.017268722466960353, "percentage": 0.86, "elapsed_time": "0:04:20", "remaining_time": "8:19:16"} +{"current_steps": 50, "total_steps": 5676, "loss": 1.075556993484497, "lr": 3.4507042253521127e-06, "epoch": 0.01762114537444934, "percentage": 0.88, "elapsed_time": "0:04:26", "remaining_time": "8:19:10"} +{"current_steps": 51, "total_steps": 5676, "loss": 1.0790367126464844, "lr": 3.5211267605633804e-06, "epoch": 0.017973568281938326, "percentage": 0.9, "elapsed_time": "0:04:32", "remaining_time": "8:20:04"} +{"current_steps": 52, "total_steps": 5676, "loss": 1.2567799091339111, "lr": 3.5915492957746485e-06, "epoch": 0.018325991189427314, "percentage": 0.92, "elapsed_time": "0:04:36", "remaining_time": "8:19:13"} +{"current_steps": 53, "total_steps": 5676, "loss": 1.1437780857086182, "lr": 3.6619718309859158e-06, "epoch": 0.0186784140969163, "percentage": 0.93, "elapsed_time": "0:04:41", "remaining_time": "8:18:33"} +{"current_steps": 54, "total_steps": 5676, "loss": 1.0962307453155518, "lr": 3.7323943661971835e-06, "epoch": 0.019030837004405287, "percentage": 0.95, "elapsed_time": "0:04:47", "remaining_time": "8:18:45"} +{"current_steps": 55, "total_steps": 5676, "loss": 1.0149122476577759, "lr": 3.8028169014084508e-06, "epoch": 0.019383259911894272, "percentage": 0.97, "elapsed_time": "0:04:51", "remaining_time": "8:17:17"} +{"current_steps": 56, "total_steps": 5676, "loss": 0.9029096364974976, "lr": 3.873239436619718e-06, "epoch": 0.01973568281938326, "percentage": 0.99, "elapsed_time": "0:04:56", "remaining_time": "8:16:41"} +{"current_steps": 57, "total_steps": 5676, "loss": 1.1290819644927979, "lr": 3.943661971830986e-06, "epoch": 0.02008810572687225, "percentage": 1.0, "elapsed_time": "0:05:02", "remaining_time": "8:16:56"} +{"current_steps": 58, "total_steps": 5676, "loss": 1.0965365171432495, "lr": 4.014084507042254e-06, "epoch": 
0.020440528634361233, "percentage": 1.02, "elapsed_time": "0:05:07", "remaining_time": "8:17:07"} +{"current_steps": 59, "total_steps": 5676, "loss": 1.2854020595550537, "lr": 4.0845070422535216e-06, "epoch": 0.02079295154185022, "percentage": 1.04, "elapsed_time": "0:05:12", "remaining_time": "8:16:23"} +{"current_steps": 60, "total_steps": 5676, "loss": 1.0303996801376343, "lr": 4.154929577464789e-06, "epoch": 0.021145374449339206, "percentage": 1.06, "elapsed_time": "0:05:18", "remaining_time": "8:17:11"} +{"current_steps": 61, "total_steps": 5676, "loss": 1.0811198949813843, "lr": 4.225352112676057e-06, "epoch": 0.021497797356828195, "percentage": 1.07, "elapsed_time": "0:05:24", "remaining_time": "8:17:55"} +{"current_steps": 62, "total_steps": 5676, "loss": 1.1373648643493652, "lr": 4.295774647887324e-06, "epoch": 0.02185022026431718, "percentage": 1.09, "elapsed_time": "0:05:29", "remaining_time": "8:17:22"} +{"current_steps": 63, "total_steps": 5676, "loss": 0.8717563152313232, "lr": 4.3661971830985915e-06, "epoch": 0.022202643171806168, "percentage": 1.11, "elapsed_time": "0:05:35", "remaining_time": "8:17:56"} +{"current_steps": 64, "total_steps": 5676, "loss": 0.9939290881156921, "lr": 4.43661971830986e-06, "epoch": 0.022555066079295156, "percentage": 1.13, "elapsed_time": "0:05:39", "remaining_time": "8:16:46"} +{"current_steps": 65, "total_steps": 5676, "loss": 1.1776926517486572, "lr": 4.507042253521127e-06, "epoch": 0.02290748898678414, "percentage": 1.15, "elapsed_time": "0:05:45", "remaining_time": "8:16:41"} +{"current_steps": 66, "total_steps": 5676, "loss": 0.9149726629257202, "lr": 4.577464788732395e-06, "epoch": 0.02325991189427313, "percentage": 1.16, "elapsed_time": "0:05:51", "remaining_time": "8:18:10"} +{"current_steps": 67, "total_steps": 5676, "loss": 0.996609091758728, "lr": 4.647887323943662e-06, "epoch": 0.023612334801762114, "percentage": 1.18, "elapsed_time": "0:05:57", "remaining_time": "8:18:08"} +{"current_steps": 68, 
"total_steps": 5676, "loss": 1.102593183517456, "lr": 4.71830985915493e-06, "epoch": 0.023964757709251102, "percentage": 1.2, "elapsed_time": "0:06:02", "remaining_time": "8:18:38"} +{"current_steps": 69, "total_steps": 5676, "loss": 1.0912048816680908, "lr": 4.788732394366197e-06, "epoch": 0.024317180616740087, "percentage": 1.22, "elapsed_time": "0:06:08", "remaining_time": "8:19:20"} +{"current_steps": 70, "total_steps": 5676, "loss": 1.1192498207092285, "lr": 4.859154929577465e-06, "epoch": 0.024669603524229075, "percentage": 1.23, "elapsed_time": "0:06:13", "remaining_time": "8:17:57"} +{"current_steps": 71, "total_steps": 5676, "loss": 1.0358459949493408, "lr": 4.929577464788733e-06, "epoch": 0.025022026431718063, "percentage": 1.25, "elapsed_time": "0:06:17", "remaining_time": "8:16:32"} +{"current_steps": 72, "total_steps": 5676, "loss": 1.076169490814209, "lr": 5e-06, "epoch": 0.025374449339207048, "percentage": 1.27, "elapsed_time": "0:06:23", "remaining_time": "8:17:44"} +{"current_steps": 73, "total_steps": 5676, "loss": 0.9906084537506104, "lr": 5.070422535211268e-06, "epoch": 0.025726872246696036, "percentage": 1.29, "elapsed_time": "0:06:29", "remaining_time": "8:17:49"} +{"current_steps": 74, "total_steps": 5676, "loss": 0.8163654804229736, "lr": 5.140845070422536e-06, "epoch": 0.02607929515418502, "percentage": 1.3, "elapsed_time": "0:06:34", "remaining_time": "8:18:13"} +{"current_steps": 75, "total_steps": 5676, "loss": 1.140099048614502, "lr": 5.211267605633803e-06, "epoch": 0.02643171806167401, "percentage": 1.32, "elapsed_time": "0:06:39", "remaining_time": "8:16:49"} +{"current_steps": 76, "total_steps": 5676, "loss": 0.7654916048049927, "lr": 5.28169014084507e-06, "epoch": 0.026784140969162994, "percentage": 1.34, "elapsed_time": "0:06:44", "remaining_time": "8:16:41"} +{"current_steps": 77, "total_steps": 5676, "loss": 0.9476499557495117, "lr": 5.352112676056338e-06, "epoch": 0.027136563876651983, "percentage": 1.36, "elapsed_time": 
"0:06:49", "remaining_time": "8:16:19"} +{"current_steps": 78, "total_steps": 5676, "loss": 1.120811939239502, "lr": 5.422535211267607e-06, "epoch": 0.02748898678414097, "percentage": 1.37, "elapsed_time": "0:06:56", "remaining_time": "8:17:52"} +{"current_steps": 79, "total_steps": 5676, "loss": 0.9745736122131348, "lr": 5.492957746478874e-06, "epoch": 0.027841409691629956, "percentage": 1.39, "elapsed_time": "0:07:01", "remaining_time": "8:17:27"} +{"current_steps": 80, "total_steps": 5676, "loss": 1.086181879043579, "lr": 5.563380281690142e-06, "epoch": 0.028193832599118944, "percentage": 1.41, "elapsed_time": "0:07:05", "remaining_time": "8:16:25"} +{"current_steps": 81, "total_steps": 5676, "loss": 0.9904681444168091, "lr": 5.633802816901409e-06, "epoch": 0.02854625550660793, "percentage": 1.43, "elapsed_time": "0:07:12", "remaining_time": "8:17:59"} +{"current_steps": 82, "total_steps": 5676, "loss": 1.0311436653137207, "lr": 5.7042253521126766e-06, "epoch": 0.028898678414096917, "percentage": 1.44, "elapsed_time": "0:07:18", "remaining_time": "8:18:56"} +{"current_steps": 83, "total_steps": 5676, "loss": 0.996998131275177, "lr": 5.774647887323944e-06, "epoch": 0.029251101321585902, "percentage": 1.46, "elapsed_time": "0:07:23", "remaining_time": "8:18:06"} +{"current_steps": 84, "total_steps": 5676, "loss": 1.1526594161987305, "lr": 5.845070422535212e-06, "epoch": 0.02960352422907489, "percentage": 1.48, "elapsed_time": "0:07:27", "remaining_time": "8:16:26"} +{"current_steps": 85, "total_steps": 5676, "loss": 1.0914695262908936, "lr": 5.915492957746479e-06, "epoch": 0.029955947136563875, "percentage": 1.5, "elapsed_time": "0:07:31", "remaining_time": "8:15:14"} +{"current_steps": 86, "total_steps": 5676, "loss": 0.9558745622634888, "lr": 5.9859154929577465e-06, "epoch": 0.030308370044052863, "percentage": 1.52, "elapsed_time": "0:07:36", "remaining_time": "8:14:17"} +{"current_steps": 87, "total_steps": 5676, "loss": 0.9668983221054077, "lr": 
6.056338028169015e-06, "epoch": 0.03066079295154185, "percentage": 1.53, "elapsed_time": "0:07:42", "remaining_time": "8:15:25"} +{"current_steps": 88, "total_steps": 5676, "loss": 1.0132758617401123, "lr": 6.126760563380282e-06, "epoch": 0.031013215859030836, "percentage": 1.55, "elapsed_time": "0:07:47", "remaining_time": "8:15:03"} +{"current_steps": 89, "total_steps": 5676, "loss": 1.0816935300827026, "lr": 6.197183098591549e-06, "epoch": 0.03136563876651982, "percentage": 1.57, "elapsed_time": "0:07:53", "remaining_time": "8:15:21"} +{"current_steps": 90, "total_steps": 5676, "loss": 1.03245210647583, "lr": 6.267605633802818e-06, "epoch": 0.03171806167400881, "percentage": 1.59, "elapsed_time": "0:07:57", "remaining_time": "8:13:49"} +{"current_steps": 91, "total_steps": 5676, "loss": 0.9812602400779724, "lr": 6.3380281690140855e-06, "epoch": 0.0320704845814978, "percentage": 1.6, "elapsed_time": "0:08:03", "remaining_time": "8:14:44"} +{"current_steps": 92, "total_steps": 5676, "loss": 0.9303219318389893, "lr": 6.408450704225353e-06, "epoch": 0.03242290748898678, "percentage": 1.62, "elapsed_time": "0:08:08", "remaining_time": "8:14:19"} +{"current_steps": 93, "total_steps": 5676, "loss": 1.175403356552124, "lr": 6.478873239436621e-06, "epoch": 0.032775330396475774, "percentage": 1.64, "elapsed_time": "0:08:12", "remaining_time": "8:13:01"} +{"current_steps": 94, "total_steps": 5676, "loss": 1.1863958835601807, "lr": 6.549295774647888e-06, "epoch": 0.03312775330396476, "percentage": 1.66, "elapsed_time": "0:08:17", "remaining_time": "8:12:24"} +{"current_steps": 95, "total_steps": 5676, "loss": 0.8630557060241699, "lr": 6.619718309859155e-06, "epoch": 0.033480176211453744, "percentage": 1.67, "elapsed_time": "0:08:22", "remaining_time": "8:12:09"} +{"current_steps": 96, "total_steps": 5676, "loss": 1.0499619245529175, "lr": 6.690140845070423e-06, "epoch": 0.03383259911894273, "percentage": 1.69, "elapsed_time": "0:08:28", "remaining_time": "8:13:02"} 
+{"current_steps": 97, "total_steps": 5676, "loss": 0.9443086981773376, "lr": 6.760563380281691e-06, "epoch": 0.03418502202643172, "percentage": 1.71, "elapsed_time": "0:08:34", "remaining_time": "8:13:29"} +{"current_steps": 98, "total_steps": 5676, "loss": 1.0011450052261353, "lr": 6.830985915492958e-06, "epoch": 0.034537444933920705, "percentage": 1.73, "elapsed_time": "0:08:40", "remaining_time": "8:13:19"} +{"current_steps": 99, "total_steps": 5676, "loss": 1.0239083766937256, "lr": 6.901408450704225e-06, "epoch": 0.03488986784140969, "percentage": 1.74, "elapsed_time": "0:08:45", "remaining_time": "8:13:01"} +{"current_steps": 100, "total_steps": 5676, "loss": 1.1335347890853882, "lr": 6.9718309859154935e-06, "epoch": 0.03524229074889868, "percentage": 1.76, "elapsed_time": "0:08:49", "remaining_time": "8:11:52"} +{"current_steps": 101, "total_steps": 5676, "loss": 0.9650854468345642, "lr": 7.042253521126761e-06, "epoch": 0.035594713656387666, "percentage": 1.78, "elapsed_time": "0:08:59", "remaining_time": "8:16:19"} +{"current_steps": 102, "total_steps": 5676, "loss": 0.9284406900405884, "lr": 7.112676056338029e-06, "epoch": 0.03594713656387665, "percentage": 1.8, "elapsed_time": "0:09:04", "remaining_time": "8:15:58"} +{"current_steps": 103, "total_steps": 5676, "loss": 0.989676296710968, "lr": 7.183098591549297e-06, "epoch": 0.036299559471365636, "percentage": 1.81, "elapsed_time": "0:09:10", "remaining_time": "8:16:06"} +{"current_steps": 104, "total_steps": 5676, "loss": 0.9149842262268066, "lr": 7.253521126760564e-06, "epoch": 0.03665198237885463, "percentage": 1.83, "elapsed_time": "0:09:14", "remaining_time": "8:15:16"} +{"current_steps": 105, "total_steps": 5676, "loss": 0.9793657064437866, "lr": 7.3239436619718316e-06, "epoch": 0.03700440528634361, "percentage": 1.85, "elapsed_time": "0:09:19", "remaining_time": "8:14:55"} +{"current_steps": 106, "total_steps": 5676, "loss": 0.9508543014526367, "lr": 7.3943661971831e-06, "epoch": 
0.0373568281938326, "percentage": 1.87, "elapsed_time": "0:09:25", "remaining_time": "8:14:49"} +{"current_steps": 107, "total_steps": 5676, "loss": 1.0623283386230469, "lr": 7.464788732394367e-06, "epoch": 0.03770925110132159, "percentage": 1.89, "elapsed_time": "0:09:29", "remaining_time": "8:14:04"} +{"current_steps": 108, "total_steps": 5676, "loss": 1.0039314031600952, "lr": 7.535211267605634e-06, "epoch": 0.038061674008810574, "percentage": 1.9, "elapsed_time": "0:09:34", "remaining_time": "8:13:45"} +{"current_steps": 109, "total_steps": 5676, "loss": 0.9315502643585205, "lr": 7.6056338028169015e-06, "epoch": 0.03841409691629956, "percentage": 1.92, "elapsed_time": "0:09:39", "remaining_time": "8:13:09"} +{"current_steps": 110, "total_steps": 5676, "loss": 0.936677098274231, "lr": 7.67605633802817e-06, "epoch": 0.038766519823788544, "percentage": 1.94, "elapsed_time": "0:09:44", "remaining_time": "8:13:08"} +{"current_steps": 111, "total_steps": 5676, "loss": 0.9373410940170288, "lr": 7.746478873239436e-06, "epoch": 0.039118942731277535, "percentage": 1.96, "elapsed_time": "0:09:50", "remaining_time": "8:13:28"} +{"current_steps": 112, "total_steps": 5676, "loss": 1.01617431640625, "lr": 7.816901408450704e-06, "epoch": 0.03947136563876652, "percentage": 1.97, "elapsed_time": "0:09:55", "remaining_time": "8:13:09"} +{"current_steps": 113, "total_steps": 5676, "loss": 1.1713547706604004, "lr": 7.887323943661972e-06, "epoch": 0.039823788546255505, "percentage": 1.99, "elapsed_time": "0:10:01", "remaining_time": "8:13:34"} +{"current_steps": 114, "total_steps": 5676, "loss": 0.8982350826263428, "lr": 7.95774647887324e-06, "epoch": 0.0401762114537445, "percentage": 2.01, "elapsed_time": "0:10:07", "remaining_time": "8:13:47"} +{"current_steps": 115, "total_steps": 5676, "loss": 0.8123869895935059, "lr": 8.028169014084509e-06, "epoch": 0.04052863436123348, "percentage": 2.03, "elapsed_time": "0:10:11", "remaining_time": "8:12:59"} +{"current_steps": 116, 
"total_steps": 5676, "loss": 1.0762536525726318, "lr": 8.098591549295775e-06, "epoch": 0.040881057268722466, "percentage": 2.04, "elapsed_time": "0:10:16", "remaining_time": "8:12:15"} +{"current_steps": 117, "total_steps": 5676, "loss": 1.004841923713684, "lr": 8.169014084507043e-06, "epoch": 0.04123348017621145, "percentage": 2.06, "elapsed_time": "0:10:22", "remaining_time": "8:12:54"} +{"current_steps": 118, "total_steps": 5676, "loss": 0.9237936735153198, "lr": 8.239436619718311e-06, "epoch": 0.04158590308370044, "percentage": 2.08, "elapsed_time": "0:10:27", "remaining_time": "8:12:46"} +{"current_steps": 119, "total_steps": 5676, "loss": 0.9038913249969482, "lr": 8.309859154929578e-06, "epoch": 0.04193832599118943, "percentage": 2.1, "elapsed_time": "0:10:33", "remaining_time": "8:12:47"} +{"current_steps": 120, "total_steps": 5676, "loss": 0.9488446712493896, "lr": 8.380281690140846e-06, "epoch": 0.04229074889867841, "percentage": 2.11, "elapsed_time": "0:10:38", "remaining_time": "8:12:55"} +{"current_steps": 121, "total_steps": 5676, "loss": 1.0862706899642944, "lr": 8.450704225352114e-06, "epoch": 0.042643171806167404, "percentage": 2.13, "elapsed_time": "0:10:44", "remaining_time": "8:13:06"} +{"current_steps": 122, "total_steps": 5676, "loss": 0.8882313966751099, "lr": 8.52112676056338e-06, "epoch": 0.04299559471365639, "percentage": 2.15, "elapsed_time": "0:10:50", "remaining_time": "8:13:54"} +{"current_steps": 123, "total_steps": 5676, "loss": 0.9125900268554688, "lr": 8.591549295774648e-06, "epoch": 0.043348017621145374, "percentage": 2.17, "elapsed_time": "0:10:57", "remaining_time": "8:15:05"} +{"current_steps": 124, "total_steps": 5676, "loss": 0.944568395614624, "lr": 8.661971830985915e-06, "epoch": 0.04370044052863436, "percentage": 2.18, "elapsed_time": "0:11:02", "remaining_time": "8:14:06"} +{"current_steps": 125, "total_steps": 5676, "loss": 0.9303089380264282, "lr": 8.732394366197183e-06, "epoch": 0.04405286343612335, "percentage": 2.2, 
"elapsed_time": "0:11:06", "remaining_time": "8:12:59"} +{"current_steps": 126, "total_steps": 5676, "loss": 1.0983362197875977, "lr": 8.802816901408451e-06, "epoch": 0.044405286343612335, "percentage": 2.22, "elapsed_time": "0:11:10", "remaining_time": "8:12:24"} +{"current_steps": 127, "total_steps": 5676, "loss": 1.002477765083313, "lr": 8.87323943661972e-06, "epoch": 0.04475770925110132, "percentage": 2.24, "elapsed_time": "0:11:16", "remaining_time": "8:12:35"} +{"current_steps": 128, "total_steps": 5676, "loss": 0.9994120001792908, "lr": 8.943661971830987e-06, "epoch": 0.04511013215859031, "percentage": 2.26, "elapsed_time": "0:11:22", "remaining_time": "8:13:09"} +{"current_steps": 129, "total_steps": 5676, "loss": 1.0785832405090332, "lr": 9.014084507042254e-06, "epoch": 0.045462555066079297, "percentage": 2.27, "elapsed_time": "0:11:29", "remaining_time": "8:13:50"} +{"current_steps": 130, "total_steps": 5676, "loss": 0.779441237449646, "lr": 9.084507042253522e-06, "epoch": 0.04581497797356828, "percentage": 2.29, "elapsed_time": "0:11:35", "remaining_time": "8:14:18"} +{"current_steps": 131, "total_steps": 5676, "loss": 1.0052348375320435, "lr": 9.15492957746479e-06, "epoch": 0.046167400881057266, "percentage": 2.31, "elapsed_time": "0:11:40", "remaining_time": "8:13:49"} +{"current_steps": 132, "total_steps": 5676, "loss": 1.0327996015548706, "lr": 9.225352112676057e-06, "epoch": 0.04651982378854626, "percentage": 2.33, "elapsed_time": "0:11:46", "remaining_time": "8:14:17"} +{"current_steps": 133, "total_steps": 5676, "loss": 1.0643246173858643, "lr": 9.295774647887325e-06, "epoch": 0.04687224669603524, "percentage": 2.34, "elapsed_time": "0:11:50", "remaining_time": "8:13:41"} +{"current_steps": 134, "total_steps": 5676, "loss": 0.8449216485023499, "lr": 9.366197183098593e-06, "epoch": 0.04722466960352423, "percentage": 2.36, "elapsed_time": "0:11:56", "remaining_time": "8:13:42"} +{"current_steps": 135, "total_steps": 5676, "loss": 0.8867055177688599, 
"lr": 9.43661971830986e-06, "epoch": 0.04757709251101322, "percentage": 2.38, "elapsed_time": "0:12:01", "remaining_time": "8:13:13"} +{"current_steps": 136, "total_steps": 5676, "loss": 1.048499584197998, "lr": 9.507042253521127e-06, "epoch": 0.047929515418502204, "percentage": 2.4, "elapsed_time": "0:12:06", "remaining_time": "8:13:05"} +{"current_steps": 137, "total_steps": 5676, "loss": 1.0548617839813232, "lr": 9.577464788732394e-06, "epoch": 0.04828193832599119, "percentage": 2.41, "elapsed_time": "0:12:11", "remaining_time": "8:12:43"} +{"current_steps": 138, "total_steps": 5676, "loss": 0.8882845044136047, "lr": 9.647887323943664e-06, "epoch": 0.048634361233480174, "percentage": 2.43, "elapsed_time": "0:12:16", "remaining_time": "8:12:54"} +{"current_steps": 139, "total_steps": 5676, "loss": 1.032647967338562, "lr": 9.71830985915493e-06, "epoch": 0.048986784140969165, "percentage": 2.45, "elapsed_time": "0:12:21", "remaining_time": "8:12:04"} +{"current_steps": 140, "total_steps": 5676, "loss": 1.039523959159851, "lr": 9.788732394366198e-06, "epoch": 0.04933920704845815, "percentage": 2.47, "elapsed_time": "0:12:26", "remaining_time": "8:11:42"} +{"current_steps": 141, "total_steps": 5676, "loss": 0.8451036214828491, "lr": 9.859154929577466e-06, "epoch": 0.049691629955947135, "percentage": 2.48, "elapsed_time": "0:12:30", "remaining_time": "8:11:09"} +{"current_steps": 142, "total_steps": 5676, "loss": 0.8285897970199585, "lr": 9.929577464788733e-06, "epoch": 0.05004405286343613, "percentage": 2.5, "elapsed_time": "0:12:36", "remaining_time": "8:11:16"} +{"current_steps": 143, "total_steps": 5676, "loss": 0.9998278021812439, "lr": 1e-05, "epoch": 0.05039647577092511, "percentage": 2.52, "elapsed_time": "0:12:40", "remaining_time": "8:10:34"} +{"current_steps": 144, "total_steps": 5676, "loss": 0.9782301187515259, "lr": 1.0070422535211269e-05, "epoch": 0.050748898678414096, "percentage": 2.54, "elapsed_time": "0:12:44", "remaining_time": "8:09:46"} 
+{"current_steps": 145, "total_steps": 5676, "loss": 1.0557070970535278, "lr": 1.0140845070422535e-05, "epoch": 0.05110132158590308, "percentage": 2.55, "elapsed_time": "0:12:50", "remaining_time": "8:09:36"} +{"current_steps": 146, "total_steps": 5676, "loss": 0.9516133069992065, "lr": 1.0211267605633803e-05, "epoch": 0.05145374449339207, "percentage": 2.57, "elapsed_time": "0:12:54", "remaining_time": "8:09:10"} +{"current_steps": 147, "total_steps": 5676, "loss": 0.8965041637420654, "lr": 1.0281690140845072e-05, "epoch": 0.05180616740088106, "percentage": 2.59, "elapsed_time": "0:12:59", "remaining_time": "8:08:46"} +{"current_steps": 148, "total_steps": 5676, "loss": 0.8627057075500488, "lr": 1.0352112676056338e-05, "epoch": 0.05215859030837004, "percentage": 2.61, "elapsed_time": "0:13:04", "remaining_time": "8:08:33"} +{"current_steps": 149, "total_steps": 5676, "loss": 0.7627567648887634, "lr": 1.0422535211267606e-05, "epoch": 0.052511013215859034, "percentage": 2.63, "elapsed_time": "0:13:11", "remaining_time": "8:09:10"} +{"current_steps": 150, "total_steps": 5676, "loss": 0.8522504568099976, "lr": 1.0492957746478873e-05, "epoch": 0.05286343612334802, "percentage": 2.64, "elapsed_time": "0:13:16", "remaining_time": "8:08:57"} +{"current_steps": 151, "total_steps": 5676, "loss": 0.7164312601089478, "lr": 1.056338028169014e-05, "epoch": 0.053215859030837004, "percentage": 2.66, "elapsed_time": "0:13:21", "remaining_time": "8:08:58"} +{"current_steps": 152, "total_steps": 5676, "loss": 0.9141941070556641, "lr": 1.0633802816901409e-05, "epoch": 0.05356828193832599, "percentage": 2.68, "elapsed_time": "0:13:27", "remaining_time": "8:09:02"} +{"current_steps": 153, "total_steps": 5676, "loss": 0.9145504832267761, "lr": 1.0704225352112675e-05, "epoch": 0.05392070484581498, "percentage": 2.7, "elapsed_time": "0:13:33", "remaining_time": "8:09:32"} +{"current_steps": 154, "total_steps": 5676, "loss": 0.9851646423339844, "lr": 1.0774647887323943e-05, "epoch": 
0.054273127753303965, "percentage": 2.71, "elapsed_time": "0:13:38", "remaining_time": "8:09:24"} +{"current_steps": 155, "total_steps": 5676, "loss": 0.9319474697113037, "lr": 1.0845070422535213e-05, "epoch": 0.05462555066079295, "percentage": 2.73, "elapsed_time": "0:13:43", "remaining_time": "8:08:43"} +{"current_steps": 156, "total_steps": 5676, "loss": 0.995783269405365, "lr": 1.0915492957746481e-05, "epoch": 0.05497797356828194, "percentage": 2.75, "elapsed_time": "0:13:47", "remaining_time": "8:07:51"} +{"current_steps": 157, "total_steps": 5676, "loss": 0.8636226654052734, "lr": 1.0985915492957748e-05, "epoch": 0.05533039647577093, "percentage": 2.77, "elapsed_time": "0:13:52", "remaining_time": "8:07:34"} +{"current_steps": 158, "total_steps": 5676, "loss": 0.8860869407653809, "lr": 1.1056338028169016e-05, "epoch": 0.05568281938325991, "percentage": 2.78, "elapsed_time": "0:13:56", "remaining_time": "8:07:04"} +{"current_steps": 159, "total_steps": 5676, "loss": 0.9256196618080139, "lr": 1.1126760563380284e-05, "epoch": 0.056035242290748896, "percentage": 2.8, "elapsed_time": "0:14:02", "remaining_time": "8:07:11"} +{"current_steps": 160, "total_steps": 5676, "loss": 0.8217915296554565, "lr": 1.119718309859155e-05, "epoch": 0.05638766519823789, "percentage": 2.82, "elapsed_time": "0:14:07", "remaining_time": "8:07:04"} +{"current_steps": 161, "total_steps": 5676, "loss": 0.9808465838432312, "lr": 1.1267605633802819e-05, "epoch": 0.05674008810572687, "percentage": 2.84, "elapsed_time": "0:14:14", "remaining_time": "8:07:57"} +{"current_steps": 162, "total_steps": 5676, "loss": 0.7733014822006226, "lr": 1.1338028169014087e-05, "epoch": 0.05709251101321586, "percentage": 2.85, "elapsed_time": "0:14:19", "remaining_time": "8:07:34"} +{"current_steps": 163, "total_steps": 5676, "loss": 0.8581304550170898, "lr": 1.1408450704225353e-05, "epoch": 0.05744493392070485, "percentage": 2.87, "elapsed_time": "0:14:24", "remaining_time": "8:07:17"} +{"current_steps": 
164, "total_steps": 5676, "loss": 0.9242054224014282, "lr": 1.1478873239436621e-05, "epoch": 0.057797356828193834, "percentage": 2.89, "elapsed_time": "0:14:29", "remaining_time": "8:07:07"} +{"current_steps": 165, "total_steps": 5676, "loss": 1.0302021503448486, "lr": 1.1549295774647888e-05, "epoch": 0.05814977973568282, "percentage": 2.91, "elapsed_time": "0:14:34", "remaining_time": "8:06:42"} +{"current_steps": 166, "total_steps": 5676, "loss": 0.8954275846481323, "lr": 1.1619718309859156e-05, "epoch": 0.058502202643171804, "percentage": 2.92, "elapsed_time": "0:14:40", "remaining_time": "8:06:50"} +{"current_steps": 167, "total_steps": 5676, "loss": 0.891846776008606, "lr": 1.1690140845070424e-05, "epoch": 0.058854625550660795, "percentage": 2.94, "elapsed_time": "0:14:45", "remaining_time": "8:06:50"} +{"current_steps": 168, "total_steps": 5676, "loss": 0.887005627155304, "lr": 1.176056338028169e-05, "epoch": 0.05920704845814978, "percentage": 2.96, "elapsed_time": "0:14:51", "remaining_time": "8:06:52"} +{"current_steps": 169, "total_steps": 5676, "loss": 0.9020301103591919, "lr": 1.1830985915492958e-05, "epoch": 0.059559471365638765, "percentage": 2.98, "elapsed_time": "0:14:56", "remaining_time": "8:06:51"} +{"current_steps": 170, "total_steps": 5676, "loss": 0.7925454378128052, "lr": 1.1901408450704227e-05, "epoch": 0.05991189427312775, "percentage": 3.0, "elapsed_time": "0:15:01", "remaining_time": "8:06:46"} +{"current_steps": 171, "total_steps": 5676, "loss": 0.8058332800865173, "lr": 1.1971830985915493e-05, "epoch": 0.06026431718061674, "percentage": 3.01, "elapsed_time": "0:15:06", "remaining_time": "8:06:29"} +{"current_steps": 172, "total_steps": 5676, "loss": 0.892992377281189, "lr": 1.2042253521126761e-05, "epoch": 0.060616740088105726, "percentage": 3.03, "elapsed_time": "0:15:11", "remaining_time": "8:06:00"} +{"current_steps": 173, "total_steps": 5676, "loss": 0.8482734560966492, "lr": 1.211267605633803e-05, "epoch": 0.06096916299559471, 
"percentage": 3.05, "elapsed_time": "0:15:16", "remaining_time": "8:05:45"} +{"current_steps": 174, "total_steps": 5676, "loss": 0.8652878999710083, "lr": 1.2183098591549296e-05, "epoch": 0.0613215859030837, "percentage": 3.07, "elapsed_time": "0:15:21", "remaining_time": "8:05:48"} +{"current_steps": 175, "total_steps": 5676, "loss": 0.8590051531791687, "lr": 1.2253521126760564e-05, "epoch": 0.06167400881057269, "percentage": 3.08, "elapsed_time": "0:15:27", "remaining_time": "8:06:03"} +{"current_steps": 176, "total_steps": 5676, "loss": 0.7106916904449463, "lr": 1.232394366197183e-05, "epoch": 0.06202643171806167, "percentage": 3.1, "elapsed_time": "0:15:34", "remaining_time": "8:06:42"} +{"current_steps": 177, "total_steps": 5676, "loss": 0.9298936128616333, "lr": 1.2394366197183098e-05, "epoch": 0.06237885462555066, "percentage": 3.12, "elapsed_time": "0:15:38", "remaining_time": "8:05:58"} +{"current_steps": 178, "total_steps": 5676, "loss": 1.027758240699768, "lr": 1.2464788732394367e-05, "epoch": 0.06273127753303964, "percentage": 3.14, "elapsed_time": "0:15:43", "remaining_time": "8:05:30"} +{"current_steps": 179, "total_steps": 5676, "loss": 0.9576354026794434, "lr": 1.2535211267605636e-05, "epoch": 0.06308370044052863, "percentage": 3.15, "elapsed_time": "0:15:48", "remaining_time": "8:05:39"} +{"current_steps": 180, "total_steps": 5676, "loss": 0.6881245374679565, "lr": 1.2605633802816903e-05, "epoch": 0.06343612334801763, "percentage": 3.17, "elapsed_time": "0:15:54", "remaining_time": "8:05:51"} +{"current_steps": 181, "total_steps": 5676, "loss": 0.9629781246185303, "lr": 1.2676056338028171e-05, "epoch": 0.0637885462555066, "percentage": 3.19, "elapsed_time": "0:15:58", "remaining_time": "8:05:11"} +{"current_steps": 182, "total_steps": 5676, "loss": 0.849892258644104, "lr": 1.2746478873239439e-05, "epoch": 0.0641409691629956, "percentage": 3.21, "elapsed_time": "0:16:03", "remaining_time": "8:04:30"} +{"current_steps": 183, "total_steps": 5676, 
"loss": 0.9294229745864868, "lr": 1.2816901408450705e-05, "epoch": 0.06449339207048459, "percentage": 3.22, "elapsed_time": "0:16:08", "remaining_time": "8:04:18"} +{"current_steps": 184, "total_steps": 5676, "loss": 0.9534600973129272, "lr": 1.2887323943661974e-05, "epoch": 0.06484581497797356, "percentage": 3.24, "elapsed_time": "0:16:13", "remaining_time": "8:04:30"} +{"current_steps": 185, "total_steps": 5676, "loss": 0.7937755584716797, "lr": 1.2957746478873242e-05, "epoch": 0.06519823788546256, "percentage": 3.26, "elapsed_time": "0:16:19", "remaining_time": "8:04:20"} +{"current_steps": 186, "total_steps": 5676, "loss": 0.9188590049743652, "lr": 1.3028169014084508e-05, "epoch": 0.06555066079295155, "percentage": 3.28, "elapsed_time": "0:16:23", "remaining_time": "8:04:01"} +{"current_steps": 187, "total_steps": 5676, "loss": 0.8775123357772827, "lr": 1.3098591549295776e-05, "epoch": 0.06590308370044053, "percentage": 3.29, "elapsed_time": "0:16:28", "remaining_time": "8:03:27"} +{"current_steps": 188, "total_steps": 5676, "loss": 0.8562190532684326, "lr": 1.3169014084507044e-05, "epoch": 0.06625550660792952, "percentage": 3.31, "elapsed_time": "0:16:32", "remaining_time": "8:02:47"} +{"current_steps": 189, "total_steps": 5676, "loss": 0.9427295327186584, "lr": 1.323943661971831e-05, "epoch": 0.0666079295154185, "percentage": 3.33, "elapsed_time": "0:16:36", "remaining_time": "8:02:23"} +{"current_steps": 190, "total_steps": 5676, "loss": 0.6142286062240601, "lr": 1.3309859154929579e-05, "epoch": 0.06696035242290749, "percentage": 3.35, "elapsed_time": "0:16:43", "remaining_time": "8:02:40"} +{"current_steps": 191, "total_steps": 5676, "loss": 0.7480863332748413, "lr": 1.3380281690140845e-05, "epoch": 0.06731277533039648, "percentage": 3.37, "elapsed_time": "0:16:49", "remaining_time": "8:03:15"} +{"current_steps": 192, "total_steps": 5676, "loss": 0.898857593536377, "lr": 1.3450704225352114e-05, "epoch": 0.06766519823788546, "percentage": 3.38, 
"elapsed_time": "0:16:55", "remaining_time": "8:03:15"} +{"current_steps": 193, "total_steps": 5676, "loss": 0.8584127426147461, "lr": 1.3521126760563382e-05, "epoch": 0.06801762114537445, "percentage": 3.4, "elapsed_time": "0:16:59", "remaining_time": "8:02:32"} +{"current_steps": 194, "total_steps": 5676, "loss": 0.9400655031204224, "lr": 1.3591549295774648e-05, "epoch": 0.06837004405286344, "percentage": 3.42, "elapsed_time": "0:17:04", "remaining_time": "8:02:35"} +{"current_steps": 195, "total_steps": 5676, "loss": 0.7750787734985352, "lr": 1.3661971830985916e-05, "epoch": 0.06872246696035242, "percentage": 3.44, "elapsed_time": "0:17:09", "remaining_time": "8:02:20"} +{"current_steps": 196, "total_steps": 5676, "loss": 0.8530284762382507, "lr": 1.3732394366197184e-05, "epoch": 0.06907488986784141, "percentage": 3.45, "elapsed_time": "0:17:15", "remaining_time": "8:02:24"} +{"current_steps": 197, "total_steps": 5676, "loss": 0.9019994735717773, "lr": 1.380281690140845e-05, "epoch": 0.0694273127753304, "percentage": 3.47, "elapsed_time": "0:17:20", "remaining_time": "8:02:30"} +{"current_steps": 198, "total_steps": 5676, "loss": 0.7749642133712769, "lr": 1.3873239436619719e-05, "epoch": 0.06977973568281938, "percentage": 3.49, "elapsed_time": "0:17:26", "remaining_time": "8:02:33"} +{"current_steps": 199, "total_steps": 5676, "loss": 0.956200122833252, "lr": 1.3943661971830987e-05, "epoch": 0.07013215859030837, "percentage": 3.51, "elapsed_time": "0:17:31", "remaining_time": "8:02:10"} +{"current_steps": 200, "total_steps": 5676, "loss": 0.8544470071792603, "lr": 1.4014084507042253e-05, "epoch": 0.07048458149779736, "percentage": 3.52, "elapsed_time": "0:17:37", "remaining_time": "8:02:21"} +{"current_steps": 201, "total_steps": 5676, "loss": 0.8776387572288513, "lr": 1.4084507042253522e-05, "epoch": 0.07083700440528634, "percentage": 3.54, "elapsed_time": "0:17:46", "remaining_time": "8:04:16"} +{"current_steps": 202, "total_steps": 5676, "loss": 
0.9949591755867004, "lr": 1.4154929577464788e-05, "epoch": 0.07118942731277533, "percentage": 3.56, "elapsed_time": "0:17:51", "remaining_time": "8:03:50"} +{"current_steps": 203, "total_steps": 5676, "loss": 0.9616764783859253, "lr": 1.4225352112676058e-05, "epoch": 0.07154185022026431, "percentage": 3.58, "elapsed_time": "0:17:55", "remaining_time": "8:03:29"} +{"current_steps": 204, "total_steps": 5676, "loss": 0.9247175455093384, "lr": 1.4295774647887326e-05, "epoch": 0.0718942731277533, "percentage": 3.59, "elapsed_time": "0:18:00", "remaining_time": "8:02:55"} +{"current_steps": 205, "total_steps": 5676, "loss": 0.7946479320526123, "lr": 1.4366197183098594e-05, "epoch": 0.0722466960352423, "percentage": 3.61, "elapsed_time": "0:18:05", "remaining_time": "8:02:45"} +{"current_steps": 206, "total_steps": 5676, "loss": 0.7929860353469849, "lr": 1.443661971830986e-05, "epoch": 0.07259911894273127, "percentage": 3.63, "elapsed_time": "0:18:10", "remaining_time": "8:02:37"} +{"current_steps": 207, "total_steps": 5676, "loss": 0.9215391874313354, "lr": 1.4507042253521129e-05, "epoch": 0.07295154185022026, "percentage": 3.65, "elapsed_time": "0:18:15", "remaining_time": "8:02:15"} +{"current_steps": 208, "total_steps": 5676, "loss": 0.8767607808113098, "lr": 1.4577464788732397e-05, "epoch": 0.07330396475770926, "percentage": 3.66, "elapsed_time": "0:18:19", "remaining_time": "8:01:42"} +{"current_steps": 209, "total_steps": 5676, "loss": 0.8586276769638062, "lr": 1.4647887323943663e-05, "epoch": 0.07365638766519823, "percentage": 3.68, "elapsed_time": "0:18:25", "remaining_time": "8:02:09"} +{"current_steps": 210, "total_steps": 5676, "loss": 0.8680851459503174, "lr": 1.4718309859154931e-05, "epoch": 0.07400881057268723, "percentage": 3.7, "elapsed_time": "0:18:31", "remaining_time": "8:02:19"} +{"current_steps": 211, "total_steps": 5676, "loss": 0.8887720108032227, "lr": 1.47887323943662e-05, "epoch": 0.07436123348017622, "percentage": 3.72, "elapsed_time": 
"0:18:37", "remaining_time": "8:02:31"} +{"current_steps": 212, "total_steps": 5676, "loss": 0.8887100219726562, "lr": 1.4859154929577466e-05, "epoch": 0.0747136563876652, "percentage": 3.74, "elapsed_time": "0:18:42", "remaining_time": "8:02:05"} +{"current_steps": 213, "total_steps": 5676, "loss": 0.9257807731628418, "lr": 1.4929577464788734e-05, "epoch": 0.07506607929515419, "percentage": 3.75, "elapsed_time": "0:18:49", "remaining_time": "8:02:37"} +{"current_steps": 214, "total_steps": 5676, "loss": 0.8107355833053589, "lr": 1.5000000000000002e-05, "epoch": 0.07541850220264318, "percentage": 3.77, "elapsed_time": "0:18:55", "remaining_time": "8:03:10"} +{"current_steps": 215, "total_steps": 5676, "loss": 0.8765913844108582, "lr": 1.5070422535211269e-05, "epoch": 0.07577092511013216, "percentage": 3.79, "elapsed_time": "0:19:01", "remaining_time": "8:03:22"} +{"current_steps": 216, "total_steps": 5676, "loss": 0.8973524570465088, "lr": 1.5140845070422537e-05, "epoch": 0.07612334801762115, "percentage": 3.81, "elapsed_time": "0:19:06", "remaining_time": "8:02:51"} +{"current_steps": 217, "total_steps": 5676, "loss": 0.9194613695144653, "lr": 1.5211267605633803e-05, "epoch": 0.07647577092511013, "percentage": 3.82, "elapsed_time": "0:19:10", "remaining_time": "8:02:23"} +{"current_steps": 218, "total_steps": 5676, "loss": 0.8832643032073975, "lr": 1.528169014084507e-05, "epoch": 0.07682819383259912, "percentage": 3.84, "elapsed_time": "0:19:15", "remaining_time": "8:02:08"} +{"current_steps": 219, "total_steps": 5676, "loss": 0.9575356245040894, "lr": 1.535211267605634e-05, "epoch": 0.07718061674008811, "percentage": 3.86, "elapsed_time": "0:19:21", "remaining_time": "8:02:10"} +{"current_steps": 220, "total_steps": 5676, "loss": 0.8302342891693115, "lr": 1.5422535211267607e-05, "epoch": 0.07753303964757709, "percentage": 3.88, "elapsed_time": "0:19:27", "remaining_time": "8:02:27"} +{"current_steps": 221, "total_steps": 5676, "loss": 0.7999966144561768, "lr": 
1.5492957746478872e-05, "epoch": 0.07788546255506608, "percentage": 3.89, "elapsed_time": "0:19:31", "remaining_time": "8:01:47"} +{"current_steps": 222, "total_steps": 5676, "loss": 0.8201859593391418, "lr": 1.556338028169014e-05, "epoch": 0.07823788546255507, "percentage": 3.91, "elapsed_time": "0:19:36", "remaining_time": "8:01:40"} +{"current_steps": 223, "total_steps": 5676, "loss": 0.872761607170105, "lr": 1.563380281690141e-05, "epoch": 0.07859030837004405, "percentage": 3.93, "elapsed_time": "0:19:42", "remaining_time": "8:02:00"} +{"current_steps": 224, "total_steps": 5676, "loss": 0.8695409297943115, "lr": 1.5704225352112677e-05, "epoch": 0.07894273127753304, "percentage": 3.95, "elapsed_time": "0:19:48", "remaining_time": "8:02:01"} +{"current_steps": 225, "total_steps": 5676, "loss": 0.8532050848007202, "lr": 1.5774647887323945e-05, "epoch": 0.07929515418502203, "percentage": 3.96, "elapsed_time": "0:19:52", "remaining_time": "8:01:36"} +{"current_steps": 226, "total_steps": 5676, "loss": 0.7875121235847473, "lr": 1.5845070422535213e-05, "epoch": 0.07964757709251101, "percentage": 3.98, "elapsed_time": "0:19:58", "remaining_time": "8:01:42"} +{"current_steps": 227, "total_steps": 5676, "loss": 0.7131509780883789, "lr": 1.591549295774648e-05, "epoch": 0.08, "percentage": 4.0, "elapsed_time": "0:20:04", "remaining_time": "8:01:52"} +{"current_steps": 228, "total_steps": 5676, "loss": 0.9758431911468506, "lr": 1.598591549295775e-05, "epoch": 0.080352422907489, "percentage": 4.02, "elapsed_time": "0:20:10", "remaining_time": "8:02:04"} +{"current_steps": 229, "total_steps": 5676, "loss": 0.7894232273101807, "lr": 1.6056338028169017e-05, "epoch": 0.08070484581497797, "percentage": 4.03, "elapsed_time": "0:20:15", "remaining_time": "8:01:48"} +{"current_steps": 230, "total_steps": 5676, "loss": 0.9933483600616455, "lr": 1.6126760563380285e-05, "epoch": 0.08105726872246696, "percentage": 4.05, "elapsed_time": "0:20:21", "remaining_time": "8:01:54"} 
+{"current_steps": 231, "total_steps": 5676, "loss": 0.8424056768417358, "lr": 1.619718309859155e-05, "epoch": 0.08140969162995594, "percentage": 4.07, "elapsed_time": "0:20:27", "remaining_time": "8:02:02"} +{"current_steps": 232, "total_steps": 5676, "loss": 0.7957695126533508, "lr": 1.6267605633802818e-05, "epoch": 0.08176211453744493, "percentage": 4.09, "elapsed_time": "0:20:32", "remaining_time": "8:01:59"} +{"current_steps": 233, "total_steps": 5676, "loss": 0.8491722345352173, "lr": 1.6338028169014086e-05, "epoch": 0.08211453744493392, "percentage": 4.11, "elapsed_time": "0:20:38", "remaining_time": "8:02:04"} +{"current_steps": 234, "total_steps": 5676, "loss": 0.7010964751243591, "lr": 1.6408450704225354e-05, "epoch": 0.0824669603524229, "percentage": 4.12, "elapsed_time": "0:20:44", "remaining_time": "8:02:13"} +{"current_steps": 235, "total_steps": 5676, "loss": 0.8713864088058472, "lr": 1.6478873239436623e-05, "epoch": 0.0828193832599119, "percentage": 4.14, "elapsed_time": "0:20:48", "remaining_time": "8:01:50"} +{"current_steps": 236, "total_steps": 5676, "loss": 0.6724761128425598, "lr": 1.6549295774647887e-05, "epoch": 0.08317180616740089, "percentage": 4.16, "elapsed_time": "0:20:55", "remaining_time": "8:02:11"} +{"current_steps": 237, "total_steps": 5676, "loss": 0.8612109422683716, "lr": 1.6619718309859155e-05, "epoch": 0.08352422907488986, "percentage": 4.18, "elapsed_time": "0:21:00", "remaining_time": "8:02:06"} +{"current_steps": 238, "total_steps": 5676, "loss": 0.5697110891342163, "lr": 1.6690140845070424e-05, "epoch": 0.08387665198237886, "percentage": 4.19, "elapsed_time": "0:21:05", "remaining_time": "8:01:59"} +{"current_steps": 239, "total_steps": 5676, "loss": 0.7877228260040283, "lr": 1.676056338028169e-05, "epoch": 0.08422907488986785, "percentage": 4.21, "elapsed_time": "0:21:11", "remaining_time": "8:01:55"} +{"current_steps": 240, "total_steps": 5676, "loss": 0.8751014471054077, "lr": 1.683098591549296e-05, "epoch": 
0.08458149779735682, "percentage": 4.23, "elapsed_time": "0:21:15", "remaining_time": "8:01:38"} +{"current_steps": 241, "total_steps": 5676, "loss": 0.8731381893157959, "lr": 1.6901408450704228e-05, "epoch": 0.08493392070484582, "percentage": 4.25, "elapsed_time": "0:21:21", "remaining_time": "8:01:39"} +{"current_steps": 242, "total_steps": 5676, "loss": 0.831383228302002, "lr": 1.6971830985915493e-05, "epoch": 0.08528634361233481, "percentage": 4.26, "elapsed_time": "0:21:26", "remaining_time": "8:01:17"} +{"current_steps": 243, "total_steps": 5676, "loss": 0.792934238910675, "lr": 1.704225352112676e-05, "epoch": 0.08563876651982379, "percentage": 4.28, "elapsed_time": "0:21:31", "remaining_time": "8:01:08"} +{"current_steps": 244, "total_steps": 5676, "loss": 0.6723657846450806, "lr": 1.711267605633803e-05, "epoch": 0.08599118942731278, "percentage": 4.3, "elapsed_time": "0:21:37", "remaining_time": "8:01:28"} +{"current_steps": 245, "total_steps": 5676, "loss": 0.8377546072006226, "lr": 1.7183098591549297e-05, "epoch": 0.08634361233480176, "percentage": 4.32, "elapsed_time": "0:21:42", "remaining_time": "8:01:07"} +{"current_steps": 246, "total_steps": 5676, "loss": 0.8073972463607788, "lr": 1.7253521126760565e-05, "epoch": 0.08669603524229075, "percentage": 4.33, "elapsed_time": "0:21:47", "remaining_time": "8:01:03"} +{"current_steps": 247, "total_steps": 5676, "loss": 0.8913615942001343, "lr": 1.732394366197183e-05, "epoch": 0.08704845814977974, "percentage": 4.35, "elapsed_time": "0:21:52", "remaining_time": "8:00:57"} +{"current_steps": 248, "total_steps": 5676, "loss": 0.9133341312408447, "lr": 1.7394366197183098e-05, "epoch": 0.08740088105726872, "percentage": 4.37, "elapsed_time": "0:21:57", "remaining_time": "8:00:46"} +{"current_steps": 249, "total_steps": 5676, "loss": 0.7593938112258911, "lr": 1.7464788732394366e-05, "epoch": 0.08775330396475771, "percentage": 4.39, "elapsed_time": "0:22:02", "remaining_time": "8:00:33"} +{"current_steps": 250, 
"total_steps": 5676, "loss": 0.8049266934394836, "lr": 1.7535211267605638e-05, "epoch": 0.0881057268722467, "percentage": 4.4, "elapsed_time": "0:22:07", "remaining_time": "8:00:08"} +{"current_steps": 251, "total_steps": 5676, "loss": 0.9065679311752319, "lr": 1.7605633802816902e-05, "epoch": 0.08845814977973568, "percentage": 4.42, "elapsed_time": "0:22:12", "remaining_time": "8:00:00"} +{"current_steps": 252, "total_steps": 5676, "loss": 0.9309085011482239, "lr": 1.767605633802817e-05, "epoch": 0.08881057268722467, "percentage": 4.44, "elapsed_time": "0:22:16", "remaining_time": "7:59:25"} +{"current_steps": 253, "total_steps": 5676, "loss": 0.7846949100494385, "lr": 1.774647887323944e-05, "epoch": 0.08916299559471366, "percentage": 4.46, "elapsed_time": "0:22:21", "remaining_time": "7:59:09"} +{"current_steps": 254, "total_steps": 5676, "loss": 0.9153063297271729, "lr": 1.7816901408450707e-05, "epoch": 0.08951541850220264, "percentage": 4.47, "elapsed_time": "0:22:25", "remaining_time": "7:58:46"} +{"current_steps": 255, "total_steps": 5676, "loss": 0.7743638157844543, "lr": 1.7887323943661975e-05, "epoch": 0.08986784140969163, "percentage": 4.49, "elapsed_time": "0:22:32", "remaining_time": "7:59:06"} +{"current_steps": 256, "total_steps": 5676, "loss": 0.887751579284668, "lr": 1.7957746478873243e-05, "epoch": 0.09022026431718062, "percentage": 4.51, "elapsed_time": "0:22:36", "remaining_time": "7:58:43"} +{"current_steps": 257, "total_steps": 5676, "loss": 0.8072899580001831, "lr": 1.8028169014084508e-05, "epoch": 0.0905726872246696, "percentage": 4.53, "elapsed_time": "0:22:41", "remaining_time": "7:58:31"} +{"current_steps": 258, "total_steps": 5676, "loss": 0.8275943994522095, "lr": 1.8098591549295776e-05, "epoch": 0.09092511013215859, "percentage": 4.55, "elapsed_time": "0:22:46", "remaining_time": "7:58:20"} +{"current_steps": 259, "total_steps": 5676, "loss": 0.8300620913505554, "lr": 1.8169014084507044e-05, "epoch": 0.09127753303964757, "percentage": 
4.56, "elapsed_time": "0:22:52", "remaining_time": "7:58:30"} +{"current_steps": 260, "total_steps": 5676, "loss": 0.6886857748031616, "lr": 1.8239436619718312e-05, "epoch": 0.09162995594713656, "percentage": 4.58, "elapsed_time": "0:22:58", "remaining_time": "7:58:26"} +{"current_steps": 261, "total_steps": 5676, "loss": 0.8190158605575562, "lr": 1.830985915492958e-05, "epoch": 0.09198237885462555, "percentage": 4.6, "elapsed_time": "0:23:02", "remaining_time": "7:58:06"} +{"current_steps": 262, "total_steps": 5676, "loss": 0.8500730991363525, "lr": 1.8380281690140845e-05, "epoch": 0.09233480176211453, "percentage": 4.62, "elapsed_time": "0:23:07", "remaining_time": "7:57:59"} +{"current_steps": 263, "total_steps": 5676, "loss": 0.8340811729431152, "lr": 1.8450704225352113e-05, "epoch": 0.09268722466960352, "percentage": 4.63, "elapsed_time": "0:23:13", "remaining_time": "7:58:10"} +{"current_steps": 264, "total_steps": 5676, "loss": 0.8055675029754639, "lr": 1.852112676056338e-05, "epoch": 0.09303964757709252, "percentage": 4.65, "elapsed_time": "0:23:18", "remaining_time": "7:57:57"} +{"current_steps": 265, "total_steps": 5676, "loss": 0.7956680059432983, "lr": 1.859154929577465e-05, "epoch": 0.0933920704845815, "percentage": 4.67, "elapsed_time": "0:23:24", "remaining_time": "7:58:03"} +{"current_steps": 266, "total_steps": 5676, "loss": 0.8232501745223999, "lr": 1.8661971830985917e-05, "epoch": 0.09374449339207049, "percentage": 4.69, "elapsed_time": "0:23:30", "remaining_time": "7:58:02"} +{"current_steps": 267, "total_steps": 5676, "loss": 0.8808565139770508, "lr": 1.8732394366197186e-05, "epoch": 0.09409691629955948, "percentage": 4.7, "elapsed_time": "0:23:34", "remaining_time": "7:57:37"} +{"current_steps": 268, "total_steps": 5676, "loss": 0.885380744934082, "lr": 1.880281690140845e-05, "epoch": 0.09444933920704845, "percentage": 4.72, "elapsed_time": "0:23:38", "remaining_time": "7:57:03"} +{"current_steps": 269, "total_steps": 5676, "loss": 
0.8408790826797485, "lr": 1.887323943661972e-05, "epoch": 0.09480176211453745, "percentage": 4.74, "elapsed_time": "0:23:43", "remaining_time": "7:56:50"} +{"current_steps": 270, "total_steps": 5676, "loss": 0.7089993953704834, "lr": 1.8943661971830987e-05, "epoch": 0.09515418502202644, "percentage": 4.76, "elapsed_time": "0:23:48", "remaining_time": "7:56:48"} +{"current_steps": 271, "total_steps": 5676, "loss": 0.7941038608551025, "lr": 1.9014084507042255e-05, "epoch": 0.09550660792951542, "percentage": 4.77, "elapsed_time": "0:23:55", "remaining_time": "7:57:05"} +{"current_steps": 272, "total_steps": 5676, "loss": 0.8269138932228088, "lr": 1.9084507042253523e-05, "epoch": 0.09585903083700441, "percentage": 4.79, "elapsed_time": "0:23:59", "remaining_time": "7:56:43"} +{"current_steps": 273, "total_steps": 5676, "loss": 0.8206192255020142, "lr": 1.9154929577464788e-05, "epoch": 0.09621145374449339, "percentage": 4.81, "elapsed_time": "0:24:04", "remaining_time": "7:56:27"} +{"current_steps": 274, "total_steps": 5676, "loss": 0.9146496653556824, "lr": 1.922535211267606e-05, "epoch": 0.09656387665198238, "percentage": 4.83, "elapsed_time": "0:24:08", "remaining_time": "7:55:59"} +{"current_steps": 275, "total_steps": 5676, "loss": 0.6738560199737549, "lr": 1.9295774647887327e-05, "epoch": 0.09691629955947137, "percentage": 4.84, "elapsed_time": "0:24:14", "remaining_time": "7:55:57"} +{"current_steps": 276, "total_steps": 5676, "loss": 0.934916615486145, "lr": 1.9366197183098595e-05, "epoch": 0.09726872246696035, "percentage": 4.86, "elapsed_time": "0:24:17", "remaining_time": "7:55:23"} +{"current_steps": 277, "total_steps": 5676, "loss": 0.8952134847640991, "lr": 1.943661971830986e-05, "epoch": 0.09762114537444934, "percentage": 4.88, "elapsed_time": "0:24:23", "remaining_time": "7:55:28"} +{"current_steps": 278, "total_steps": 5676, "loss": 0.9346420764923096, "lr": 1.9507042253521128e-05, "epoch": 0.09797356828193833, "percentage": 4.9, "elapsed_time": 
"0:24:28", "remaining_time": "7:55:10"} +{"current_steps": 279, "total_steps": 5676, "loss": 0.8781993985176086, "lr": 1.9577464788732396e-05, "epoch": 0.09832599118942731, "percentage": 4.92, "elapsed_time": "0:24:33", "remaining_time": "7:54:54"} +{"current_steps": 280, "total_steps": 5676, "loss": 0.8283448219299316, "lr": 1.9647887323943664e-05, "epoch": 0.0986784140969163, "percentage": 4.93, "elapsed_time": "0:24:37", "remaining_time": "7:54:39"} +{"current_steps": 281, "total_steps": 5676, "loss": 0.8010722398757935, "lr": 1.9718309859154933e-05, "epoch": 0.09903083700440529, "percentage": 4.95, "elapsed_time": "0:24:44", "remaining_time": "7:54:56"} +{"current_steps": 282, "total_steps": 5676, "loss": 0.8697119951248169, "lr": 1.97887323943662e-05, "epoch": 0.09938325991189427, "percentage": 4.97, "elapsed_time": "0:24:49", "remaining_time": "7:54:58"} +{"current_steps": 283, "total_steps": 5676, "loss": 0.6448882818222046, "lr": 1.9859154929577465e-05, "epoch": 0.09973568281938326, "percentage": 4.99, "elapsed_time": "0:24:55", "remaining_time": "7:54:55"} +{"current_steps": 284, "total_steps": 5676, "loss": 0.7782007455825806, "lr": 1.9929577464788734e-05, "epoch": 0.10008810572687225, "percentage": 5.0, "elapsed_time": "0:24:58", "remaining_time": "7:54:17"} +{"current_steps": 285, "total_steps": 5676, "loss": 0.8131508827209473, "lr": 2e-05, "epoch": 0.10044052863436123, "percentage": 5.02, "elapsed_time": "0:25:03", "remaining_time": "7:54:07"} +{"current_steps": 286, "total_steps": 5676, "loss": 0.8592134714126587, "lr": 1.999999830265561e-05, "epoch": 0.10079295154185022, "percentage": 5.04, "elapsed_time": "0:25:08", "remaining_time": "7:53:48"} +{"current_steps": 287, "total_steps": 5676, "loss": 0.9374675750732422, "lr": 1.9999993210623002e-05, "epoch": 0.1011453744493392, "percentage": 5.06, "elapsed_time": "0:25:12", "remaining_time": "7:53:28"} +{"current_steps": 288, "total_steps": 5676, "loss": 0.8416328430175781, "lr": 
1.9999984723903913e-05, "epoch": 0.10149779735682819, "percentage": 5.07, "elapsed_time": "0:25:18", "remaining_time": "7:53:32"} +{"current_steps": 289, "total_steps": 5676, "loss": 0.7587184906005859, "lr": 1.9999972842501218e-05, "epoch": 0.10185022026431718, "percentage": 5.09, "elapsed_time": "0:25:24", "remaining_time": "7:53:45"} +{"current_steps": 290, "total_steps": 5676, "loss": 1.010494351387024, "lr": 1.9999957566418956e-05, "epoch": 0.10220264317180616, "percentage": 5.11, "elapsed_time": "0:25:30", "remaining_time": "7:53:44"} +{"current_steps": 291, "total_steps": 5676, "loss": 0.7942835092544556, "lr": 1.999993889566231e-05, "epoch": 0.10255506607929515, "percentage": 5.13, "elapsed_time": "0:25:35", "remaining_time": "7:53:27"} +{"current_steps": 292, "total_steps": 5676, "loss": 0.9069477915763855, "lr": 1.999991683023762e-05, "epoch": 0.10290748898678415, "percentage": 5.14, "elapsed_time": "0:25:40", "remaining_time": "7:53:31"} +{"current_steps": 293, "total_steps": 5676, "loss": 0.8776397705078125, "lr": 1.9999891370152375e-05, "epoch": 0.10325991189427312, "percentage": 5.16, "elapsed_time": "0:25:45", "remaining_time": "7:53:10"} +{"current_steps": 294, "total_steps": 5676, "loss": 0.8560416102409363, "lr": 1.9999862515415216e-05, "epoch": 0.10361233480176212, "percentage": 5.18, "elapsed_time": "0:25:51", "remaining_time": "7:53:16"} +{"current_steps": 295, "total_steps": 5676, "loss": 0.9177321195602417, "lr": 1.9999830266035942e-05, "epoch": 0.10396475770925111, "percentage": 5.2, "elapsed_time": "0:25:55", "remaining_time": "7:52:49"} +{"current_steps": 296, "total_steps": 5676, "loss": 0.8830884695053101, "lr": 1.99997946220255e-05, "epoch": 0.10431718061674009, "percentage": 5.21, "elapsed_time": "0:26:01", "remaining_time": "7:53:03"} +{"current_steps": 297, "total_steps": 5676, "loss": 0.913659930229187, "lr": 1.9999755583395987e-05, "epoch": 0.10466960352422908, "percentage": 5.23, "elapsed_time": "0:26:07", "remaining_time": 
"7:53:06"} +{"current_steps": 298, "total_steps": 5676, "loss": 0.773309588432312, "lr": 1.999971315016066e-05, "epoch": 0.10502202643171807, "percentage": 5.25, "elapsed_time": "0:26:12", "remaining_time": "7:52:59"} +{"current_steps": 299, "total_steps": 5676, "loss": 0.8432563543319702, "lr": 1.9999667322333916e-05, "epoch": 0.10537444933920705, "percentage": 5.27, "elapsed_time": "0:26:18", "remaining_time": "7:53:00"} +{"current_steps": 300, "total_steps": 5676, "loss": 0.9632397890090942, "lr": 1.999961809993132e-05, "epoch": 0.10572687224669604, "percentage": 5.29, "elapsed_time": "0:26:23", "remaining_time": "7:52:58"} +{"current_steps": 301, "total_steps": 5676, "loss": 0.8205600380897522, "lr": 1.999956548296958e-05, "epoch": 0.10607929515418502, "percentage": 5.3, "elapsed_time": "0:26:34", "remaining_time": "7:54:27"} +{"current_steps": 302, "total_steps": 5676, "loss": 0.8789785504341125, "lr": 1.9999509471466557e-05, "epoch": 0.10643171806167401, "percentage": 5.32, "elapsed_time": "0:26:40", "remaining_time": "7:54:35"} +{"current_steps": 303, "total_steps": 5676, "loss": 0.8445791006088257, "lr": 1.999945006544126e-05, "epoch": 0.106784140969163, "percentage": 5.34, "elapsed_time": "0:26:45", "remaining_time": "7:54:31"} +{"current_steps": 304, "total_steps": 5676, "loss": 0.8025245666503906, "lr": 1.9999387264913865e-05, "epoch": 0.10713656387665198, "percentage": 5.36, "elapsed_time": "0:26:50", "remaining_time": "7:54:22"} +{"current_steps": 305, "total_steps": 5676, "loss": 0.9271318912506104, "lr": 1.9999321069905688e-05, "epoch": 0.10748898678414097, "percentage": 5.37, "elapsed_time": "0:26:56", "remaining_time": "7:54:27"} +{"current_steps": 306, "total_steps": 5676, "loss": 0.809894859790802, "lr": 1.999925148043919e-05, "epoch": 0.10784140969162996, "percentage": 5.39, "elapsed_time": "0:27:02", "remaining_time": "7:54:41"} +{"current_steps": 307, "total_steps": 5676, "loss": 0.8940669298171997, "lr": 1.999917849653801e-05, "epoch": 
0.10819383259911894, "percentage": 5.41, "elapsed_time": "0:27:09", "remaining_time": "7:54:55"} +{"current_steps": 308, "total_steps": 5676, "loss": 0.9301233887672424, "lr": 1.9999102118226912e-05, "epoch": 0.10854625550660793, "percentage": 5.43, "elapsed_time": "0:27:16", "remaining_time": "7:55:13"} +{"current_steps": 309, "total_steps": 5676, "loss": 0.6429216861724854, "lr": 1.9999022345531834e-05, "epoch": 0.10889867841409692, "percentage": 5.44, "elapsed_time": "0:27:21", "remaining_time": "7:55:07"} +{"current_steps": 310, "total_steps": 5676, "loss": 0.7199009656906128, "lr": 1.999893917847985e-05, "epoch": 0.1092511013215859, "percentage": 5.46, "elapsed_time": "0:27:27", "remaining_time": "7:55:14"} +{"current_steps": 311, "total_steps": 5676, "loss": 0.8312395811080933, "lr": 1.999885261709919e-05, "epoch": 0.10960352422907489, "percentage": 5.48, "elapsed_time": "0:27:32", "remaining_time": "7:55:11"} +{"current_steps": 312, "total_steps": 5676, "loss": 0.8187745213508606, "lr": 1.999876266141924e-05, "epoch": 0.10995594713656388, "percentage": 5.5, "elapsed_time": "0:27:37", "remaining_time": "7:55:00"} +{"current_steps": 313, "total_steps": 5676, "loss": 0.8632344603538513, "lr": 1.9998669311470546e-05, "epoch": 0.11030837004405286, "percentage": 5.51, "elapsed_time": "0:27:42", "remaining_time": "7:54:48"} +{"current_steps": 314, "total_steps": 5676, "loss": 0.8789447546005249, "lr": 1.9998572567284787e-05, "epoch": 0.11066079295154185, "percentage": 5.53, "elapsed_time": "0:27:47", "remaining_time": "7:54:33"} +{"current_steps": 315, "total_steps": 5676, "loss": 0.7647864818572998, "lr": 1.999847242889481e-05, "epoch": 0.11101321585903083, "percentage": 5.55, "elapsed_time": "0:27:51", "remaining_time": "7:54:11"} +{"current_steps": 316, "total_steps": 5676, "loss": 0.872633695602417, "lr": 1.9998368896334606e-05, "epoch": 0.11136563876651982, "percentage": 5.57, "elapsed_time": "0:27:57", "remaining_time": "7:54:15"} +{"current_steps": 317, 
"total_steps": 5676, "loss": 0.8249840140342712, "lr": 1.9998261969639324e-05, "epoch": 0.11171806167400881, "percentage": 5.58, "elapsed_time": "0:28:03", "remaining_time": "7:54:12"} +{"current_steps": 318, "total_steps": 5676, "loss": 0.7558056116104126, "lr": 1.999815164884526e-05, "epoch": 0.11207048458149779, "percentage": 5.6, "elapsed_time": "0:28:09", "remaining_time": "7:54:29"} +{"current_steps": 319, "total_steps": 5676, "loss": 0.7447441220283508, "lr": 1.9998037933989866e-05, "epoch": 0.11242290748898678, "percentage": 5.62, "elapsed_time": "0:28:15", "remaining_time": "7:54:36"} +{"current_steps": 320, "total_steps": 5676, "loss": 0.8260442018508911, "lr": 1.9997920825111743e-05, "epoch": 0.11277533039647578, "percentage": 5.64, "elapsed_time": "0:28:20", "remaining_time": "7:54:17"} +{"current_steps": 321, "total_steps": 5676, "loss": 0.7916134595870972, "lr": 1.999780032225065e-05, "epoch": 0.11312775330396475, "percentage": 5.66, "elapsed_time": "0:28:25", "remaining_time": "7:54:05"} +{"current_steps": 322, "total_steps": 5676, "loss": 0.7460259199142456, "lr": 1.9997676425447486e-05, "epoch": 0.11348017621145375, "percentage": 5.67, "elapsed_time": "0:28:30", "remaining_time": "7:53:56"} +{"current_steps": 323, "total_steps": 5676, "loss": 0.9739946126937866, "lr": 1.9997549134744318e-05, "epoch": 0.11383259911894274, "percentage": 5.69, "elapsed_time": "0:28:36", "remaining_time": "7:54:01"} +{"current_steps": 324, "total_steps": 5676, "loss": 0.7242900133132935, "lr": 1.9997418450184352e-05, "epoch": 0.11418502202643172, "percentage": 5.71, "elapsed_time": "0:28:41", "remaining_time": "7:53:55"} +{"current_steps": 325, "total_steps": 5676, "loss": 0.7645323276519775, "lr": 1.9997284371811955e-05, "epoch": 0.1145374449339207, "percentage": 5.73, "elapsed_time": "0:28:47", "remaining_time": "7:53:59"} +{"current_steps": 326, "total_steps": 5676, "loss": 0.7377017736434937, "lr": 1.9997146899672638e-05, "epoch": 0.1148898678414097, "percentage": 
5.74, "elapsed_time": "0:28:52", "remaining_time": "7:53:55"} +{"current_steps": 327, "total_steps": 5676, "loss": 0.7117934226989746, "lr": 1.9997006033813076e-05, "epoch": 0.11524229074889868, "percentage": 5.76, "elapsed_time": "0:28:56", "remaining_time": "7:53:29"} +{"current_steps": 328, "total_steps": 5676, "loss": 0.8517680168151855, "lr": 1.999686177428108e-05, "epoch": 0.11559471365638767, "percentage": 5.78, "elapsed_time": "0:29:02", "remaining_time": "7:53:27"} +{"current_steps": 329, "total_steps": 5676, "loss": 0.7099400758743286, "lr": 1.9996714121125626e-05, "epoch": 0.11594713656387665, "percentage": 5.8, "elapsed_time": "0:29:07", "remaining_time": "7:53:18"} +{"current_steps": 330, "total_steps": 5676, "loss": 0.8581711053848267, "lr": 1.9996563074396838e-05, "epoch": 0.11629955947136564, "percentage": 5.81, "elapsed_time": "0:29:13", "remaining_time": "7:53:32"} +{"current_steps": 331, "total_steps": 5676, "loss": 0.7841953635215759, "lr": 1.9996408634145994e-05, "epoch": 0.11665198237885463, "percentage": 5.83, "elapsed_time": "0:29:19", "remaining_time": "7:53:32"} +{"current_steps": 332, "total_steps": 5676, "loss": 0.7376754879951477, "lr": 1.9996250800425515e-05, "epoch": 0.11700440528634361, "percentage": 5.85, "elapsed_time": "0:29:26", "remaining_time": "7:53:54"} +{"current_steps": 333, "total_steps": 5676, "loss": 0.8934558033943176, "lr": 1.9996089573288985e-05, "epoch": 0.1173568281938326, "percentage": 5.87, "elapsed_time": "0:29:32", "remaining_time": "7:54:07"} +{"current_steps": 334, "total_steps": 5676, "loss": 0.7870250940322876, "lr": 1.999592495279113e-05, "epoch": 0.11770925110132159, "percentage": 5.88, "elapsed_time": "0:29:37", "remaining_time": "7:53:51"} +{"current_steps": 335, "total_steps": 5676, "loss": 0.7026203274726868, "lr": 1.9995756938987846e-05, "epoch": 0.11806167400881057, "percentage": 5.9, "elapsed_time": "0:29:41", "remaining_time": "7:53:26"} +{"current_steps": 336, "total_steps": 5676, "loss": 
1.0066381692886353, "lr": 1.999558553193616e-05, "epoch": 0.11841409691629956, "percentage": 5.92, "elapsed_time": "0:29:46", "remaining_time": "7:53:19"} +{"current_steps": 337, "total_steps": 5676, "loss": 0.7860246896743774, "lr": 1.9995410731694255e-05, "epoch": 0.11876651982378855, "percentage": 5.94, "elapsed_time": "0:29:51", "remaining_time": "7:53:03"} +{"current_steps": 338, "total_steps": 5676, "loss": 0.8142588138580322, "lr": 1.999523253832148e-05, "epoch": 0.11911894273127753, "percentage": 5.95, "elapsed_time": "0:29:58", "remaining_time": "7:53:22"} +{"current_steps": 339, "total_steps": 5676, "loss": 0.9737639427185059, "lr": 1.9995050951878317e-05, "epoch": 0.11947136563876652, "percentage": 5.97, "elapsed_time": "0:30:03", "remaining_time": "7:53:16"} +{"current_steps": 340, "total_steps": 5676, "loss": 0.6165765523910522, "lr": 1.999486597242642e-05, "epoch": 0.1198237885462555, "percentage": 5.99, "elapsed_time": "0:30:09", "remaining_time": "7:53:18"} +{"current_steps": 341, "total_steps": 5676, "loss": 0.8553996086120605, "lr": 1.999467760002857e-05, "epoch": 0.12017621145374449, "percentage": 6.01, "elapsed_time": "0:30:14", "remaining_time": "7:53:10"} +{"current_steps": 342, "total_steps": 5676, "loss": 0.9291022419929504, "lr": 1.9994485834748725e-05, "epoch": 0.12052863436123348, "percentage": 6.03, "elapsed_time": "0:30:19", "remaining_time": "7:53:02"} +{"current_steps": 343, "total_steps": 5676, "loss": 0.8309136629104614, "lr": 1.9994290676651977e-05, "epoch": 0.12088105726872246, "percentage": 6.04, "elapsed_time": "0:30:25", "remaining_time": "7:52:58"} +{"current_steps": 344, "total_steps": 5676, "loss": 0.6963932514190674, "lr": 1.999409212580458e-05, "epoch": 0.12123348017621145, "percentage": 6.06, "elapsed_time": "0:30:29", "remaining_time": "7:52:43"} +{"current_steps": 345, "total_steps": 5676, "loss": 0.8220632076263428, "lr": 1.9993890182273932e-05, "epoch": 0.12158590308370044, "percentage": 6.08, "elapsed_time": 
"0:30:35", "remaining_time": "7:52:48"} +{"current_steps": 346, "total_steps": 5676, "loss": 0.8407794237136841, "lr": 1.9993684846128588e-05, "epoch": 0.12193832599118942, "percentage": 6.1, "elapsed_time": "0:30:41", "remaining_time": "7:52:41"} +{"current_steps": 347, "total_steps": 5676, "loss": 0.795718789100647, "lr": 1.9993476117438257e-05, "epoch": 0.12229074889867841, "percentage": 6.11, "elapsed_time": "0:30:45", "remaining_time": "7:52:15"} +{"current_steps": 348, "total_steps": 5676, "loss": 0.7482223510742188, "lr": 1.9993263996273792e-05, "epoch": 0.1226431718061674, "percentage": 6.13, "elapsed_time": "0:30:51", "remaining_time": "7:52:19"} +{"current_steps": 349, "total_steps": 5676, "loss": 0.814468264579773, "lr": 1.99930484827072e-05, "epoch": 0.12299559471365638, "percentage": 6.15, "elapsed_time": "0:30:56", "remaining_time": "7:52:17"} +{"current_steps": 350, "total_steps": 5676, "loss": 0.8105748891830444, "lr": 1.9992829576811648e-05, "epoch": 0.12334801762114538, "percentage": 6.17, "elapsed_time": "0:31:00", "remaining_time": "7:51:53"} +{"current_steps": 351, "total_steps": 5676, "loss": 0.8756073713302612, "lr": 1.9992607278661437e-05, "epoch": 0.12370044052863437, "percentage": 6.18, "elapsed_time": "0:31:05", "remaining_time": "7:51:42"} +{"current_steps": 352, "total_steps": 5676, "loss": 0.8643946647644043, "lr": 1.9992381588332043e-05, "epoch": 0.12405286343612335, "percentage": 6.2, "elapsed_time": "0:31:10", "remaining_time": "7:51:34"} +{"current_steps": 353, "total_steps": 5676, "loss": 0.7691172361373901, "lr": 1.9992152505900067e-05, "epoch": 0.12440528634361234, "percentage": 6.22, "elapsed_time": "0:31:16", "remaining_time": "7:51:38"} +{"current_steps": 354, "total_steps": 5676, "loss": 0.716686487197876, "lr": 1.9991920031443288e-05, "epoch": 0.12475770925110131, "percentage": 6.24, "elapsed_time": "0:31:23", "remaining_time": "7:51:57"} +{"current_steps": 355, "total_steps": 5676, "loss": 0.697482705116272, "lr": 
1.9991684165040616e-05, "epoch": 0.12511013215859032, "percentage": 6.25, "elapsed_time": "0:31:28", "remaining_time": "7:51:45"} +{"current_steps": 356, "total_steps": 5676, "loss": 0.8039460182189941, "lr": 1.999144490677212e-05, "epoch": 0.12546255506607928, "percentage": 6.27, "elapsed_time": "0:31:33", "remaining_time": "7:51:43"} +{"current_steps": 357, "total_steps": 5676, "loss": 0.872138261795044, "lr": 1.9991202256719032e-05, "epoch": 0.12581497797356828, "percentage": 6.29, "elapsed_time": "0:31:38", "remaining_time": "7:51:28"} +{"current_steps": 358, "total_steps": 5676, "loss": 0.8659502267837524, "lr": 1.999095621496371e-05, "epoch": 0.12616740088105727, "percentage": 6.31, "elapsed_time": "0:31:43", "remaining_time": "7:51:16"} +{"current_steps": 359, "total_steps": 5676, "loss": 0.7585660219192505, "lr": 1.9990706781589682e-05, "epoch": 0.12651982378854626, "percentage": 6.32, "elapsed_time": "0:31:49", "remaining_time": "7:51:15"} +{"current_steps": 360, "total_steps": 5676, "loss": 0.86381995677948, "lr": 1.9990453956681626e-05, "epoch": 0.12687224669603525, "percentage": 6.34, "elapsed_time": "0:31:54", "remaining_time": "7:51:05"} +{"current_steps": 361, "total_steps": 5676, "loss": 0.7623461484909058, "lr": 1.9990197740325365e-05, "epoch": 0.12722466960352422, "percentage": 6.36, "elapsed_time": "0:31:59", "remaining_time": "7:51:02"} +{"current_steps": 362, "total_steps": 5676, "loss": 0.8262917995452881, "lr": 1.9989938132607877e-05, "epoch": 0.1275770925110132, "percentage": 6.38, "elapsed_time": "0:32:03", "remaining_time": "7:50:40"} +{"current_steps": 363, "total_steps": 5676, "loss": 0.7879630327224731, "lr": 1.9989675133617294e-05, "epoch": 0.1279295154185022, "percentage": 6.4, "elapsed_time": "0:32:09", "remaining_time": "7:50:44"} +{"current_steps": 364, "total_steps": 5676, "loss": 0.8282565474510193, "lr": 1.9989408743442892e-05, "epoch": 0.1282819383259912, "percentage": 6.41, "elapsed_time": "0:32:14", "remaining_time": 
"7:50:31"} +{"current_steps": 365, "total_steps": 5676, "loss": 0.8358104228973389, "lr": 1.9989138962175105e-05, "epoch": 0.12863436123348018, "percentage": 6.43, "elapsed_time": "0:32:19", "remaining_time": "7:50:26"} +{"current_steps": 366, "total_steps": 5676, "loss": 0.9111027121543884, "lr": 1.9988865789905513e-05, "epoch": 0.12898678414096917, "percentage": 6.45, "elapsed_time": "0:32:25", "remaining_time": "7:50:20"} +{"current_steps": 367, "total_steps": 5676, "loss": 0.766915500164032, "lr": 1.9988589226726847e-05, "epoch": 0.12933920704845814, "percentage": 6.47, "elapsed_time": "0:32:30", "remaining_time": "7:50:18"} +{"current_steps": 368, "total_steps": 5676, "loss": 0.818048357963562, "lr": 1.9988309272733e-05, "epoch": 0.12969162995594713, "percentage": 6.48, "elapsed_time": "0:32:34", "remaining_time": "7:49:53"} +{"current_steps": 369, "total_steps": 5676, "loss": 0.8188307285308838, "lr": 1.9988025928019e-05, "epoch": 0.13004405286343612, "percentage": 6.5, "elapsed_time": "0:32:38", "remaining_time": "7:49:33"} +{"current_steps": 370, "total_steps": 5676, "loss": 0.88718181848526, "lr": 1.998773919268104e-05, "epoch": 0.1303964757709251, "percentage": 6.52, "elapsed_time": "0:32:43", "remaining_time": "7:49:22"} +{"current_steps": 371, "total_steps": 5676, "loss": 0.9173898696899414, "lr": 1.998744906681645e-05, "epoch": 0.1307488986784141, "percentage": 6.54, "elapsed_time": "0:32:48", "remaining_time": "7:49:12"} +{"current_steps": 372, "total_steps": 5676, "loss": 0.8025110960006714, "lr": 1.9987155550523725e-05, "epoch": 0.1311013215859031, "percentage": 6.55, "elapsed_time": "0:32:53", "remaining_time": "7:48:52"} +{"current_steps": 373, "total_steps": 5676, "loss": 0.8931341767311096, "lr": 1.9986858643902502e-05, "epoch": 0.13145374449339206, "percentage": 6.57, "elapsed_time": "0:32:58", "remaining_time": "7:48:42"} +{"current_steps": 374, "total_steps": 5676, "loss": 0.8813796043395996, "lr": 1.9986558347053574e-05, "epoch": 
0.13180616740088105, "percentage": 6.59, "elapsed_time": "0:33:04", "remaining_time": "7:48:46"} +{"current_steps": 375, "total_steps": 5676, "loss": 0.8021976947784424, "lr": 1.9986254660078877e-05, "epoch": 0.13215859030837004, "percentage": 6.61, "elapsed_time": "0:33:10", "remaining_time": "7:48:53"} +{"current_steps": 376, "total_steps": 5676, "loss": 0.8083860874176025, "lr": 1.9985947583081506e-05, "epoch": 0.13251101321585904, "percentage": 6.62, "elapsed_time": "0:33:15", "remaining_time": "7:48:48"} +{"current_steps": 377, "total_steps": 5676, "loss": 0.7639983296394348, "lr": 1.9985637116165705e-05, "epoch": 0.13286343612334803, "percentage": 6.64, "elapsed_time": "0:33:20", "remaining_time": "7:48:41"} +{"current_steps": 378, "total_steps": 5676, "loss": 0.7775800228118896, "lr": 1.9985323259436874e-05, "epoch": 0.133215859030837, "percentage": 6.66, "elapsed_time": "0:33:25", "remaining_time": "7:48:28"} +{"current_steps": 379, "total_steps": 5676, "loss": 0.8892228603363037, "lr": 1.9985006013001545e-05, "epoch": 0.13356828193832598, "percentage": 6.68, "elapsed_time": "0:33:31", "remaining_time": "7:48:37"} +{"current_steps": 380, "total_steps": 5676, "loss": 0.9158765077590942, "lr": 1.998468537696742e-05, "epoch": 0.13392070484581498, "percentage": 6.69, "elapsed_time": "0:33:38", "remaining_time": "7:48:47"} +{"current_steps": 381, "total_steps": 5676, "loss": 0.7523722648620605, "lr": 1.9984361351443343e-05, "epoch": 0.13427312775330397, "percentage": 6.71, "elapsed_time": "0:33:43", "remaining_time": "7:48:41"} +{"current_steps": 382, "total_steps": 5676, "loss": 0.8052740693092346, "lr": 1.998403393653932e-05, "epoch": 0.13462555066079296, "percentage": 6.73, "elapsed_time": "0:33:49", "remaining_time": "7:48:43"} +{"current_steps": 383, "total_steps": 5676, "loss": 0.8271476626396179, "lr": 1.9983703132366484e-05, "epoch": 0.13497797356828195, "percentage": 6.75, "elapsed_time": "0:33:55", "remaining_time": "7:48:44"} +{"current_steps": 384, 
"total_steps": 5676, "loss": 0.8904454112052917, "lr": 1.998336893903714e-05, "epoch": 0.13533039647577091, "percentage": 6.77, "elapsed_time": "0:33:59", "remaining_time": "7:48:29"} +{"current_steps": 385, "total_steps": 5676, "loss": 0.8705847263336182, "lr": 1.9983031356664733e-05, "epoch": 0.1356828193832599, "percentage": 6.78, "elapsed_time": "0:34:05", "remaining_time": "7:48:24"} +{"current_steps": 386, "total_steps": 5676, "loss": 0.8269569873809814, "lr": 1.9982690385363867e-05, "epoch": 0.1360352422907489, "percentage": 6.8, "elapsed_time": "0:34:11", "remaining_time": "7:48:30"} +{"current_steps": 387, "total_steps": 5676, "loss": 0.9796818494796753, "lr": 1.998234602525029e-05, "epoch": 0.1363876651982379, "percentage": 6.82, "elapsed_time": "0:34:16", "remaining_time": "7:48:32"} +{"current_steps": 388, "total_steps": 5676, "loss": 0.8276596665382385, "lr": 1.9981998276440892e-05, "epoch": 0.13674008810572688, "percentage": 6.84, "elapsed_time": "0:34:22", "remaining_time": "7:48:30"} +{"current_steps": 389, "total_steps": 5676, "loss": 0.8739231824874878, "lr": 1.9981647139053737e-05, "epoch": 0.13709251101321585, "percentage": 6.85, "elapsed_time": "0:34:27", "remaining_time": "7:48:16"} +{"current_steps": 390, "total_steps": 5676, "loss": 0.677521824836731, "lr": 1.9981292613208018e-05, "epoch": 0.13744493392070484, "percentage": 6.87, "elapsed_time": "0:34:31", "remaining_time": "7:48:02"} +{"current_steps": 391, "total_steps": 5676, "loss": 0.744938313961029, "lr": 1.9980934699024084e-05, "epoch": 0.13779735682819383, "percentage": 6.89, "elapsed_time": "0:34:36", "remaining_time": "7:47:51"} +{"current_steps": 392, "total_steps": 5676, "loss": 0.8367065787315369, "lr": 1.998057339662344e-05, "epoch": 0.13814977973568282, "percentage": 6.91, "elapsed_time": "0:34:41", "remaining_time": "7:47:31"} +{"current_steps": 393, "total_steps": 5676, "loss": 0.775547981262207, "lr": 1.9980208706128733e-05, "epoch": 0.1385022026431718, "percentage": 6.92, 
"elapsed_time": "0:34:46", "remaining_time": "7:47:29"} +{"current_steps": 394, "total_steps": 5676, "loss": 0.8287982940673828, "lr": 1.9979840627663764e-05, "epoch": 0.1388546255506608, "percentage": 6.94, "elapsed_time": "0:34:51", "remaining_time": "7:47:15"} +{"current_steps": 395, "total_steps": 5676, "loss": 0.7582247257232666, "lr": 1.997946916135349e-05, "epoch": 0.13920704845814977, "percentage": 6.96, "elapsed_time": "0:34:55", "remaining_time": "7:46:59"} +{"current_steps": 396, "total_steps": 5676, "loss": 0.968267560005188, "lr": 1.997909430732401e-05, "epoch": 0.13955947136563876, "percentage": 6.98, "elapsed_time": "0:35:02", "remaining_time": "7:47:10"} +{"current_steps": 397, "total_steps": 5676, "loss": 0.8850257396697998, "lr": 1.9978716065702566e-05, "epoch": 0.13991189427312775, "percentage": 6.99, "elapsed_time": "0:35:07", "remaining_time": "7:47:02"} +{"current_steps": 398, "total_steps": 5676, "loss": 0.7206246852874756, "lr": 1.9978334436617574e-05, "epoch": 0.14026431718061674, "percentage": 7.01, "elapsed_time": "0:35:13", "remaining_time": "7:47:11"} +{"current_steps": 399, "total_steps": 5676, "loss": 0.7833065986633301, "lr": 1.9977949420198576e-05, "epoch": 0.14061674008810573, "percentage": 7.03, "elapsed_time": "0:35:18", "remaining_time": "7:46:59"} +{"current_steps": 400, "total_steps": 5676, "loss": 0.7199673652648926, "lr": 1.9977561016576275e-05, "epoch": 0.14096916299559473, "percentage": 7.05, "elapsed_time": "0:35:24", "remaining_time": "7:46:57"} +{"current_steps": 401, "total_steps": 5676, "loss": 0.7544811367988586, "lr": 1.9977169225882522e-05, "epoch": 0.1413215859030837, "percentage": 7.06, "elapsed_time": "0:35:32", "remaining_time": "7:47:27"} +{"current_steps": 402, "total_steps": 5676, "loss": 0.7528219819068909, "lr": 1.9976774048250317e-05, "epoch": 0.14167400881057268, "percentage": 7.08, "elapsed_time": "0:35:37", "remaining_time": "7:47:17"} +{"current_steps": 403, "total_steps": 5676, "loss": 
0.8025565147399902, "lr": 1.9976375483813814e-05, "epoch": 0.14202643171806167, "percentage": 7.1, "elapsed_time": "0:35:43", "remaining_time": "7:47:23"} +{"current_steps": 404, "total_steps": 5676, "loss": 0.6553962230682373, "lr": 1.997597353270831e-05, "epoch": 0.14237885462555067, "percentage": 7.12, "elapsed_time": "0:35:49", "remaining_time": "7:47:23"} +{"current_steps": 405, "total_steps": 5676, "loss": 0.7070015072822571, "lr": 1.9975568195070253e-05, "epoch": 0.14273127753303966, "percentage": 7.14, "elapsed_time": "0:35:54", "remaining_time": "7:47:20"} +{"current_steps": 406, "total_steps": 5676, "loss": 0.7454725503921509, "lr": 1.9975159471037247e-05, "epoch": 0.14308370044052862, "percentage": 7.15, "elapsed_time": "0:35:59", "remaining_time": "7:47:14"} +{"current_steps": 407, "total_steps": 5676, "loss": 0.7074518799781799, "lr": 1.9974747360748038e-05, "epoch": 0.1434361233480176, "percentage": 7.17, "elapsed_time": "0:36:04", "remaining_time": "7:47:06"} +{"current_steps": 408, "total_steps": 5676, "loss": 0.6870182752609253, "lr": 1.9974331864342527e-05, "epoch": 0.1437885462555066, "percentage": 7.19, "elapsed_time": "0:36:10", "remaining_time": "7:46:59"} +{"current_steps": 409, "total_steps": 5676, "loss": 0.826898455619812, "lr": 1.9973912981961763e-05, "epoch": 0.1441409691629956, "percentage": 7.21, "elapsed_time": "0:36:14", "remaining_time": "7:46:48"} +{"current_steps": 410, "total_steps": 5676, "loss": 0.7244436740875244, "lr": 1.997349071374794e-05, "epoch": 0.1444933920704846, "percentage": 7.22, "elapsed_time": "0:36:20", "remaining_time": "7:46:49"} +{"current_steps": 411, "total_steps": 5676, "loss": 0.6885448694229126, "lr": 1.9973065059844404e-05, "epoch": 0.14484581497797358, "percentage": 7.24, "elapsed_time": "0:36:26", "remaining_time": "7:46:53"} +{"current_steps": 412, "total_steps": 5676, "loss": 0.8477644920349121, "lr": 1.9972636020395653e-05, "epoch": 0.14519823788546254, "percentage": 7.26, "elapsed_time": "0:36:33", 
"remaining_time": "7:47:02"} +{"current_steps": 413, "total_steps": 5676, "loss": 0.9432111382484436, "lr": 1.9972203595547334e-05, "epoch": 0.14555066079295154, "percentage": 7.28, "elapsed_time": "0:36:38", "remaining_time": "7:46:53"} +{"current_steps": 414, "total_steps": 5676, "loss": 1.0101501941680908, "lr": 1.9971767785446243e-05, "epoch": 0.14590308370044053, "percentage": 7.29, "elapsed_time": "0:36:44", "remaining_time": "7:46:53"} +{"current_steps": 415, "total_steps": 5676, "loss": 0.8174984455108643, "lr": 1.997132859024032e-05, "epoch": 0.14625550660792952, "percentage": 7.31, "elapsed_time": "0:36:49", "remaining_time": "7:46:49"} +{"current_steps": 416, "total_steps": 5676, "loss": 0.6857198476791382, "lr": 1.997088601007866e-05, "epoch": 0.1466079295154185, "percentage": 7.33, "elapsed_time": "0:36:54", "remaining_time": "7:46:41"} +{"current_steps": 417, "total_steps": 5676, "loss": 0.7742792367935181, "lr": 1.9970440045111505e-05, "epoch": 0.14696035242290748, "percentage": 7.35, "elapsed_time": "0:37:00", "remaining_time": "7:46:41"} +{"current_steps": 418, "total_steps": 5676, "loss": 0.7489269971847534, "lr": 1.996999069549025e-05, "epoch": 0.14731277533039647, "percentage": 7.36, "elapsed_time": "0:37:05", "remaining_time": "7:46:34"} +{"current_steps": 419, "total_steps": 5676, "loss": 0.7362021207809448, "lr": 1.9969537961367423e-05, "epoch": 0.14766519823788546, "percentage": 7.38, "elapsed_time": "0:37:09", "remaining_time": "7:46:14"} +{"current_steps": 420, "total_steps": 5676, "loss": 0.7596213221549988, "lr": 1.996908184289673e-05, "epoch": 0.14801762114537445, "percentage": 7.4, "elapsed_time": "0:37:13", "remaining_time": "7:45:54"} +{"current_steps": 421, "total_steps": 5676, "loss": 0.7739163637161255, "lr": 1.9968622340232993e-05, "epoch": 0.14837004405286344, "percentage": 7.42, "elapsed_time": "0:37:17", "remaining_time": "7:45:32"} +{"current_steps": 422, "total_steps": 5676, "loss": 0.9059790372848511, "lr": 
1.9968159453532215e-05, "epoch": 0.14872246696035243, "percentage": 7.43, "elapsed_time": "0:37:23", "remaining_time": "7:45:32"} +{"current_steps": 423, "total_steps": 5676, "loss": 0.7298871278762817, "lr": 1.9967693182951516e-05, "epoch": 0.1490748898678414, "percentage": 7.45, "elapsed_time": "0:37:27", "remaining_time": "7:45:13"} +{"current_steps": 424, "total_steps": 5676, "loss": 0.7218194007873535, "lr": 1.9967223528649194e-05, "epoch": 0.1494273127753304, "percentage": 7.47, "elapsed_time": "0:37:32", "remaining_time": "7:45:06"} +{"current_steps": 425, "total_steps": 5676, "loss": 0.8031259179115295, "lr": 1.996675049078467e-05, "epoch": 0.14977973568281938, "percentage": 7.49, "elapsed_time": "0:37:37", "remaining_time": "7:44:55"} +{"current_steps": 426, "total_steps": 5676, "loss": 0.8583194613456726, "lr": 1.9966274069518533e-05, "epoch": 0.15013215859030837, "percentage": 7.51, "elapsed_time": "0:37:42", "remaining_time": "7:44:42"} +{"current_steps": 427, "total_steps": 5676, "loss": 0.7829155921936035, "lr": 1.9965794265012514e-05, "epoch": 0.15048458149779737, "percentage": 7.52, "elapsed_time": "0:37:48", "remaining_time": "7:44:42"} +{"current_steps": 428, "total_steps": 5676, "loss": 0.709203839302063, "lr": 1.9965311077429484e-05, "epoch": 0.15083700440528636, "percentage": 7.54, "elapsed_time": "0:37:53", "remaining_time": "7:44:39"} +{"current_steps": 429, "total_steps": 5676, "loss": 0.7515710592269897, "lr": 1.996482450693348e-05, "epoch": 0.15118942731277532, "percentage": 7.56, "elapsed_time": "0:38:00", "remaining_time": "7:44:49"} +{"current_steps": 430, "total_steps": 5676, "loss": 0.8552615642547607, "lr": 1.9964334553689674e-05, "epoch": 0.1515418502202643, "percentage": 7.58, "elapsed_time": "0:38:04", "remaining_time": "7:44:28"} +{"current_steps": 431, "total_steps": 5676, "loss": 0.7946224808692932, "lr": 1.9963841217864385e-05, "epoch": 0.1518942731277533, "percentage": 7.59, "elapsed_time": "0:38:10", "remaining_time": 
"7:44:31"} +{"current_steps": 432, "total_steps": 5676, "loss": 0.7117756605148315, "lr": 1.9963344499625087e-05, "epoch": 0.1522466960352423, "percentage": 7.61, "elapsed_time": "0:38:15", "remaining_time": "7:44:22"} +{"current_steps": 433, "total_steps": 5676, "loss": 0.8892849683761597, "lr": 1.9962844399140405e-05, "epoch": 0.1525991189427313, "percentage": 7.63, "elapsed_time": "0:38:18", "remaining_time": "7:43:55"} +{"current_steps": 434, "total_steps": 5676, "loss": 0.9037783145904541, "lr": 1.9962340916580105e-05, "epoch": 0.15295154185022025, "percentage": 7.65, "elapsed_time": "0:38:23", "remaining_time": "7:43:38"} +{"current_steps": 435, "total_steps": 5676, "loss": 0.7419179677963257, "lr": 1.9961834052115104e-05, "epoch": 0.15330396475770924, "percentage": 7.66, "elapsed_time": "0:38:27", "remaining_time": "7:43:22"} +{"current_steps": 436, "total_steps": 5676, "loss": 0.847285270690918, "lr": 1.9961323805917464e-05, "epoch": 0.15365638766519824, "percentage": 7.68, "elapsed_time": "0:38:33", "remaining_time": "7:43:27"} +{"current_steps": 437, "total_steps": 5676, "loss": 0.793263852596283, "lr": 1.99608101781604e-05, "epoch": 0.15400881057268723, "percentage": 7.7, "elapsed_time": "0:38:38", "remaining_time": "7:43:16"} +{"current_steps": 438, "total_steps": 5676, "loss": 0.6600923538208008, "lr": 1.9960293169018276e-05, "epoch": 0.15436123348017622, "percentage": 7.72, "elapsed_time": "0:38:43", "remaining_time": "7:43:10"} +{"current_steps": 439, "total_steps": 5676, "loss": 0.7642164826393127, "lr": 1.9959772778666592e-05, "epoch": 0.1547136563876652, "percentage": 7.73, "elapsed_time": "0:38:50", "remaining_time": "7:43:16"} +{"current_steps": 440, "total_steps": 5676, "loss": 0.897221565246582, "lr": 1.995924900728201e-05, "epoch": 0.15506607929515417, "percentage": 7.75, "elapsed_time": "0:38:55", "remaining_time": "7:43:13"} +{"current_steps": 441, "total_steps": 5676, "loss": 0.830953061580658, "lr": 1.9958721855042338e-05, "epoch": 
0.15541850220264317, "percentage": 7.77, "elapsed_time": "0:39:00", "remaining_time": "7:43:04"} +{"current_steps": 442, "total_steps": 5676, "loss": 0.7514863014221191, "lr": 1.995819132212652e-05, "epoch": 0.15577092511013216, "percentage": 7.79, "elapsed_time": "0:39:06", "remaining_time": "7:43:07"} +{"current_steps": 443, "total_steps": 5676, "loss": 0.7039257287979126, "lr": 1.995765740871466e-05, "epoch": 0.15612334801762115, "percentage": 7.8, "elapsed_time": "0:39:11", "remaining_time": "7:42:53"} +{"current_steps": 444, "total_steps": 5676, "loss": 0.810503363609314, "lr": 1.9957120114988e-05, "epoch": 0.15647577092511014, "percentage": 7.82, "elapsed_time": "0:39:14", "remaining_time": "7:42:28"} +{"current_steps": 445, "total_steps": 5676, "loss": 0.616968035697937, "lr": 1.9956579441128942e-05, "epoch": 0.1568281938325991, "percentage": 7.84, "elapsed_time": "0:39:21", "remaining_time": "7:42:43"} +{"current_steps": 446, "total_steps": 5676, "loss": 0.7008740901947021, "lr": 1.9956035387321024e-05, "epoch": 0.1571806167400881, "percentage": 7.86, "elapsed_time": "0:39:28", "remaining_time": "7:42:55"} +{"current_steps": 447, "total_steps": 5676, "loss": 0.847025454044342, "lr": 1.995548795374893e-05, "epoch": 0.1575330396475771, "percentage": 7.88, "elapsed_time": "0:39:34", "remaining_time": "7:42:54"} +{"current_steps": 448, "total_steps": 5676, "loss": 0.7788053750991821, "lr": 1.9954937140598506e-05, "epoch": 0.15788546255506608, "percentage": 7.89, "elapsed_time": "0:39:39", "remaining_time": "7:42:49"} +{"current_steps": 449, "total_steps": 5676, "loss": 0.7592896819114685, "lr": 1.9954382948056735e-05, "epoch": 0.15823788546255507, "percentage": 7.91, "elapsed_time": "0:39:45", "remaining_time": "7:42:45"} +{"current_steps": 450, "total_steps": 5676, "loss": 0.9458491802215576, "lr": 1.995382537631174e-05, "epoch": 0.15859030837004406, "percentage": 7.93, "elapsed_time": "0:39:49", "remaining_time": "7:42:29"} +{"current_steps": 451, 
"total_steps": 5676, "loss": 0.8069632053375244, "lr": 1.9953264425552804e-05, "epoch": 0.15894273127753303, "percentage": 7.95, "elapsed_time": "0:39:55", "remaining_time": "7:42:31"} +{"current_steps": 452, "total_steps": 5676, "loss": 0.7876379489898682, "lr": 1.9952700095970357e-05, "epoch": 0.15929515418502202, "percentage": 7.96, "elapsed_time": "0:40:00", "remaining_time": "7:42:20"} +{"current_steps": 453, "total_steps": 5676, "loss": 0.796333909034729, "lr": 1.9952132387755965e-05, "epoch": 0.159647577092511, "percentage": 7.98, "elapsed_time": "0:40:05", "remaining_time": "7:42:09"} +{"current_steps": 454, "total_steps": 5676, "loss": 0.7171634435653687, "lr": 1.9951561301102348e-05, "epoch": 0.16, "percentage": 8.0, "elapsed_time": "0:40:11", "remaining_time": "7:42:13"} +{"current_steps": 455, "total_steps": 5676, "loss": 0.8312792778015137, "lr": 1.9950986836203374e-05, "epoch": 0.160352422907489, "percentage": 8.02, "elapsed_time": "0:40:16", "remaining_time": "7:42:09"} +{"current_steps": 456, "total_steps": 5676, "loss": 0.7496857643127441, "lr": 1.995040899325406e-05, "epoch": 0.160704845814978, "percentage": 8.03, "elapsed_time": "0:40:22", "remaining_time": "7:42:05"} +{"current_steps": 457, "total_steps": 5676, "loss": 0.89504075050354, "lr": 1.9949827772450555e-05, "epoch": 0.16105726872246695, "percentage": 8.05, "elapsed_time": "0:40:26", "remaining_time": "7:41:55"} +{"current_steps": 458, "total_steps": 5676, "loss": 0.7580761313438416, "lr": 1.9949243173990172e-05, "epoch": 0.16140969162995594, "percentage": 8.07, "elapsed_time": "0:40:31", "remaining_time": "7:41:45"} +{"current_steps": 459, "total_steps": 5676, "loss": 0.7826676368713379, "lr": 1.9948655198071365e-05, "epoch": 0.16176211453744493, "percentage": 8.09, "elapsed_time": "0:40:36", "remaining_time": "7:41:35"} +{"current_steps": 460, "total_steps": 5676, "loss": 0.7591372728347778, "lr": 1.9948063844893733e-05, "epoch": 0.16211453744493393, "percentage": 8.1, "elapsed_time": 
"0:40:40", "remaining_time": "7:41:17"} +{"current_steps": 461, "total_steps": 5676, "loss": 0.7366905808448792, "lr": 1.994746911465802e-05, "epoch": 0.16246696035242292, "percentage": 8.12, "elapsed_time": "0:40:46", "remaining_time": "7:41:13"} +{"current_steps": 462, "total_steps": 5676, "loss": 0.7152266502380371, "lr": 1.9946871007566116e-05, "epoch": 0.16281938325991188, "percentage": 8.14, "elapsed_time": "0:40:52", "remaining_time": "7:41:18"} +{"current_steps": 463, "total_steps": 5676, "loss": 0.8411930799484253, "lr": 1.994626952382107e-05, "epoch": 0.16317180616740087, "percentage": 8.16, "elapsed_time": "0:40:58", "remaining_time": "7:41:16"} +{"current_steps": 464, "total_steps": 5676, "loss": 0.6689857244491577, "lr": 1.9945664663627054e-05, "epoch": 0.16352422907488987, "percentage": 8.17, "elapsed_time": "0:41:02", "remaining_time": "7:41:02"} +{"current_steps": 465, "total_steps": 5676, "loss": 0.6474499106407166, "lr": 1.9945056427189408e-05, "epoch": 0.16387665198237886, "percentage": 8.19, "elapsed_time": "0:41:09", "remaining_time": "7:41:09"} +{"current_steps": 466, "total_steps": 5676, "loss": 0.7861372232437134, "lr": 1.9944444814714604e-05, "epoch": 0.16422907488986785, "percentage": 8.21, "elapsed_time": "0:41:13", "remaining_time": "7:40:58"} +{"current_steps": 467, "total_steps": 5676, "loss": 0.8301665186882019, "lr": 1.9943829826410273e-05, "epoch": 0.16458149779735684, "percentage": 8.23, "elapsed_time": "0:41:17", "remaining_time": "7:40:34"} +{"current_steps": 468, "total_steps": 5676, "loss": 0.661811888217926, "lr": 1.9943211462485176e-05, "epoch": 0.1649339207048458, "percentage": 8.25, "elapsed_time": "0:41:22", "remaining_time": "7:40:30"} +{"current_steps": 469, "total_steps": 5676, "loss": 0.7768537402153015, "lr": 1.9942589723149233e-05, "epoch": 0.1652863436123348, "percentage": 8.26, "elapsed_time": "0:41:28", "remaining_time": "7:40:25"} +{"current_steps": 470, "total_steps": 5676, "loss": 0.6139112710952759, "lr": 
1.9941964608613503e-05, "epoch": 0.1656387665198238, "percentage": 8.28, "elapsed_time": "0:41:33", "remaining_time": "7:40:21"} +{"current_steps": 471, "total_steps": 5676, "loss": 0.8284693956375122, "lr": 1.9941336119090193e-05, "epoch": 0.16599118942731278, "percentage": 8.3, "elapsed_time": "0:41:38", "remaining_time": "7:40:07"} +{"current_steps": 472, "total_steps": 5676, "loss": 0.7281739711761475, "lr": 1.9940704254792655e-05, "epoch": 0.16634361233480177, "percentage": 8.32, "elapsed_time": "0:41:44", "remaining_time": "7:40:11"} +{"current_steps": 473, "total_steps": 5676, "loss": 0.687767744064331, "lr": 1.994006901593539e-05, "epoch": 0.16669603524229074, "percentage": 8.33, "elapsed_time": "0:41:49", "remaining_time": "7:39:59"} +{"current_steps": 474, "total_steps": 5676, "loss": 0.7553595304489136, "lr": 1.9939430402734046e-05, "epoch": 0.16704845814977973, "percentage": 8.35, "elapsed_time": "0:41:54", "remaining_time": "7:39:53"} +{"current_steps": 475, "total_steps": 5676, "loss": 0.9263294339179993, "lr": 1.99387884154054e-05, "epoch": 0.16740088105726872, "percentage": 8.37, "elapsed_time": "0:41:58", "remaining_time": "7:39:40"} +{"current_steps": 476, "total_steps": 5676, "loss": 0.7014337182044983, "lr": 1.9938143054167397e-05, "epoch": 0.1677533039647577, "percentage": 8.39, "elapsed_time": "0:42:02", "remaining_time": "7:39:19"} +{"current_steps": 477, "total_steps": 5676, "loss": 0.6454538106918335, "lr": 1.9937494319239112e-05, "epoch": 0.1681057268722467, "percentage": 8.4, "elapsed_time": "0:42:09", "remaining_time": "7:39:26"} +{"current_steps": 478, "total_steps": 5676, "loss": 0.7792352437973022, "lr": 1.9936842210840775e-05, "epoch": 0.1684581497797357, "percentage": 8.42, "elapsed_time": "0:42:13", "remaining_time": "7:39:15"} +{"current_steps": 479, "total_steps": 5676, "loss": 0.8773127794265747, "lr": 1.9936186729193753e-05, "epoch": 0.16881057268722466, "percentage": 8.44, "elapsed_time": "0:42:19", "remaining_time": 
"7:39:10"} +{"current_steps": 480, "total_steps": 5676, "loss": 0.892439603805542, "lr": 1.993552787452056e-05, "epoch": 0.16916299559471365, "percentage": 8.46, "elapsed_time": "0:42:24", "remaining_time": "7:39:01"} +{"current_steps": 481, "total_steps": 5676, "loss": 0.89835524559021, "lr": 1.993486564704486e-05, "epoch": 0.16951541850220264, "percentage": 8.47, "elapsed_time": "0:42:29", "remaining_time": "7:38:51"} +{"current_steps": 482, "total_steps": 5676, "loss": 0.8013701438903809, "lr": 1.9934200046991453e-05, "epoch": 0.16986784140969163, "percentage": 8.49, "elapsed_time": "0:42:35", "remaining_time": "7:38:53"} +{"current_steps": 483, "total_steps": 5676, "loss": 0.8086763620376587, "lr": 1.9933531074586296e-05, "epoch": 0.17022026431718063, "percentage": 8.51, "elapsed_time": "0:42:38", "remaining_time": "7:38:31"} +{"current_steps": 484, "total_steps": 5676, "loss": 0.7736518383026123, "lr": 1.9932858730056486e-05, "epoch": 0.17057268722466962, "percentage": 8.53, "elapsed_time": "0:42:45", "remaining_time": "7:38:41"} +{"current_steps": 485, "total_steps": 5676, "loss": 0.6247539520263672, "lr": 1.9932183013630257e-05, "epoch": 0.17092511013215858, "percentage": 8.54, "elapsed_time": "0:42:50", "remaining_time": "7:38:35"} +{"current_steps": 486, "total_steps": 5676, "loss": 0.7172006368637085, "lr": 1.9931503925536996e-05, "epoch": 0.17127753303964757, "percentage": 8.56, "elapsed_time": "0:42:54", "remaining_time": "7:38:16"} +{"current_steps": 487, "total_steps": 5676, "loss": 0.7854465246200562, "lr": 1.993082146600723e-05, "epoch": 0.17162995594713656, "percentage": 8.58, "elapsed_time": "0:42:59", "remaining_time": "7:38:06"} +{"current_steps": 488, "total_steps": 5676, "loss": 0.7018419504165649, "lr": 1.9930135635272637e-05, "epoch": 0.17198237885462556, "percentage": 8.6, "elapsed_time": "0:43:05", "remaining_time": "7:38:11"} +{"current_steps": 489, "total_steps": 5676, "loss": 0.783660352230072, "lr": 1.9929446433566033e-05, "epoch": 
0.17233480176211455, "percentage": 8.62, "elapsed_time": "0:43:12", "remaining_time": "7:38:18"} +{"current_steps": 490, "total_steps": 5676, "loss": 1.0166207551956177, "lr": 1.992875386112138e-05, "epoch": 0.1726872246696035, "percentage": 8.63, "elapsed_time": "0:43:17", "remaining_time": "7:38:07"} +{"current_steps": 491, "total_steps": 5676, "loss": 0.7692895531654358, "lr": 1.9928057918173786e-05, "epoch": 0.1730396475770925, "percentage": 8.65, "elapsed_time": "0:43:22", "remaining_time": "7:38:06"} +{"current_steps": 492, "total_steps": 5676, "loss": 0.8005259037017822, "lr": 1.9927358604959503e-05, "epoch": 0.1733920704845815, "percentage": 8.67, "elapsed_time": "0:43:26", "remaining_time": "7:37:47"} +{"current_steps": 493, "total_steps": 5676, "loss": 0.6780292987823486, "lr": 1.9926655921715924e-05, "epoch": 0.1737444933920705, "percentage": 8.69, "elapsed_time": "0:43:33", "remaining_time": "7:37:51"} +{"current_steps": 494, "total_steps": 5676, "loss": 0.6501175165176392, "lr": 1.9925949868681587e-05, "epoch": 0.17409691629955948, "percentage": 8.7, "elapsed_time": "0:43:38", "remaining_time": "7:37:42"} +{"current_steps": 495, "total_steps": 5676, "loss": 0.781839907169342, "lr": 1.9925240446096176e-05, "epoch": 0.17444933920704847, "percentage": 8.72, "elapsed_time": "0:43:43", "remaining_time": "7:37:39"} +{"current_steps": 496, "total_steps": 5676, "loss": 0.7617994546890259, "lr": 1.992452765420052e-05, "epoch": 0.17480176211453743, "percentage": 8.74, "elapsed_time": "0:43:47", "remaining_time": "7:37:23"} +{"current_steps": 497, "total_steps": 5676, "loss": 0.8019097447395325, "lr": 1.992381149323659e-05, "epoch": 0.17515418502202643, "percentage": 8.76, "elapsed_time": "0:43:53", "remaining_time": "7:37:21"} +{"current_steps": 498, "total_steps": 5676, "loss": 0.7526847124099731, "lr": 1.9923091963447496e-05, "epoch": 0.17550660792951542, "percentage": 8.77, "elapsed_time": "0:43:59", "remaining_time": "7:37:22"} +{"current_steps": 499, 
"total_steps": 5676, "loss": 0.7101150751113892, "lr": 1.9922369065077497e-05, "epoch": 0.1758590308370044, "percentage": 8.79, "elapsed_time": "0:44:04", "remaining_time": "7:37:17"} +{"current_steps": 500, "total_steps": 5676, "loss": 0.8519806861877441, "lr": 1.9921642798372e-05, "epoch": 0.1762114537444934, "percentage": 8.81, "elapsed_time": "0:44:10", "remaining_time": "7:37:16"} +{"current_steps": 501, "total_steps": 5676, "loss": 0.774759829044342, "lr": 1.9920913163577542e-05, "epoch": 0.17656387665198237, "percentage": 8.83, "elapsed_time": "0:44:18", "remaining_time": "7:37:44"} +{"current_steps": 502, "total_steps": 5676, "loss": 0.8597595691680908, "lr": 1.992018016094182e-05, "epoch": 0.17691629955947136, "percentage": 8.84, "elapsed_time": "0:44:23", "remaining_time": "7:37:36"} +{"current_steps": 503, "total_steps": 5676, "loss": 0.7023826241493225, "lr": 1.9919443790713658e-05, "epoch": 0.17726872246696035, "percentage": 8.86, "elapsed_time": "0:44:29", "remaining_time": "7:37:32"} +{"current_steps": 504, "total_steps": 5676, "loss": 0.8290892839431763, "lr": 1.991870405314303e-05, "epoch": 0.17762114537444934, "percentage": 8.88, "elapsed_time": "0:44:34", "remaining_time": "7:37:23"} +{"current_steps": 505, "total_steps": 5676, "loss": 0.9240517020225525, "lr": 1.9917960948481062e-05, "epoch": 0.17797356828193833, "percentage": 8.9, "elapsed_time": "0:44:39", "remaining_time": "7:37:15"} +{"current_steps": 506, "total_steps": 5676, "loss": 0.8247153759002686, "lr": 1.9917214476980012e-05, "epoch": 0.17832599118942732, "percentage": 8.91, "elapsed_time": "0:44:44", "remaining_time": "7:37:11"} +{"current_steps": 507, "total_steps": 5676, "loss": 0.9101368188858032, "lr": 1.991646463889328e-05, "epoch": 0.1786784140969163, "percentage": 8.93, "elapsed_time": "0:44:49", "remaining_time": "7:37:01"} +{"current_steps": 508, "total_steps": 5676, "loss": 0.7688114643096924, "lr": 1.9915711434475416e-05, "epoch": 0.17903083700440528, "percentage": 8.95, 
"elapsed_time": "0:44:54", "remaining_time": "7:36:56"} +{"current_steps": 509, "total_steps": 5676, "loss": 0.820112943649292, "lr": 1.9914954863982106e-05, "epoch": 0.17938325991189427, "percentage": 8.97, "elapsed_time": "0:44:59", "remaining_time": "7:36:39"} +{"current_steps": 510, "total_steps": 5676, "loss": 0.6393542289733887, "lr": 1.9914194927670186e-05, "epoch": 0.17973568281938326, "percentage": 8.99, "elapsed_time": "0:45:04", "remaining_time": "7:36:39"} +{"current_steps": 511, "total_steps": 5676, "loss": 0.9463154673576355, "lr": 1.991343162579763e-05, "epoch": 0.18008810572687226, "percentage": 9.0, "elapsed_time": "0:45:11", "remaining_time": "7:36:47"} +{"current_steps": 512, "total_steps": 5676, "loss": 0.9498215913772583, "lr": 1.9912664958623556e-05, "epoch": 0.18044052863436125, "percentage": 9.02, "elapsed_time": "0:45:16", "remaining_time": "7:36:39"} +{"current_steps": 513, "total_steps": 5676, "loss": 0.7659052014350891, "lr": 1.991189492640822e-05, "epoch": 0.1807929515418502, "percentage": 9.04, "elapsed_time": "0:45:22", "remaining_time": "7:36:38"} +{"current_steps": 514, "total_steps": 5676, "loss": 0.9946317672729492, "lr": 1.9911121529413028e-05, "epoch": 0.1811453744493392, "percentage": 9.06, "elapsed_time": "0:45:28", "remaining_time": "7:36:41"} +{"current_steps": 515, "total_steps": 5676, "loss": 0.762086033821106, "lr": 1.991034476790052e-05, "epoch": 0.1814977973568282, "percentage": 9.07, "elapsed_time": "0:45:34", "remaining_time": "7:36:40"} +{"current_steps": 516, "total_steps": 5676, "loss": 0.7507720589637756, "lr": 1.990956464213438e-05, "epoch": 0.18185022026431719, "percentage": 9.09, "elapsed_time": "0:45:40", "remaining_time": "7:36:42"} +{"current_steps": 517, "total_steps": 5676, "loss": 0.7859716415405273, "lr": 1.990878115237945e-05, "epoch": 0.18220264317180618, "percentage": 9.11, "elapsed_time": "0:45:45", "remaining_time": "7:36:39"} +{"current_steps": 518, "total_steps": 5676, "loss": 0.8585234880447388, 
"lr": 1.9907994298901688e-05, "epoch": 0.18255506607929514, "percentage": 9.13, "elapsed_time": "0:45:50", "remaining_time": "7:36:32"} +{"current_steps": 519, "total_steps": 5676, "loss": 0.8569823503494263, "lr": 1.990720408196821e-05, "epoch": 0.18290748898678413, "percentage": 9.14, "elapsed_time": "0:45:57", "remaining_time": "7:36:36"} +{"current_steps": 520, "total_steps": 5676, "loss": 0.8297367095947266, "lr": 1.990641050184727e-05, "epoch": 0.18325991189427313, "percentage": 9.16, "elapsed_time": "0:46:03", "remaining_time": "7:36:39"} +{"current_steps": 521, "total_steps": 5676, "loss": 0.7918041348457336, "lr": 1.9905613558808262e-05, "epoch": 0.18361233480176212, "percentage": 9.18, "elapsed_time": "0:46:08", "remaining_time": "7:36:29"} +{"current_steps": 522, "total_steps": 5676, "loss": 0.8322931528091431, "lr": 1.9904813253121727e-05, "epoch": 0.1839647577092511, "percentage": 9.2, "elapsed_time": "0:46:13", "remaining_time": "7:36:24"} +{"current_steps": 523, "total_steps": 5676, "loss": 0.6822292804718018, "lr": 1.990400958505934e-05, "epoch": 0.1843171806167401, "percentage": 9.21, "elapsed_time": "0:46:18", "remaining_time": "7:36:14"} +{"current_steps": 524, "total_steps": 5676, "loss": 0.8989835977554321, "lr": 1.9903202554893925e-05, "epoch": 0.18466960352422906, "percentage": 9.23, "elapsed_time": "0:46:23", "remaining_time": "7:36:06"} +{"current_steps": 525, "total_steps": 5676, "loss": 0.671294093132019, "lr": 1.990239216289944e-05, "epoch": 0.18502202643171806, "percentage": 9.25, "elapsed_time": "0:46:27", "remaining_time": "7:35:48"} +{"current_steps": 526, "total_steps": 5676, "loss": 0.9045379161834717, "lr": 1.990157840935099e-05, "epoch": 0.18537444933920705, "percentage": 9.27, "elapsed_time": "0:46:32", "remaining_time": "7:35:39"} +{"current_steps": 527, "total_steps": 5676, "loss": 0.7117471694946289, "lr": 1.990076129452482e-05, "epoch": 0.18572687224669604, "percentage": 9.28, "elapsed_time": "0:46:38", "remaining_time": 
"7:35:37"} +{"current_steps": 528, "total_steps": 5676, "loss": 0.890752911567688, "lr": 1.9899940818698315e-05, "epoch": 0.18607929515418503, "percentage": 9.3, "elapsed_time": "0:46:42", "remaining_time": "7:35:28"} +{"current_steps": 529, "total_steps": 5676, "loss": 0.7209222316741943, "lr": 1.9899116982149994e-05, "epoch": 0.186431718061674, "percentage": 9.32, "elapsed_time": "0:46:48", "remaining_time": "7:35:22"} +{"current_steps": 530, "total_steps": 5676, "loss": 0.6912863254547119, "lr": 1.9898289785159534e-05, "epoch": 0.186784140969163, "percentage": 9.34, "elapsed_time": "0:46:52", "remaining_time": "7:35:03"} +{"current_steps": 531, "total_steps": 5676, "loss": 0.7060319185256958, "lr": 1.9897459228007736e-05, "epoch": 0.18713656387665198, "percentage": 9.36, "elapsed_time": "0:46:57", "remaining_time": "7:35:00"} +{"current_steps": 532, "total_steps": 5676, "loss": 0.6975364685058594, "lr": 1.9896625310976553e-05, "epoch": 0.18748898678414097, "percentage": 9.37, "elapsed_time": "0:47:02", "remaining_time": "7:34:51"} +{"current_steps": 533, "total_steps": 5676, "loss": 0.8576006293296814, "lr": 1.989578803434907e-05, "epoch": 0.18784140969162996, "percentage": 9.39, "elapsed_time": "0:47:07", "remaining_time": "7:34:40"} +{"current_steps": 534, "total_steps": 5676, "loss": 0.7182095646858215, "lr": 1.9894947398409516e-05, "epoch": 0.18819383259911895, "percentage": 9.41, "elapsed_time": "0:47:11", "remaining_time": "7:34:26"} +{"current_steps": 535, "total_steps": 5676, "loss": 0.7546031475067139, "lr": 1.9894103403443265e-05, "epoch": 0.18854625550660792, "percentage": 9.43, "elapsed_time": "0:47:16", "remaining_time": "7:34:15"} +{"current_steps": 536, "total_steps": 5676, "loss": 0.7083312273025513, "lr": 1.9893256049736824e-05, "epoch": 0.1888986784140969, "percentage": 9.44, "elapsed_time": "0:47:20", "remaining_time": "7:33:58"} +{"current_steps": 537, "total_steps": 5676, "loss": 0.5614915490150452, "lr": 1.9892405337577846e-05, "epoch": 
0.1892511013215859, "percentage": 9.46, "elapsed_time": "0:47:26", "remaining_time": "7:34:01"} +{"current_steps": 538, "total_steps": 5676, "loss": 0.7647485733032227, "lr": 1.9891551267255114e-05, "epoch": 0.1896035242290749, "percentage": 9.48, "elapsed_time": "0:47:33", "remaining_time": "7:34:06"} +{"current_steps": 539, "total_steps": 5676, "loss": 0.776042103767395, "lr": 1.9890693839058566e-05, "epoch": 0.18995594713656389, "percentage": 9.5, "elapsed_time": "0:47:39", "remaining_time": "7:34:08"} +{"current_steps": 540, "total_steps": 5676, "loss": 0.7694810628890991, "lr": 1.9889833053279268e-05, "epoch": 0.19030837004405288, "percentage": 9.51, "elapsed_time": "0:47:45", "remaining_time": "7:34:10"} +{"current_steps": 541, "total_steps": 5676, "loss": 0.6935995817184448, "lr": 1.9888968910209433e-05, "epoch": 0.19066079295154184, "percentage": 9.53, "elapsed_time": "0:47:50", "remaining_time": "7:34:05"} +{"current_steps": 542, "total_steps": 5676, "loss": 0.7538039088249207, "lr": 1.988810141014241e-05, "epoch": 0.19101321585903083, "percentage": 9.55, "elapsed_time": "0:47:55", "remaining_time": "7:34:01"} +{"current_steps": 543, "total_steps": 5676, "loss": 0.8149158954620361, "lr": 1.9887230553372686e-05, "epoch": 0.19136563876651982, "percentage": 9.57, "elapsed_time": "0:48:00", "remaining_time": "7:33:48"} +{"current_steps": 544, "total_steps": 5676, "loss": 0.7776780128479004, "lr": 1.988635634019589e-05, "epoch": 0.19171806167400882, "percentage": 9.58, "elapsed_time": "0:48:04", "remaining_time": "7:33:34"} +{"current_steps": 545, "total_steps": 5676, "loss": 0.8527307510375977, "lr": 1.9885478770908793e-05, "epoch": 0.1920704845814978, "percentage": 9.6, "elapsed_time": "0:48:09", "remaining_time": "7:33:25"} +{"current_steps": 546, "total_steps": 5676, "loss": 0.7239484190940857, "lr": 1.98845978458093e-05, "epoch": 0.19242290748898677, "percentage": 9.62, "elapsed_time": "0:48:14", "remaining_time": "7:33:18"} +{"current_steps": 547, 
"total_steps": 5676, "loss": 0.6937836408615112, "lr": 1.9883713565196462e-05, "epoch": 0.19277533039647576, "percentage": 9.64, "elapsed_time": "0:48:20", "remaining_time": "7:33:17"} +{"current_steps": 548, "total_steps": 5676, "loss": 0.8567923903465271, "lr": 1.9882825929370456e-05, "epoch": 0.19312775330396476, "percentage": 9.65, "elapsed_time": "0:48:26", "remaining_time": "7:33:18"} +{"current_steps": 549, "total_steps": 5676, "loss": 0.7948861122131348, "lr": 1.9881934938632615e-05, "epoch": 0.19348017621145375, "percentage": 9.67, "elapsed_time": "0:48:31", "remaining_time": "7:33:07"} +{"current_steps": 550, "total_steps": 5676, "loss": 0.6808983087539673, "lr": 1.9881040593285398e-05, "epoch": 0.19383259911894274, "percentage": 9.69, "elapsed_time": "0:48:37", "remaining_time": "7:33:06"} +{"current_steps": 551, "total_steps": 5676, "loss": 0.9089908599853516, "lr": 1.9880142893632412e-05, "epoch": 0.19418502202643173, "percentage": 9.71, "elapsed_time": "0:48:41", "remaining_time": "7:32:54"} +{"current_steps": 552, "total_steps": 5676, "loss": 0.7947918176651001, "lr": 1.9879241839978393e-05, "epoch": 0.1945374449339207, "percentage": 9.73, "elapsed_time": "0:48:46", "remaining_time": "7:32:46"} +{"current_steps": 553, "total_steps": 5676, "loss": 0.880418598651886, "lr": 1.9878337432629224e-05, "epoch": 0.1948898678414097, "percentage": 9.74, "elapsed_time": "0:48:52", "remaining_time": "7:32:42"} +{"current_steps": 554, "total_steps": 5676, "loss": 0.8845832347869873, "lr": 1.9877429671891917e-05, "epoch": 0.19524229074889868, "percentage": 9.76, "elapsed_time": "0:48:57", "remaining_time": "7:32:36"} +{"current_steps": 555, "total_steps": 5676, "loss": 0.7635341286659241, "lr": 1.9876518558074638e-05, "epoch": 0.19559471365638767, "percentage": 9.78, "elapsed_time": "0:49:02", "remaining_time": "7:32:30"} +{"current_steps": 556, "total_steps": 5676, "loss": 0.9301069974899292, "lr": 1.9875604091486678e-05, "epoch": 0.19594713656387666, 
"percentage": 9.8, "elapsed_time": "0:49:07", "remaining_time": "7:32:20"} +{"current_steps": 557, "total_steps": 5676, "loss": 0.8788589239120483, "lr": 1.9874686272438467e-05, "epoch": 0.19629955947136563, "percentage": 9.81, "elapsed_time": "0:49:11", "remaining_time": "7:32:07"} +{"current_steps": 558, "total_steps": 5676, "loss": 0.7452565431594849, "lr": 1.987376510124158e-05, "epoch": 0.19665198237885462, "percentage": 9.83, "elapsed_time": "0:49:17", "remaining_time": "7:32:05"} +{"current_steps": 559, "total_steps": 5676, "loss": 0.819628119468689, "lr": 1.9872840578208722e-05, "epoch": 0.1970044052863436, "percentage": 9.85, "elapsed_time": "0:49:22", "remaining_time": "7:31:59"} +{"current_steps": 560, "total_steps": 5676, "loss": 0.7807571291923523, "lr": 1.9871912703653744e-05, "epoch": 0.1973568281938326, "percentage": 9.87, "elapsed_time": "0:49:26", "remaining_time": "7:31:43"} +{"current_steps": 561, "total_steps": 5676, "loss": 0.7091392278671265, "lr": 1.9870981477891626e-05, "epoch": 0.1977092511013216, "percentage": 9.88, "elapsed_time": "0:49:31", "remaining_time": "7:31:34"} +{"current_steps": 562, "total_steps": 5676, "loss": 0.8174105882644653, "lr": 1.9870046901238496e-05, "epoch": 0.19806167400881058, "percentage": 9.9, "elapsed_time": "0:49:37", "remaining_time": "7:31:35"} +{"current_steps": 563, "total_steps": 5676, "loss": 0.696865439414978, "lr": 1.9869108974011607e-05, "epoch": 0.19841409691629955, "percentage": 9.92, "elapsed_time": "0:49:43", "remaining_time": "7:31:34"} +{"current_steps": 564, "total_steps": 5676, "loss": 0.914303183555603, "lr": 1.986816769652936e-05, "epoch": 0.19876651982378854, "percentage": 9.94, "elapsed_time": "0:49:48", "remaining_time": "7:31:30"} +{"current_steps": 565, "total_steps": 5676, "loss": 0.8397856950759888, "lr": 1.986722306911129e-05, "epoch": 0.19911894273127753, "percentage": 9.95, "elapsed_time": "0:49:54", "remaining_time": "7:31:31"} +{"current_steps": 566, "total_steps": 5676, "loss": 
0.7206380367279053, "lr": 1.9866275092078066e-05, "epoch": 0.19947136563876652, "percentage": 9.97, "elapsed_time": "0:50:00", "remaining_time": "7:31:31"} +{"current_steps": 567, "total_steps": 5676, "loss": 0.7017316818237305, "lr": 1.98653237657515e-05, "epoch": 0.19982378854625552, "percentage": 9.99, "elapsed_time": "0:50:06", "remaining_time": "7:31:31"} +{"current_steps": 568, "total_steps": 5676, "loss": 0.8797772526741028, "lr": 1.9864369090454538e-05, "epoch": 0.2001762114537445, "percentage": 10.01, "elapsed_time": "0:50:12", "remaining_time": "7:31:27"} +{"current_steps": 569, "total_steps": 5676, "loss": 0.6643391847610474, "lr": 1.9863411066511257e-05, "epoch": 0.20052863436123347, "percentage": 10.02, "elapsed_time": "0:50:16", "remaining_time": "7:31:16"} +{"current_steps": 570, "total_steps": 5676, "loss": 0.8662393093109131, "lr": 1.9862449694246878e-05, "epoch": 0.20088105726872246, "percentage": 10.04, "elapsed_time": "0:50:22", "remaining_time": "7:31:14"} +{"current_steps": 571, "total_steps": 5676, "loss": 0.7766140699386597, "lr": 1.9861484973987762e-05, "epoch": 0.20123348017621145, "percentage": 10.06, "elapsed_time": "0:50:26", "remaining_time": "7:31:00"} +{"current_steps": 572, "total_steps": 5676, "loss": 0.8582239151000977, "lr": 1.9860516906061397e-05, "epoch": 0.20158590308370045, "percentage": 10.08, "elapsed_time": "0:50:32", "remaining_time": "7:30:59"} +{"current_steps": 573, "total_steps": 5676, "loss": 0.5838385820388794, "lr": 1.9859545490796414e-05, "epoch": 0.20193832599118944, "percentage": 10.1, "elapsed_time": "0:50:38", "remaining_time": "7:30:59"} +{"current_steps": 574, "total_steps": 5676, "loss": 0.6715164184570312, "lr": 1.9858570728522573e-05, "epoch": 0.2022907488986784, "percentage": 10.11, "elapsed_time": "0:50:43", "remaining_time": "7:30:49"} +{"current_steps": 575, "total_steps": 5676, "loss": 0.7665218114852905, "lr": 1.9857592619570783e-05, "epoch": 0.2026431718061674, "percentage": 10.13, "elapsed_time": 
"0:50:49", "remaining_time": "7:30:56"} +{"current_steps": 576, "total_steps": 5676, "loss": 0.8060458898544312, "lr": 1.985661116427308e-05, "epoch": 0.20299559471365639, "percentage": 10.15, "elapsed_time": "0:50:55", "remaining_time": "7:30:55"} +{"current_steps": 577, "total_steps": 5676, "loss": 0.8354060649871826, "lr": 1.985562636296264e-05, "epoch": 0.20334801762114538, "percentage": 10.17, "elapsed_time": "0:51:00", "remaining_time": "7:30:50"} +{"current_steps": 578, "total_steps": 5676, "loss": 0.8814351558685303, "lr": 1.985463821597376e-05, "epoch": 0.20370044052863437, "percentage": 10.18, "elapsed_time": "0:51:06", "remaining_time": "7:30:47"} +{"current_steps": 579, "total_steps": 5676, "loss": 0.9068918228149414, "lr": 1.9853646723641895e-05, "epoch": 0.20405286343612336, "percentage": 10.2, "elapsed_time": "0:51:10", "remaining_time": "7:30:28"} +{"current_steps": 580, "total_steps": 5676, "loss": 0.7671997547149658, "lr": 1.9852651886303624e-05, "epoch": 0.20440528634361232, "percentage": 10.22, "elapsed_time": "0:51:14", "remaining_time": "7:30:09"} +{"current_steps": 581, "total_steps": 5676, "loss": 0.7906886339187622, "lr": 1.9851653704296664e-05, "epoch": 0.20475770925110132, "percentage": 10.24, "elapsed_time": "0:51:19", "remaining_time": "7:30:07"} +{"current_steps": 582, "total_steps": 5676, "loss": 0.8424232006072998, "lr": 1.985065217795987e-05, "epoch": 0.2051101321585903, "percentage": 10.25, "elapsed_time": "0:51:24", "remaining_time": "7:29:55"} +{"current_steps": 583, "total_steps": 5676, "loss": 0.8335819244384766, "lr": 1.984964730763322e-05, "epoch": 0.2054625550660793, "percentage": 10.27, "elapsed_time": "0:51:29", "remaining_time": "7:29:53"} +{"current_steps": 584, "total_steps": 5676, "loss": 0.8340694308280945, "lr": 1.9848639093657844e-05, "epoch": 0.2058149779735683, "percentage": 10.29, "elapsed_time": "0:51:35", "remaining_time": "7:29:52"} +{"current_steps": 585, "total_steps": 5676, "loss": 0.9228274822235107, "lr": 
1.9847627536376e-05, "epoch": 0.20616740088105726, "percentage": 10.31, "elapsed_time": "0:51:40", "remaining_time": "7:29:40"} +{"current_steps": 586, "total_steps": 5676, "loss": 0.7843449115753174, "lr": 1.984661263613107e-05, "epoch": 0.20651982378854625, "percentage": 10.32, "elapsed_time": "0:51:46", "remaining_time": "7:29:38"} +{"current_steps": 587, "total_steps": 5676, "loss": 0.7411990165710449, "lr": 1.9845594393267594e-05, "epoch": 0.20687224669603524, "percentage": 10.34, "elapsed_time": "0:51:51", "remaining_time": "7:29:34"} +{"current_steps": 588, "total_steps": 5676, "loss": 0.7520540356636047, "lr": 1.9844572808131228e-05, "epoch": 0.20722466960352423, "percentage": 10.36, "elapsed_time": "0:51:57", "remaining_time": "7:29:32"} +{"current_steps": 589, "total_steps": 5676, "loss": 0.795365571975708, "lr": 1.9843547881068763e-05, "epoch": 0.20757709251101322, "percentage": 10.38, "elapsed_time": "0:52:03", "remaining_time": "7:29:37"} +{"current_steps": 590, "total_steps": 5676, "loss": 0.8415528535842896, "lr": 1.984251961242814e-05, "epoch": 0.20792951541850221, "percentage": 10.39, "elapsed_time": "0:52:08", "remaining_time": "7:29:30"} +{"current_steps": 591, "total_steps": 5676, "loss": 0.8555570244789124, "lr": 1.9841488002558416e-05, "epoch": 0.20828193832599118, "percentage": 10.41, "elapsed_time": "0:52:15", "remaining_time": "7:29:33"} +{"current_steps": 592, "total_steps": 5676, "loss": 0.8214600086212158, "lr": 1.9840453051809792e-05, "epoch": 0.20863436123348017, "percentage": 10.43, "elapsed_time": "0:52:19", "remaining_time": "7:29:23"} +{"current_steps": 593, "total_steps": 5676, "loss": 0.7746415138244629, "lr": 1.9839414760533607e-05, "epoch": 0.20898678414096916, "percentage": 10.45, "elapsed_time": "0:52:24", "remaining_time": "7:29:12"} +{"current_steps": 594, "total_steps": 5676, "loss": 1.0861419439315796, "lr": 1.9838373129082325e-05, "epoch": 0.20933920704845815, "percentage": 10.47, "elapsed_time": "0:52:30", 
"remaining_time": "7:29:10"} +{"current_steps": 595, "total_steps": 5676, "loss": 0.7530953884124756, "lr": 1.9837328157809547e-05, "epoch": 0.20969162995594715, "percentage": 10.48, "elapsed_time": "0:52:36", "remaining_time": "7:29:12"} +{"current_steps": 596, "total_steps": 5676, "loss": 0.8811959624290466, "lr": 1.9836279847070004e-05, "epoch": 0.21004405286343614, "percentage": 10.5, "elapsed_time": "0:52:40", "remaining_time": "7:29:00"} +{"current_steps": 597, "total_steps": 5676, "loss": 0.7956523299217224, "lr": 1.9835228197219573e-05, "epoch": 0.2103964757709251, "percentage": 10.52, "elapsed_time": "0:52:46", "remaining_time": "7:28:57"} +{"current_steps": 598, "total_steps": 5676, "loss": 0.8710414171218872, "lr": 1.9834173208615253e-05, "epoch": 0.2107488986784141, "percentage": 10.54, "elapsed_time": "0:52:51", "remaining_time": "7:28:52"} +{"current_steps": 599, "total_steps": 5676, "loss": 0.9057297706604004, "lr": 1.983311488161518e-05, "epoch": 0.21110132158590308, "percentage": 10.55, "elapsed_time": "0:52:57", "remaining_time": "7:28:48"} +{"current_steps": 600, "total_steps": 5676, "loss": 0.7531988024711609, "lr": 1.983205321657862e-05, "epoch": 0.21145374449339208, "percentage": 10.57, "elapsed_time": "0:53:04", "remaining_time": "7:28:57"} +{"current_steps": 601, "total_steps": 5676, "loss": 0.6508063077926636, "lr": 1.983098821386598e-05, "epoch": 0.21180616740088107, "percentage": 10.59, "elapsed_time": "0:53:13", "remaining_time": "7:29:23"} +{"current_steps": 602, "total_steps": 5676, "loss": 0.7267025709152222, "lr": 1.9829919873838796e-05, "epoch": 0.21215859030837003, "percentage": 10.61, "elapsed_time": "0:53:18", "remaining_time": "7:29:17"} +{"current_steps": 603, "total_steps": 5676, "loss": 0.6930510997772217, "lr": 1.9828848196859727e-05, "epoch": 0.21251101321585902, "percentage": 10.62, "elapsed_time": "0:53:24", "remaining_time": "7:29:19"} +{"current_steps": 604, "total_steps": 5676, "loss": 0.7613120079040527, "lr": 
1.9827773183292583e-05, "epoch": 0.21286343612334802, "percentage": 10.64, "elapsed_time": "0:53:29", "remaining_time": "7:29:13"} +{"current_steps": 605, "total_steps": 5676, "loss": 0.763299822807312, "lr": 1.9826694833502295e-05, "epoch": 0.213215859030837, "percentage": 10.66, "elapsed_time": "0:53:35", "remaining_time": "7:29:15"} +{"current_steps": 606, "total_steps": 5676, "loss": 0.7599194049835205, "lr": 1.9825613147854928e-05, "epoch": 0.213568281938326, "percentage": 10.68, "elapsed_time": "0:53:39", "remaining_time": "7:28:59"} +{"current_steps": 607, "total_steps": 5676, "loss": 0.869399905204773, "lr": 1.9824528126717687e-05, "epoch": 0.213920704845815, "percentage": 10.69, "elapsed_time": "0:53:46", "remaining_time": "7:29:02"} +{"current_steps": 608, "total_steps": 5676, "loss": 0.733409583568573, "lr": 1.9823439770458893e-05, "epoch": 0.21427312775330395, "percentage": 10.71, "elapsed_time": "0:53:50", "remaining_time": "7:28:47"} +{"current_steps": 609, "total_steps": 5676, "loss": 0.8302386999130249, "lr": 1.9822348079448014e-05, "epoch": 0.21462555066079295, "percentage": 10.73, "elapsed_time": "0:53:55", "remaining_time": "7:28:36"} +{"current_steps": 610, "total_steps": 5676, "loss": 0.8234561681747437, "lr": 1.9821253054055645e-05, "epoch": 0.21497797356828194, "percentage": 10.75, "elapsed_time": "0:53:59", "remaining_time": "7:28:26"} +{"current_steps": 611, "total_steps": 5676, "loss": 0.81988525390625, "lr": 1.9820154694653514e-05, "epoch": 0.21533039647577093, "percentage": 10.76, "elapsed_time": "0:54:06", "remaining_time": "7:28:29"} +{"current_steps": 612, "total_steps": 5676, "loss": 0.6437678933143616, "lr": 1.9819053001614478e-05, "epoch": 0.21568281938325992, "percentage": 10.78, "elapsed_time": "0:54:12", "remaining_time": "7:28:31"} +{"current_steps": 613, "total_steps": 5676, "loss": 0.8256562948226929, "lr": 1.9817947975312527e-05, "epoch": 0.21603524229074889, "percentage": 10.8, "elapsed_time": "0:54:17", "remaining_time": 
"7:28:26"} +{"current_steps": 614, "total_steps": 5676, "loss": 0.8204725980758667, "lr": 1.9816839616122787e-05, "epoch": 0.21638766519823788, "percentage": 10.82, "elapsed_time": "0:54:23", "remaining_time": "7:28:25"} +{"current_steps": 615, "total_steps": 5676, "loss": 0.7492775917053223, "lr": 1.9815727924421507e-05, "epoch": 0.21674008810572687, "percentage": 10.84, "elapsed_time": "0:54:29", "remaining_time": "7:28:24"} +{"current_steps": 616, "total_steps": 5676, "loss": 0.629736065864563, "lr": 1.9814612900586075e-05, "epoch": 0.21709251101321586, "percentage": 10.85, "elapsed_time": "0:54:34", "remaining_time": "7:28:18"} +{"current_steps": 617, "total_steps": 5676, "loss": 0.7974159717559814, "lr": 1.9813494544995e-05, "epoch": 0.21744493392070485, "percentage": 10.87, "elapsed_time": "0:54:39", "remaining_time": "7:28:10"} +{"current_steps": 618, "total_steps": 5676, "loss": 0.8367668390274048, "lr": 1.981237285802794e-05, "epoch": 0.21779735682819384, "percentage": 10.89, "elapsed_time": "0:54:44", "remaining_time": "7:28:04"} +{"current_steps": 619, "total_steps": 5676, "loss": 0.7942521572113037, "lr": 1.9811247840065667e-05, "epoch": 0.2181497797356828, "percentage": 10.91, "elapsed_time": "0:54:50", "remaining_time": "7:28:02"} +{"current_steps": 620, "total_steps": 5676, "loss": 0.7863545417785645, "lr": 1.981011949149009e-05, "epoch": 0.2185022026431718, "percentage": 10.92, "elapsed_time": "0:54:55", "remaining_time": "7:27:57"} +{"current_steps": 621, "total_steps": 5676, "loss": 0.8667019605636597, "lr": 1.9808987812684247e-05, "epoch": 0.2188546255506608, "percentage": 10.94, "elapsed_time": "0:55:01", "remaining_time": "7:27:50"} +{"current_steps": 622, "total_steps": 5676, "loss": 0.8555353283882141, "lr": 1.9807852804032306e-05, "epoch": 0.21920704845814978, "percentage": 10.96, "elapsed_time": "0:55:05", "remaining_time": "7:27:39"} +{"current_steps": 623, "total_steps": 5676, "loss": 0.8170013427734375, "lr": 1.9806714465919573e-05, 
"epoch": 0.21955947136563878, "percentage": 10.98, "elapsed_time": "0:55:11", "remaining_time": "7:27:36"} +{"current_steps": 624, "total_steps": 5676, "loss": 0.9277342557907104, "lr": 1.9805572798732475e-05, "epoch": 0.21991189427312777, "percentage": 10.99, "elapsed_time": "0:55:16", "remaining_time": "7:27:28"} +{"current_steps": 625, "total_steps": 5676, "loss": 0.6536964178085327, "lr": 1.980442780285857e-05, "epoch": 0.22026431718061673, "percentage": 11.01, "elapsed_time": "0:55:19", "remaining_time": "7:27:10"} +{"current_steps": 626, "total_steps": 5676, "loss": 0.7197799682617188, "lr": 1.980327947868655e-05, "epoch": 0.22061674008810572, "percentage": 11.03, "elapsed_time": "0:55:24", "remaining_time": "7:26:59"} +{"current_steps": 627, "total_steps": 5676, "loss": 0.7558401823043823, "lr": 1.980212782660624e-05, "epoch": 0.22096916299559471, "percentage": 11.05, "elapsed_time": "0:55:30", "remaining_time": "7:26:59"} +{"current_steps": 628, "total_steps": 5676, "loss": 0.7918291091918945, "lr": 1.9800972847008586e-05, "epoch": 0.2213215859030837, "percentage": 11.06, "elapsed_time": "0:55:36", "remaining_time": "7:26:58"} +{"current_steps": 629, "total_steps": 5676, "loss": 0.7159492373466492, "lr": 1.979981454028567e-05, "epoch": 0.2216740088105727, "percentage": 11.08, "elapsed_time": "0:55:42", "remaining_time": "7:26:57"} +{"current_steps": 630, "total_steps": 5676, "loss": 0.854686439037323, "lr": 1.9798652906830694e-05, "epoch": 0.22202643171806166, "percentage": 11.1, "elapsed_time": "0:55:47", "remaining_time": "7:26:48"} +{"current_steps": 631, "total_steps": 5676, "loss": 0.736785888671875, "lr": 1.9797487947038007e-05, "epoch": 0.22237885462555065, "percentage": 11.12, "elapsed_time": "0:55:52", "remaining_time": "7:26:40"} +{"current_steps": 632, "total_steps": 5676, "loss": 0.7092996835708618, "lr": 1.9796319661303065e-05, "epoch": 0.22273127753303965, "percentage": 11.13, "elapsed_time": "0:55:57", "remaining_time": "7:26:39"} 
+{"current_steps": 633, "total_steps": 5676, "loss": 0.8890455961227417, "lr": 1.9795148050022477e-05, "epoch": 0.22308370044052864, "percentage": 11.15, "elapsed_time": "0:56:03", "remaining_time": "7:26:35"} +{"current_steps": 634, "total_steps": 5676, "loss": 0.7476855516433716, "lr": 1.979397311359396e-05, "epoch": 0.22343612334801763, "percentage": 11.17, "elapsed_time": "0:56:10", "remaining_time": "7:26:40"} +{"current_steps": 635, "total_steps": 5676, "loss": 0.7810029983520508, "lr": 1.979279485241637e-05, "epoch": 0.22378854625550662, "percentage": 11.19, "elapsed_time": "0:56:15", "remaining_time": "7:26:33"} +{"current_steps": 636, "total_steps": 5676, "loss": 0.6679891347885132, "lr": 1.9791613266889688e-05, "epoch": 0.22414096916299558, "percentage": 11.21, "elapsed_time": "0:56:21", "remaining_time": "7:26:35"} +{"current_steps": 637, "total_steps": 5676, "loss": 0.809790849685669, "lr": 1.979042835741503e-05, "epoch": 0.22449339207048458, "percentage": 11.22, "elapsed_time": "0:56:27", "remaining_time": "7:26:38"} +{"current_steps": 638, "total_steps": 5676, "loss": 0.8347213268280029, "lr": 1.9789240124394638e-05, "epoch": 0.22484581497797357, "percentage": 11.24, "elapsed_time": "0:56:32", "remaining_time": "7:26:25"} +{"current_steps": 639, "total_steps": 5676, "loss": 0.6620997190475464, "lr": 1.9788048568231875e-05, "epoch": 0.22519823788546256, "percentage": 11.26, "elapsed_time": "0:56:39", "remaining_time": "7:26:36"} +{"current_steps": 640, "total_steps": 5676, "loss": 0.7727694511413574, "lr": 1.9786853689331235e-05, "epoch": 0.22555066079295155, "percentage": 11.28, "elapsed_time": "0:56:45", "remaining_time": "7:26:37"} +{"current_steps": 641, "total_steps": 5676, "loss": 0.7433278560638428, "lr": 1.9785655488098348e-05, "epoch": 0.22590308370044052, "percentage": 11.29, "elapsed_time": "0:56:51", "remaining_time": "7:26:33"} +{"current_steps": 642, "total_steps": 5676, "loss": 0.7375571727752686, "lr": 1.9784453964939966e-05, "epoch": 
0.2262555066079295, "percentage": 11.31, "elapsed_time": "0:56:56", "remaining_time": "7:26:29"} +{"current_steps": 643, "total_steps": 5676, "loss": 0.5838407874107361, "lr": 1.9783249120263962e-05, "epoch": 0.2266079295154185, "percentage": 11.33, "elapsed_time": "0:57:00", "remaining_time": "7:26:13"} +{"current_steps": 644, "total_steps": 5676, "loss": 0.7120088934898376, "lr": 1.978204095447935e-05, "epoch": 0.2269603524229075, "percentage": 11.35, "elapsed_time": "0:57:04", "remaining_time": "7:26:01"} +{"current_steps": 645, "total_steps": 5676, "loss": 0.7668102383613586, "lr": 1.9780829467996262e-05, "epoch": 0.22731277533039648, "percentage": 11.36, "elapsed_time": "0:57:08", "remaining_time": "7:25:45"} +{"current_steps": 646, "total_steps": 5676, "loss": 0.748942494392395, "lr": 1.977961466122596e-05, "epoch": 0.22766519823788547, "percentage": 11.38, "elapsed_time": "0:57:14", "remaining_time": "7:25:45"} +{"current_steps": 647, "total_steps": 5676, "loss": 0.7569374442100525, "lr": 1.9778396534580836e-05, "epoch": 0.22801762114537444, "percentage": 11.4, "elapsed_time": "0:57:19", "remaining_time": "7:25:37"} +{"current_steps": 648, "total_steps": 5676, "loss": 0.7981363534927368, "lr": 1.97771750884744e-05, "epoch": 0.22837004405286343, "percentage": 11.42, "elapsed_time": "0:57:25", "remaining_time": "7:25:33"} +{"current_steps": 649, "total_steps": 5676, "loss": 0.7501301765441895, "lr": 1.97759503233213e-05, "epoch": 0.22872246696035242, "percentage": 11.43, "elapsed_time": "0:57:30", "remaining_time": "7:25:26"} +{"current_steps": 650, "total_steps": 5676, "loss": 0.7880003452301025, "lr": 1.9774722239537305e-05, "epoch": 0.2290748898678414, "percentage": 11.45, "elapsed_time": "0:57:35", "remaining_time": "7:25:17"} +{"current_steps": 651, "total_steps": 5676, "loss": 0.9007930755615234, "lr": 1.977349083753931e-05, "epoch": 0.2294273127753304, "percentage": 11.47, "elapsed_time": "0:57:41", "remaining_time": "7:25:19"} +{"current_steps": 652, 
"total_steps": 5676, "loss": 0.6291126012802124, "lr": 1.9772256117745335e-05, "epoch": 0.2297797356828194, "percentage": 11.49, "elapsed_time": "0:57:47", "remaining_time": "7:25:17"} +{"current_steps": 653, "total_steps": 5676, "loss": 0.8155031204223633, "lr": 1.9771018080574534e-05, "epoch": 0.23013215859030836, "percentage": 11.5, "elapsed_time": "0:57:53", "remaining_time": "7:25:21"} +{"current_steps": 654, "total_steps": 5676, "loss": 0.7103240489959717, "lr": 1.976977672644718e-05, "epoch": 0.23048458149779735, "percentage": 11.52, "elapsed_time": "0:57:59", "remaining_time": "7:25:14"} +{"current_steps": 655, "total_steps": 5676, "loss": 0.8590278625488281, "lr": 1.9768532055784678e-05, "epoch": 0.23083700440528634, "percentage": 11.54, "elapsed_time": "0:58:05", "remaining_time": "7:25:15"} +{"current_steps": 656, "total_steps": 5676, "loss": 0.7729001641273499, "lr": 1.9767284069009545e-05, "epoch": 0.23118942731277534, "percentage": 11.56, "elapsed_time": "0:58:10", "remaining_time": "7:25:08"} +{"current_steps": 657, "total_steps": 5676, "loss": 0.8287409543991089, "lr": 1.9766032766545445e-05, "epoch": 0.23154185022026433, "percentage": 11.58, "elapsed_time": "0:58:15", "remaining_time": "7:25:02"} +{"current_steps": 658, "total_steps": 5676, "loss": 0.8651477098464966, "lr": 1.9764778148817147e-05, "epoch": 0.2318942731277533, "percentage": 11.59, "elapsed_time": "0:58:20", "remaining_time": "7:24:52"} +{"current_steps": 659, "total_steps": 5676, "loss": 0.7582576274871826, "lr": 1.976352021625056e-05, "epoch": 0.23224669603524228, "percentage": 11.61, "elapsed_time": "0:58:24", "remaining_time": "7:24:38"} +{"current_steps": 660, "total_steps": 5676, "loss": 0.6579675078392029, "lr": 1.976225896927271e-05, "epoch": 0.23259911894273128, "percentage": 11.63, "elapsed_time": "0:58:29", "remaining_time": "7:24:31"} +{"current_steps": 661, "total_steps": 5676, "loss": 0.8817700147628784, "lr": 1.9760994408311757e-05, "epoch": 0.23295154185022027, 
"percentage": 11.65, "elapsed_time": "0:58:34", "remaining_time": "7:24:24"} +{"current_steps": 662, "total_steps": 5676, "loss": 0.7241606712341309, "lr": 1.9759726533796976e-05, "epoch": 0.23330396475770926, "percentage": 11.66, "elapsed_time": "0:58:38", "remaining_time": "7:24:07"} +{"current_steps": 663, "total_steps": 5676, "loss": 0.7434183359146118, "lr": 1.9758455346158768e-05, "epoch": 0.23365638766519825, "percentage": 11.68, "elapsed_time": "0:58:43", "remaining_time": "7:24:04"} +{"current_steps": 664, "total_steps": 5676, "loss": 0.632422685623169, "lr": 1.9757180845828663e-05, "epoch": 0.23400881057268721, "percentage": 11.7, "elapsed_time": "0:58:49", "remaining_time": "7:24:03"} +{"current_steps": 665, "total_steps": 5676, "loss": 0.7276040315628052, "lr": 1.9755903033239318e-05, "epoch": 0.2343612334801762, "percentage": 11.72, "elapsed_time": "0:58:55", "remaining_time": "7:24:01"} +{"current_steps": 666, "total_steps": 5676, "loss": 0.8070325255393982, "lr": 1.975462190882451e-05, "epoch": 0.2347136563876652, "percentage": 11.73, "elapsed_time": "0:59:00", "remaining_time": "7:23:57"} +{"current_steps": 667, "total_steps": 5676, "loss": 0.867915689945221, "lr": 1.9753337473019133e-05, "epoch": 0.2350660792951542, "percentage": 11.75, "elapsed_time": "0:59:05", "remaining_time": "7:23:49"} +{"current_steps": 668, "total_steps": 5676, "loss": 0.7905307412147522, "lr": 1.9752049726259223e-05, "epoch": 0.23541850220264318, "percentage": 11.77, "elapsed_time": "0:59:12", "remaining_time": "7:23:56"} +{"current_steps": 669, "total_steps": 5676, "loss": 0.7721420526504517, "lr": 1.9750758668981925e-05, "epoch": 0.23577092511013215, "percentage": 11.79, "elapsed_time": "0:59:18", "remaining_time": "7:23:55"} +{"current_steps": 670, "total_steps": 5676, "loss": 0.7926005125045776, "lr": 1.9749464301625515e-05, "epoch": 0.23612334801762114, "percentage": 11.8, "elapsed_time": "0:59:23", "remaining_time": "7:23:45"} +{"current_steps": 671, "total_steps": 
5676, "loss": 0.7651785612106323, "lr": 1.974816662462939e-05, "epoch": 0.23647577092511013, "percentage": 11.82, "elapsed_time": "0:59:30", "remaining_time": "7:23:51"} +{"current_steps": 672, "total_steps": 5676, "loss": 0.7548795938491821, "lr": 1.974686563843407e-05, "epoch": 0.23682819383259912, "percentage": 11.84, "elapsed_time": "0:59:34", "remaining_time": "7:23:38"} +{"current_steps": 673, "total_steps": 5676, "loss": 0.5405399799346924, "lr": 1.9745561343481197e-05, "epoch": 0.2371806167400881, "percentage": 11.86, "elapsed_time": "0:59:40", "remaining_time": "7:23:38"} +{"current_steps": 674, "total_steps": 5676, "loss": 0.7561137080192566, "lr": 1.9744253740213542e-05, "epoch": 0.2375330396475771, "percentage": 11.87, "elapsed_time": "0:59:46", "remaining_time": "7:23:35"} +{"current_steps": 675, "total_steps": 5676, "loss": 0.8809534907341003, "lr": 1.9742942829074993e-05, "epoch": 0.23788546255506607, "percentage": 11.89, "elapsed_time": "0:59:52", "remaining_time": "7:23:33"} +{"current_steps": 676, "total_steps": 5676, "loss": 0.750350832939148, "lr": 1.974162861051057e-05, "epoch": 0.23823788546255506, "percentage": 11.91, "elapsed_time": "0:59:58", "remaining_time": "7:23:32"} +{"current_steps": 677, "total_steps": 5676, "loss": 0.89476478099823, "lr": 1.9740311084966398e-05, "epoch": 0.23859030837004405, "percentage": 11.93, "elapsed_time": "1:00:02", "remaining_time": "7:23:19"} +{"current_steps": 678, "total_steps": 5676, "loss": 0.8647176027297974, "lr": 1.9738990252889748e-05, "epoch": 0.23894273127753304, "percentage": 11.95, "elapsed_time": "1:00:06", "remaining_time": "7:23:08"} +{"current_steps": 679, "total_steps": 5676, "loss": 0.7331727743148804, "lr": 1.9737666114728996e-05, "epoch": 0.23929515418502204, "percentage": 11.96, "elapsed_time": "1:00:11", "remaining_time": "7:23:00"} +{"current_steps": 680, "total_steps": 5676, "loss": 0.7714364528656006, "lr": 1.9736338670933642e-05, "epoch": 0.239647577092511, "percentage": 11.98, 
"elapsed_time": "1:00:16", "remaining_time": "7:22:53"} +{"current_steps": 681, "total_steps": 5676, "loss": 0.7840908765792847, "lr": 1.973500792195432e-05, "epoch": 0.24, "percentage": 12.0, "elapsed_time": "1:00:22", "remaining_time": "7:22:51"} +{"current_steps": 682, "total_steps": 5676, "loss": 0.8723878860473633, "lr": 1.9733673868242767e-05, "epoch": 0.24035242290748898, "percentage": 12.02, "elapsed_time": "1:00:28", "remaining_time": "7:22:53"} +{"current_steps": 683, "total_steps": 5676, "loss": 0.782090425491333, "lr": 1.9732336510251864e-05, "epoch": 0.24070484581497797, "percentage": 12.03, "elapsed_time": "1:00:33", "remaining_time": "7:22:40"} +{"current_steps": 684, "total_steps": 5676, "loss": 0.8000990152359009, "lr": 1.9730995848435594e-05, "epoch": 0.24105726872246697, "percentage": 12.05, "elapsed_time": "1:00:38", "remaining_time": "7:22:31"} +{"current_steps": 685, "total_steps": 5676, "loss": 0.7499237060546875, "lr": 1.9729651883249075e-05, "epoch": 0.24140969162995596, "percentage": 12.07, "elapsed_time": "1:00:43", "remaining_time": "7:22:24"} +{"current_steps": 686, "total_steps": 5676, "loss": 0.8786858916282654, "lr": 1.972830461514854e-05, "epoch": 0.24176211453744492, "percentage": 12.09, "elapsed_time": "1:00:47", "remaining_time": "7:22:12"} +{"current_steps": 687, "total_steps": 5676, "loss": 0.7039557695388794, "lr": 1.972695404459134e-05, "epoch": 0.2421145374449339, "percentage": 12.1, "elapsed_time": "1:00:52", "remaining_time": "7:22:07"} +{"current_steps": 688, "total_steps": 5676, "loss": 0.6699448823928833, "lr": 1.9725600172035962e-05, "epoch": 0.2424669603524229, "percentage": 12.12, "elapsed_time": "1:00:58", "remaining_time": "7:22:07"} +{"current_steps": 689, "total_steps": 5676, "loss": 0.6753977537155151, "lr": 1.9724242997941995e-05, "epoch": 0.2428193832599119, "percentage": 12.14, "elapsed_time": "1:01:04", "remaining_time": "7:22:02"} +{"current_steps": 690, "total_steps": 5676, "loss": 0.7139854431152344, 
"lr": 1.9722882522770163e-05, "epoch": 0.2431718061674009, "percentage": 12.16, "elapsed_time": "1:01:09", "remaining_time": "7:21:58"} +{"current_steps": 691, "total_steps": 5676, "loss": 0.7894896864891052, "lr": 1.9721518746982296e-05, "epoch": 0.24352422907488988, "percentage": 12.17, "elapsed_time": "1:01:15", "remaining_time": "7:21:57"} +{"current_steps": 692, "total_steps": 5676, "loss": 0.5663755536079407, "lr": 1.972015167104136e-05, "epoch": 0.24387665198237884, "percentage": 12.19, "elapsed_time": "1:01:20", "remaining_time": "7:21:44"} +{"current_steps": 693, "total_steps": 5676, "loss": 0.8607856035232544, "lr": 1.971878129541144e-05, "epoch": 0.24422907488986784, "percentage": 12.21, "elapsed_time": "1:01:24", "remaining_time": "7:21:33"} +{"current_steps": 694, "total_steps": 5676, "loss": 0.7384383678436279, "lr": 1.9717407620557724e-05, "epoch": 0.24458149779735683, "percentage": 12.23, "elapsed_time": "1:01:28", "remaining_time": "7:21:22"} +{"current_steps": 695, "total_steps": 5676, "loss": 0.6145502328872681, "lr": 1.971603064694654e-05, "epoch": 0.24493392070484582, "percentage": 12.24, "elapsed_time": "1:01:34", "remaining_time": "7:21:19"} +{"current_steps": 696, "total_steps": 5676, "loss": 0.6758620738983154, "lr": 1.9714650375045328e-05, "epoch": 0.2452863436123348, "percentage": 12.26, "elapsed_time": "1:01:40", "remaining_time": "7:21:16"} +{"current_steps": 697, "total_steps": 5676, "loss": 0.7416598200798035, "lr": 1.9713266805322643e-05, "epoch": 0.24563876651982378, "percentage": 12.28, "elapsed_time": "1:01:44", "remaining_time": "7:21:04"} +{"current_steps": 698, "total_steps": 5676, "loss": 0.7603555917739868, "lr": 1.9711879938248163e-05, "epoch": 0.24599118942731277, "percentage": 12.3, "elapsed_time": "1:01:48", "remaining_time": "7:20:49"} +{"current_steps": 699, "total_steps": 5676, "loss": 0.9119949340820312, "lr": 1.9710489774292692e-05, "epoch": 0.24634361233480176, "percentage": 12.32, "elapsed_time": "1:01:55", 
"remaining_time": "7:20:52"} +{"current_steps": 700, "total_steps": 5676, "loss": 0.6884537935256958, "lr": 1.9709096313928144e-05, "epoch": 0.24669603524229075, "percentage": 12.33, "elapsed_time": "1:02:00", "remaining_time": "7:20:46"} +{"current_steps": 701, "total_steps": 5676, "loss": 0.7928721904754639, "lr": 1.9707699557627554e-05, "epoch": 0.24704845814977974, "percentage": 12.35, "elapsed_time": "1:02:08", "remaining_time": "7:21:03"} +{"current_steps": 702, "total_steps": 5676, "loss": 0.888218104839325, "lr": 1.970629950586508e-05, "epoch": 0.24740088105726873, "percentage": 12.37, "elapsed_time": "1:02:14", "remaining_time": "7:21:01"} +{"current_steps": 703, "total_steps": 5676, "loss": 0.7949875593185425, "lr": 1.9704896159115997e-05, "epoch": 0.2477533039647577, "percentage": 12.39, "elapsed_time": "1:02:18", "remaining_time": "7:20:44"} +{"current_steps": 704, "total_steps": 5676, "loss": 0.9031823873519897, "lr": 1.970348951785669e-05, "epoch": 0.2481057268722467, "percentage": 12.4, "elapsed_time": "1:02:23", "remaining_time": "7:20:37"} +{"current_steps": 705, "total_steps": 5676, "loss": 0.636865496635437, "lr": 1.9702079582564682e-05, "epoch": 0.24845814977973568, "percentage": 12.42, "elapsed_time": "1:02:27", "remaining_time": "7:20:26"} +{"current_steps": 706, "total_steps": 5676, "loss": 0.731717586517334, "lr": 1.9700666353718593e-05, "epoch": 0.24881057268722467, "percentage": 12.44, "elapsed_time": "1:02:32", "remaining_time": "7:20:14"} +{"current_steps": 707, "total_steps": 5676, "loss": 0.7571220397949219, "lr": 1.9699249831798172e-05, "epoch": 0.24916299559471367, "percentage": 12.46, "elapsed_time": "1:02:37", "remaining_time": "7:20:08"} +{"current_steps": 708, "total_steps": 5676, "loss": 0.6112762689590454, "lr": 1.969783001728429e-05, "epoch": 0.24951541850220263, "percentage": 12.47, "elapsed_time": "1:02:42", "remaining_time": "7:20:03"} +{"current_steps": 709, "total_steps": 5676, "loss": 0.6737902164459229, "lr": 
1.9696406910658918e-05, "epoch": 0.24986784140969162, "percentage": 12.49, "elapsed_time": "1:02:46", "remaining_time": "7:19:47"} +{"current_steps": 710, "total_steps": 5676, "loss": 0.6525848507881165, "lr": 1.9694980512405167e-05, "epoch": 0.25022026431718064, "percentage": 12.51, "elapsed_time": "1:02:50", "remaining_time": "7:19:34"} +{"current_steps": 711, "total_steps": 5676, "loss": 0.9107403755187988, "lr": 1.9693550823007248e-05, "epoch": 0.2505726872246696, "percentage": 12.53, "elapsed_time": "1:02:56", "remaining_time": "7:19:29"} +{"current_steps": 712, "total_steps": 5676, "loss": 0.7373934984207153, "lr": 1.96921178429505e-05, "epoch": 0.25092511013215857, "percentage": 12.54, "elapsed_time": "1:03:00", "remaining_time": "7:19:18"} +{"current_steps": 713, "total_steps": 5676, "loss": 0.6383399963378906, "lr": 1.9690681572721377e-05, "epoch": 0.25127753303964756, "percentage": 12.56, "elapsed_time": "1:03:05", "remaining_time": "7:19:06"} +{"current_steps": 714, "total_steps": 5676, "loss": 0.6600236296653748, "lr": 1.9689242012807442e-05, "epoch": 0.25162995594713655, "percentage": 12.58, "elapsed_time": "1:03:09", "remaining_time": "7:18:53"} +{"current_steps": 715, "total_steps": 5676, "loss": 0.9195891618728638, "lr": 1.9687799163697386e-05, "epoch": 0.25198237885462554, "percentage": 12.6, "elapsed_time": "1:03:13", "remaining_time": "7:18:42"} +{"current_steps": 716, "total_steps": 5676, "loss": 0.7122433185577393, "lr": 1.968635302588101e-05, "epoch": 0.25233480176211454, "percentage": 12.61, "elapsed_time": "1:03:19", "remaining_time": "7:18:42"} +{"current_steps": 717, "total_steps": 5676, "loss": 0.7601606845855713, "lr": 1.968490359984923e-05, "epoch": 0.2526872246696035, "percentage": 12.63, "elapsed_time": "1:03:26", "remaining_time": "7:18:48"} +{"current_steps": 718, "total_steps": 5676, "loss": 0.8216352462768555, "lr": 1.9683450886094087e-05, "epoch": 0.2530396475770925, "percentage": 12.65, "elapsed_time": "1:03:32", 
"remaining_time": "7:18:49"} +{"current_steps": 719, "total_steps": 5676, "loss": 0.8783165216445923, "lr": 1.9681994885108727e-05, "epoch": 0.2533920704845815, "percentage": 12.67, "elapsed_time": "1:03:37", "remaining_time": "7:18:41"} +{"current_steps": 720, "total_steps": 5676, "loss": 0.7323269844055176, "lr": 1.9680535597387416e-05, "epoch": 0.2537444933920705, "percentage": 12.68, "elapsed_time": "1:03:42", "remaining_time": "7:18:32"} +{"current_steps": 721, "total_steps": 5676, "loss": 0.93906170129776, "lr": 1.9679073023425542e-05, "epoch": 0.2540969162995595, "percentage": 12.7, "elapsed_time": "1:03:48", "remaining_time": "7:18:27"} +{"current_steps": 722, "total_steps": 5676, "loss": 0.774397611618042, "lr": 1.96776071637196e-05, "epoch": 0.25444933920704843, "percentage": 12.72, "elapsed_time": "1:03:55", "remaining_time": "7:18:34"} +{"current_steps": 723, "total_steps": 5676, "loss": 0.6634535789489746, "lr": 1.9676138018767204e-05, "epoch": 0.2548017621145374, "percentage": 12.74, "elapsed_time": "1:03:59", "remaining_time": "7:18:20"} +{"current_steps": 724, "total_steps": 5676, "loss": 0.7705625295639038, "lr": 1.9674665589067082e-05, "epoch": 0.2551541850220264, "percentage": 12.76, "elapsed_time": "1:04:04", "remaining_time": "7:18:15"} +{"current_steps": 725, "total_steps": 5676, "loss": 0.706364631652832, "lr": 1.9673189875119082e-05, "epoch": 0.2555066079295154, "percentage": 12.77, "elapsed_time": "1:04:10", "remaining_time": "7:18:15"} +{"current_steps": 726, "total_steps": 5676, "loss": 0.7295894622802734, "lr": 1.9671710877424158e-05, "epoch": 0.2558590308370044, "percentage": 12.79, "elapsed_time": "1:04:16", "remaining_time": "7:18:15"} +{"current_steps": 727, "total_steps": 5676, "loss": 0.8135089874267578, "lr": 1.9670228596484383e-05, "epoch": 0.2562114537444934, "percentage": 12.81, "elapsed_time": "1:04:21", "remaining_time": "7:18:06"} +{"current_steps": 728, "total_steps": 5676, "loss": 0.801734209060669, "lr": 
1.966874303280295e-05, "epoch": 0.2565638766519824, "percentage": 12.83, "elapsed_time": "1:04:25", "remaining_time": "7:17:51"} +{"current_steps": 729, "total_steps": 5676, "loss": 0.8405104875564575, "lr": 1.9667254186884164e-05, "epoch": 0.2569162995594714, "percentage": 12.84, "elapsed_time": "1:04:28", "remaining_time": "7:17:33"} +{"current_steps": 730, "total_steps": 5676, "loss": 0.8320014476776123, "lr": 1.9665762059233434e-05, "epoch": 0.25726872246696036, "percentage": 12.86, "elapsed_time": "1:04:33", "remaining_time": "7:17:27"} +{"current_steps": 731, "total_steps": 5676, "loss": 0.8701308965682983, "lr": 1.96642666503573e-05, "epoch": 0.25762114537444936, "percentage": 12.88, "elapsed_time": "1:04:39", "remaining_time": "7:17:24"} +{"current_steps": 732, "total_steps": 5676, "loss": 0.7980693578720093, "lr": 1.9662767960763394e-05, "epoch": 0.25797356828193835, "percentage": 12.9, "elapsed_time": "1:04:45", "remaining_time": "7:17:25"} +{"current_steps": 733, "total_steps": 5676, "loss": 0.7258214950561523, "lr": 1.9661265990960486e-05, "epoch": 0.25832599118942734, "percentage": 12.91, "elapsed_time": "1:04:50", "remaining_time": "7:17:13"} +{"current_steps": 734, "total_steps": 5676, "loss": 0.6860172748565674, "lr": 1.9659760741458444e-05, "epoch": 0.2586784140969163, "percentage": 12.93, "elapsed_time": "1:04:55", "remaining_time": "7:17:07"} +{"current_steps": 735, "total_steps": 5676, "loss": 0.7438071370124817, "lr": 1.9658252212768252e-05, "epoch": 0.25903083700440527, "percentage": 12.95, "elapsed_time": "1:05:00", "remaining_time": "7:16:59"} +{"current_steps": 736, "total_steps": 5676, "loss": 0.8680309057235718, "lr": 1.9656740405402007e-05, "epoch": 0.25938325991189426, "percentage": 12.97, "elapsed_time": "1:05:05", "remaining_time": "7:16:52"} +{"current_steps": 737, "total_steps": 5676, "loss": 0.933163046836853, "lr": 1.9655225319872925e-05, "epoch": 0.25973568281938325, "percentage": 12.98, "elapsed_time": "1:05:10", 
"remaining_time": "7:16:43"} +{"current_steps": 738, "total_steps": 5676, "loss": 0.8746597170829773, "lr": 1.9653706956695333e-05, "epoch": 0.26008810572687224, "percentage": 13.0, "elapsed_time": "1:05:14", "remaining_time": "7:16:31"} +{"current_steps": 739, "total_steps": 5676, "loss": 0.857211709022522, "lr": 1.965218531638466e-05, "epoch": 0.26044052863436123, "percentage": 13.02, "elapsed_time": "1:05:21", "remaining_time": "7:16:36"} +{"current_steps": 740, "total_steps": 5676, "loss": 0.7837733030319214, "lr": 1.965066039945746e-05, "epoch": 0.2607929515418502, "percentage": 13.04, "elapsed_time": "1:05:26", "remaining_time": "7:16:27"} +{"current_steps": 741, "total_steps": 5676, "loss": 0.8401491641998291, "lr": 1.9649132206431395e-05, "epoch": 0.2611453744493392, "percentage": 13.05, "elapsed_time": "1:05:31", "remaining_time": "7:16:22"} +{"current_steps": 742, "total_steps": 5676, "loss": 0.7070307731628418, "lr": 1.9647600737825235e-05, "epoch": 0.2614977973568282, "percentage": 13.07, "elapsed_time": "1:05:36", "remaining_time": "7:16:16"} +{"current_steps": 743, "total_steps": 5676, "loss": 0.7649509310722351, "lr": 1.9646065994158873e-05, "epoch": 0.2618502202643172, "percentage": 13.09, "elapsed_time": "1:05:42", "remaining_time": "7:16:15"} +{"current_steps": 744, "total_steps": 5676, "loss": 0.7759182453155518, "lr": 1.9644527975953302e-05, "epoch": 0.2622026431718062, "percentage": 13.11, "elapsed_time": "1:05:47", "remaining_time": "7:16:04"} +{"current_steps": 745, "total_steps": 5676, "loss": 0.8176295757293701, "lr": 1.9642986683730626e-05, "epoch": 0.26255506607929513, "percentage": 13.13, "elapsed_time": "1:05:51", "remaining_time": "7:15:52"} +{"current_steps": 746, "total_steps": 5676, "loss": 0.8406162261962891, "lr": 1.9641442118014078e-05, "epoch": 0.2629074889867841, "percentage": 13.14, "elapsed_time": "1:05:57", "remaining_time": "7:15:50"} +{"current_steps": 747, "total_steps": 5676, "loss": 0.8064795732498169, "lr": 
1.9639894279327985e-05, "epoch": 0.2632599118942731, "percentage": 13.16, "elapsed_time": "1:06:03", "remaining_time": "7:15:52"} +{"current_steps": 748, "total_steps": 5676, "loss": 0.6662956476211548, "lr": 1.9638343168197784e-05, "epoch": 0.2636123348017621, "percentage": 13.18, "elapsed_time": "1:06:09", "remaining_time": "7:15:48"} +{"current_steps": 749, "total_steps": 5676, "loss": 0.8747783899307251, "lr": 1.9636788785150037e-05, "epoch": 0.2639647577092511, "percentage": 13.2, "elapsed_time": "1:06:13", "remaining_time": "7:15:36"} +{"current_steps": 750, "total_steps": 5676, "loss": 0.7893349528312683, "lr": 1.9635231130712406e-05, "epoch": 0.2643171806167401, "percentage": 13.21, "elapsed_time": "1:06:18", "remaining_time": "7:15:28"} +{"current_steps": 751, "total_steps": 5676, "loss": 0.7380903959274292, "lr": 1.9633670205413665e-05, "epoch": 0.2646696035242291, "percentage": 13.23, "elapsed_time": "1:06:23", "remaining_time": "7:15:21"} +{"current_steps": 752, "total_steps": 5676, "loss": 0.9164873957633972, "lr": 1.96321060097837e-05, "epoch": 0.26502202643171807, "percentage": 13.25, "elapsed_time": "1:06:28", "remaining_time": "7:15:14"} +{"current_steps": 753, "total_steps": 5676, "loss": 0.7664264440536499, "lr": 1.9630538544353505e-05, "epoch": 0.26537444933920706, "percentage": 13.27, "elapsed_time": "1:06:32", "remaining_time": "7:15:02"} +{"current_steps": 754, "total_steps": 5676, "loss": 0.8117275238037109, "lr": 1.9628967809655187e-05, "epoch": 0.26572687224669606, "percentage": 13.28, "elapsed_time": "1:06:37", "remaining_time": "7:14:55"} +{"current_steps": 755, "total_steps": 5676, "loss": 0.6203808784484863, "lr": 1.9627393806221967e-05, "epoch": 0.26607929515418505, "percentage": 13.3, "elapsed_time": "1:06:42", "remaining_time": "7:14:50"} +{"current_steps": 756, "total_steps": 5676, "loss": 0.8777878284454346, "lr": 1.9625816534588163e-05, "epoch": 0.266431718061674, "percentage": 13.32, "elapsed_time": "1:06:49", "remaining_time": 
"7:14:51"} +{"current_steps": 757, "total_steps": 5676, "loss": 0.6984438300132751, "lr": 1.9624235995289212e-05, "epoch": 0.266784140969163, "percentage": 13.34, "elapsed_time": "1:06:54", "remaining_time": "7:14:48"} +{"current_steps": 758, "total_steps": 5676, "loss": 0.7806228399276733, "lr": 1.962265218886166e-05, "epoch": 0.26713656387665197, "percentage": 13.35, "elapsed_time": "1:06:59", "remaining_time": "7:14:37"} +{"current_steps": 759, "total_steps": 5676, "loss": 0.6924373507499695, "lr": 1.9621065115843155e-05, "epoch": 0.26748898678414096, "percentage": 13.37, "elapsed_time": "1:07:03", "remaining_time": "7:14:26"} +{"current_steps": 760, "total_steps": 5676, "loss": 0.6809841394424438, "lr": 1.9619474776772462e-05, "epoch": 0.26784140969162995, "percentage": 13.39, "elapsed_time": "1:07:09", "remaining_time": "7:14:27"} +{"current_steps": 761, "total_steps": 5676, "loss": 0.8346723318099976, "lr": 1.961788117218945e-05, "epoch": 0.26819383259911894, "percentage": 13.41, "elapsed_time": "1:07:14", "remaining_time": "7:14:18"} +{"current_steps": 762, "total_steps": 5676, "loss": 0.8000205755233765, "lr": 1.96162843026351e-05, "epoch": 0.26854625550660793, "percentage": 13.42, "elapsed_time": "1:07:20", "remaining_time": "7:14:14"} +{"current_steps": 763, "total_steps": 5676, "loss": 0.8026692271232605, "lr": 1.9614684168651504e-05, "epoch": 0.2688986784140969, "percentage": 13.44, "elapsed_time": "1:07:25", "remaining_time": "7:14:08"} +{"current_steps": 764, "total_steps": 5676, "loss": 0.921292781829834, "lr": 1.961308077078185e-05, "epoch": 0.2692511013215859, "percentage": 13.46, "elapsed_time": "1:07:30", "remaining_time": "7:13:59"} +{"current_steps": 765, "total_steps": 5676, "loss": 0.8018487095832825, "lr": 1.9611474109570446e-05, "epoch": 0.2696035242290749, "percentage": 13.48, "elapsed_time": "1:07:34", "remaining_time": "7:13:45"} +{"current_steps": 766, "total_steps": 5676, "loss": 0.7400588989257812, "lr": 1.9609864185562698e-05, 
"epoch": 0.2699559471365639, "percentage": 13.5, "elapsed_time": "1:07:39", "remaining_time": "7:13:43"} +{"current_steps": 767, "total_steps": 5676, "loss": 0.6243399977684021, "lr": 1.960825099930513e-05, "epoch": 0.27030837004405284, "percentage": 13.51, "elapsed_time": "1:07:44", "remaining_time": "7:13:32"} +{"current_steps": 768, "total_steps": 5676, "loss": 0.7680903673171997, "lr": 1.9606634551345373e-05, "epoch": 0.27066079295154183, "percentage": 13.53, "elapsed_time": "1:07:49", "remaining_time": "7:13:25"} +{"current_steps": 769, "total_steps": 5676, "loss": 0.8783930540084839, "lr": 1.960501484223215e-05, "epoch": 0.2710132158590308, "percentage": 13.55, "elapsed_time": "1:07:55", "remaining_time": "7:13:26"} +{"current_steps": 770, "total_steps": 5676, "loss": 0.7910561561584473, "lr": 1.9603391872515308e-05, "epoch": 0.2713656387665198, "percentage": 13.57, "elapsed_time": "1:08:00", "remaining_time": "7:13:19"} +{"current_steps": 771, "total_steps": 5676, "loss": 0.7325295209884644, "lr": 1.9601765642745795e-05, "epoch": 0.2717180616740088, "percentage": 13.58, "elapsed_time": "1:08:05", "remaining_time": "7:13:11"} +{"current_steps": 772, "total_steps": 5676, "loss": 0.7017170190811157, "lr": 1.9600136153475666e-05, "epoch": 0.2720704845814978, "percentage": 13.6, "elapsed_time": "1:08:10", "remaining_time": "7:13:05"} +{"current_steps": 773, "total_steps": 5676, "loss": 0.9281908273696899, "lr": 1.959850340525808e-05, "epoch": 0.2724229074889868, "percentage": 13.62, "elapsed_time": "1:08:15", "remaining_time": "7:12:58"} +{"current_steps": 774, "total_steps": 5676, "loss": 0.7421029806137085, "lr": 1.95968673986473e-05, "epoch": 0.2727753303964758, "percentage": 13.64, "elapsed_time": "1:08:20", "remaining_time": "7:12:48"} +{"current_steps": 775, "total_steps": 5676, "loss": 0.7474848031997681, "lr": 1.9595228134198708e-05, "epoch": 0.27312775330396477, "percentage": 13.65, "elapsed_time": "1:08:25", "remaining_time": "7:12:42"} 
+{"current_steps": 776, "total_steps": 5676, "loss": 0.7267760038375854, "lr": 1.9593585612468776e-05, "epoch": 0.27348017621145376, "percentage": 13.67, "elapsed_time": "1:08:31", "remaining_time": "7:12:42"} +{"current_steps": 777, "total_steps": 5676, "loss": 0.739683985710144, "lr": 1.9591939834015096e-05, "epoch": 0.27383259911894275, "percentage": 13.69, "elapsed_time": "1:08:35", "remaining_time": "7:12:30"} +{"current_steps": 778, "total_steps": 5676, "loss": 0.6615399122238159, "lr": 1.9590290799396353e-05, "epoch": 0.2741850220264317, "percentage": 13.71, "elapsed_time": "1:08:41", "remaining_time": "7:12:26"} +{"current_steps": 779, "total_steps": 5676, "loss": 0.8045977354049683, "lr": 1.9588638509172343e-05, "epoch": 0.2745374449339207, "percentage": 13.72, "elapsed_time": "1:08:45", "remaining_time": "7:12:11"} +{"current_steps": 780, "total_steps": 5676, "loss": 0.8760169744491577, "lr": 1.958698296390397e-05, "epoch": 0.2748898678414097, "percentage": 13.74, "elapsed_time": "1:08:50", "remaining_time": "7:12:04"} +{"current_steps": 781, "total_steps": 5676, "loss": 0.6676662564277649, "lr": 1.9585324164153236e-05, "epoch": 0.27524229074889867, "percentage": 13.76, "elapsed_time": "1:08:54", "remaining_time": "7:11:52"} +{"current_steps": 782, "total_steps": 5676, "loss": 0.6650630235671997, "lr": 1.958366211048326e-05, "epoch": 0.27559471365638766, "percentage": 13.78, "elapsed_time": "1:08:59", "remaining_time": "7:11:47"} +{"current_steps": 783, "total_steps": 5676, "loss": 0.7399466037750244, "lr": 1.9581996803458248e-05, "epoch": 0.27594713656387665, "percentage": 13.79, "elapsed_time": "1:09:06", "remaining_time": "7:11:50"} +{"current_steps": 784, "total_steps": 5676, "loss": 0.6121753454208374, "lr": 1.9580328243643528e-05, "epoch": 0.27629955947136564, "percentage": 13.81, "elapsed_time": "1:09:11", "remaining_time": "7:11:43"} +{"current_steps": 785, "total_steps": 5676, "loss": 0.8562870025634766, "lr": 1.9578656431605515e-05, "epoch": 
0.27665198237885463, "percentage": 13.83, "elapsed_time": "1:09:16", "remaining_time": "7:11:39"} +{"current_steps": 786, "total_steps": 5676, "loss": 0.717842161655426, "lr": 1.9576981367911746e-05, "epoch": 0.2770044052863436, "percentage": 13.85, "elapsed_time": "1:09:21", "remaining_time": "7:11:28"} +{"current_steps": 787, "total_steps": 5676, "loss": 0.802294135093689, "lr": 1.9575303053130847e-05, "epoch": 0.2773568281938326, "percentage": 13.87, "elapsed_time": "1:09:26", "remaining_time": "7:11:21"} +{"current_steps": 788, "total_steps": 5676, "loss": 0.6636664867401123, "lr": 1.957362148783256e-05, "epoch": 0.2777092511013216, "percentage": 13.88, "elapsed_time": "1:09:31", "remaining_time": "7:11:13"} +{"current_steps": 789, "total_steps": 5676, "loss": 0.7177780866622925, "lr": 1.9571936672587718e-05, "epoch": 0.2780616740088106, "percentage": 13.9, "elapsed_time": "1:09:35", "remaining_time": "7:11:04"} +{"current_steps": 790, "total_steps": 5676, "loss": 0.8263623714447021, "lr": 1.957024860796826e-05, "epoch": 0.27841409691629954, "percentage": 13.92, "elapsed_time": "1:09:41", "remaining_time": "7:10:59"} +{"current_steps": 791, "total_steps": 5676, "loss": 0.7620534896850586, "lr": 1.9568557294547244e-05, "epoch": 0.27876651982378853, "percentage": 13.94, "elapsed_time": "1:09:45", "remaining_time": "7:10:46"} +{"current_steps": 792, "total_steps": 5676, "loss": 0.812814474105835, "lr": 1.956686273289881e-05, "epoch": 0.2791189427312775, "percentage": 13.95, "elapsed_time": "1:09:50", "remaining_time": "7:10:42"} +{"current_steps": 793, "total_steps": 5676, "loss": 0.6494747400283813, "lr": 1.956516492359821e-05, "epoch": 0.2794713656387665, "percentage": 13.97, "elapsed_time": "1:09:56", "remaining_time": "7:10:38"} +{"current_steps": 794, "total_steps": 5676, "loss": 0.7152044773101807, "lr": 1.9563463867221793e-05, "epoch": 0.2798237885462555, "percentage": 13.99, "elapsed_time": "1:10:01", "remaining_time": "7:10:33"} +{"current_steps": 795, 
"total_steps": 5676, "loss": 0.7607219815254211, "lr": 1.956175956434702e-05, "epoch": 0.2801762114537445, "percentage": 14.01, "elapsed_time": "1:10:07", "remaining_time": "7:10:33"} +{"current_steps": 796, "total_steps": 5676, "loss": 0.8793845176696777, "lr": 1.9560052015552455e-05, "epoch": 0.2805286343612335, "percentage": 14.02, "elapsed_time": "1:10:12", "remaining_time": "7:10:23"} +{"current_steps": 797, "total_steps": 5676, "loss": 0.8314816951751709, "lr": 1.9558341221417744e-05, "epoch": 0.2808810572687225, "percentage": 14.04, "elapsed_time": "1:10:17", "remaining_time": "7:10:19"} +{"current_steps": 798, "total_steps": 5676, "loss": 0.8195264339447021, "lr": 1.9556627182523656e-05, "epoch": 0.28123348017621147, "percentage": 14.06, "elapsed_time": "1:10:23", "remaining_time": "7:10:17"} +{"current_steps": 799, "total_steps": 5676, "loss": 0.8079999685287476, "lr": 1.9554909899452055e-05, "epoch": 0.28158590308370046, "percentage": 14.08, "elapsed_time": "1:10:29", "remaining_time": "7:10:19"} +{"current_steps": 800, "total_steps": 5676, "loss": 0.7614034414291382, "lr": 1.9553189372785903e-05, "epoch": 0.28193832599118945, "percentage": 14.09, "elapsed_time": "1:10:34", "remaining_time": "7:10:10"} +{"current_steps": 801, "total_steps": 5676, "loss": 0.6271458268165588, "lr": 1.9551465603109263e-05, "epoch": 0.2822907488986784, "percentage": 14.11, "elapsed_time": "1:10:44", "remaining_time": "7:10:32"} +{"current_steps": 802, "total_steps": 5676, "loss": 0.8061915040016174, "lr": 1.9549738591007302e-05, "epoch": 0.2826431718061674, "percentage": 14.13, "elapsed_time": "1:10:51", "remaining_time": "7:10:34"} +{"current_steps": 803, "total_steps": 5676, "loss": 0.663912296295166, "lr": 1.9548008337066294e-05, "epoch": 0.2829955947136564, "percentage": 14.15, "elapsed_time": "1:10:55", "remaining_time": "7:10:23"} +{"current_steps": 804, "total_steps": 5676, "loss": 0.7582170963287354, "lr": 1.9546274841873597e-05, "epoch": 0.28334801762114536, 
"percentage": 14.16, "elapsed_time": "1:10:59", "remaining_time": "7:10:10"} +{"current_steps": 805, "total_steps": 5676, "loss": 0.7855465412139893, "lr": 1.9544538106017682e-05, "epoch": 0.28370044052863436, "percentage": 14.18, "elapsed_time": "1:11:04", "remaining_time": "7:10:02"} +{"current_steps": 806, "total_steps": 5676, "loss": 0.6976481676101685, "lr": 1.9542798130088116e-05, "epoch": 0.28405286343612335, "percentage": 14.2, "elapsed_time": "1:11:09", "remaining_time": "7:09:55"} +{"current_steps": 807, "total_steps": 5676, "loss": 0.7678342461585999, "lr": 1.954105491467557e-05, "epoch": 0.28440528634361234, "percentage": 14.22, "elapsed_time": "1:11:15", "remaining_time": "7:09:55"} +{"current_steps": 808, "total_steps": 5676, "loss": 0.6238858699798584, "lr": 1.9539308460371812e-05, "epoch": 0.28475770925110133, "percentage": 14.24, "elapsed_time": "1:11:20", "remaining_time": "7:09:46"} +{"current_steps": 809, "total_steps": 5676, "loss": 0.7756681442260742, "lr": 1.95375587677697e-05, "epoch": 0.2851101321585903, "percentage": 14.25, "elapsed_time": "1:11:26", "remaining_time": "7:09:48"} +{"current_steps": 810, "total_steps": 5676, "loss": 0.8908202648162842, "lr": 1.953580583746321e-05, "epoch": 0.2854625550660793, "percentage": 14.27, "elapsed_time": "1:11:32", "remaining_time": "7:09:45"} +{"current_steps": 811, "total_steps": 5676, "loss": 0.6769838929176331, "lr": 1.9534049670047402e-05, "epoch": 0.2858149779735683, "percentage": 14.29, "elapsed_time": "1:11:38", "remaining_time": "7:09:44"} +{"current_steps": 812, "total_steps": 5676, "loss": 0.8452527523040771, "lr": 1.953229026611844e-05, "epoch": 0.28616740088105724, "percentage": 14.31, "elapsed_time": "1:11:43", "remaining_time": "7:09:40"} +{"current_steps": 813, "total_steps": 5676, "loss": 0.7494348287582397, "lr": 1.9530527626273592e-05, "epoch": 0.28651982378854624, "percentage": 14.32, "elapsed_time": "1:11:50", "remaining_time": "7:09:42"} +{"current_steps": 814, "total_steps": 
5676, "loss": 0.7691028714179993, "lr": 1.9528761751111215e-05, "epoch": 0.2868722466960352, "percentage": 14.34, "elapsed_time": "1:11:55", "remaining_time": "7:09:38"} +{"current_steps": 815, "total_steps": 5676, "loss": 0.6854703426361084, "lr": 1.9526992641230768e-05, "epoch": 0.2872246696035242, "percentage": 14.36, "elapsed_time": "1:12:02", "remaining_time": "7:09:38"} +{"current_steps": 816, "total_steps": 5676, "loss": 0.7520424127578735, "lr": 1.9525220297232815e-05, "epoch": 0.2875770925110132, "percentage": 14.38, "elapsed_time": "1:12:07", "remaining_time": "7:09:35"} +{"current_steps": 817, "total_steps": 5676, "loss": 0.7894444465637207, "lr": 1.9523444719719003e-05, "epoch": 0.2879295154185022, "percentage": 14.39, "elapsed_time": "1:12:12", "remaining_time": "7:09:24"} +{"current_steps": 818, "total_steps": 5676, "loss": 0.7835032939910889, "lr": 1.952166590929209e-05, "epoch": 0.2882819383259912, "percentage": 14.41, "elapsed_time": "1:12:16", "remaining_time": "7:09:15"} +{"current_steps": 819, "total_steps": 5676, "loss": 0.7932062149047852, "lr": 1.9519883866555928e-05, "epoch": 0.2886343612334802, "percentage": 14.43, "elapsed_time": "1:12:21", "remaining_time": "7:09:09"} +{"current_steps": 820, "total_steps": 5676, "loss": 0.7917006015777588, "lr": 1.951809859211546e-05, "epoch": 0.2889867841409692, "percentage": 14.45, "elapsed_time": "1:12:27", "remaining_time": "7:09:06"} +{"current_steps": 821, "total_steps": 5676, "loss": 0.5330606698989868, "lr": 1.9516310086576734e-05, "epoch": 0.28933920704845817, "percentage": 14.46, "elapsed_time": "1:12:33", "remaining_time": "7:09:04"} +{"current_steps": 822, "total_steps": 5676, "loss": 0.7243788242340088, "lr": 1.9514518350546893e-05, "epoch": 0.28969162995594716, "percentage": 14.48, "elapsed_time": "1:12:38", "remaining_time": "7:08:57"} +{"current_steps": 823, "total_steps": 5676, "loss": 0.7692278623580933, "lr": 1.9512723384634175e-05, "epoch": 0.2900440528634361, "percentage": 14.5, 
"elapsed_time": "1:12:43", "remaining_time": "7:08:53"} +{"current_steps": 824, "total_steps": 5676, "loss": 0.7537804841995239, "lr": 1.9510925189447916e-05, "epoch": 0.2903964757709251, "percentage": 14.52, "elapsed_time": "1:12:48", "remaining_time": "7:08:41"} +{"current_steps": 825, "total_steps": 5676, "loss": 0.9168751239776611, "lr": 1.9509123765598545e-05, "epoch": 0.2907488986784141, "percentage": 14.53, "elapsed_time": "1:12:53", "remaining_time": "7:08:33"} +{"current_steps": 826, "total_steps": 5676, "loss": 0.7863682508468628, "lr": 1.9507319113697592e-05, "epoch": 0.2911013215859031, "percentage": 14.55, "elapsed_time": "1:12:58", "remaining_time": "7:08:27"} +{"current_steps": 827, "total_steps": 5676, "loss": 0.7119239568710327, "lr": 1.9505511234357677e-05, "epoch": 0.29145374449339206, "percentage": 14.57, "elapsed_time": "1:13:03", "remaining_time": "7:08:19"} +{"current_steps": 828, "total_steps": 5676, "loss": 0.6071019172668457, "lr": 1.950370012819252e-05, "epoch": 0.29180616740088106, "percentage": 14.59, "elapsed_time": "1:13:08", "remaining_time": "7:08:13"} +{"current_steps": 829, "total_steps": 5676, "loss": 0.9750580787658691, "lr": 1.9501885795816937e-05, "epoch": 0.29215859030837005, "percentage": 14.61, "elapsed_time": "1:13:14", "remaining_time": "7:08:12"} +{"current_steps": 830, "total_steps": 5676, "loss": 0.7465370297431946, "lr": 1.9500068237846837e-05, "epoch": 0.29251101321585904, "percentage": 14.62, "elapsed_time": "1:13:19", "remaining_time": "7:08:08"} +{"current_steps": 831, "total_steps": 5676, "loss": 0.7821183204650879, "lr": 1.949824745489922e-05, "epoch": 0.29286343612334803, "percentage": 14.64, "elapsed_time": "1:13:25", "remaining_time": "7:08:04"} +{"current_steps": 832, "total_steps": 5676, "loss": 0.7555009126663208, "lr": 1.949642344759219e-05, "epoch": 0.293215859030837, "percentage": 14.66, "elapsed_time": "1:13:29", "remaining_time": "7:07:55"} +{"current_steps": 833, "total_steps": 5676, "loss": 
0.841058075428009, "lr": 1.9494596216544942e-05, "epoch": 0.293568281938326, "percentage": 14.68, "elapsed_time": "1:13:35", "remaining_time": "7:07:49"} +{"current_steps": 834, "total_steps": 5676, "loss": 0.737910270690918, "lr": 1.9492765762377762e-05, "epoch": 0.29392070484581495, "percentage": 14.69, "elapsed_time": "1:13:40", "remaining_time": "7:07:41"} +{"current_steps": 835, "total_steps": 5676, "loss": 0.6817367076873779, "lr": 1.9490932085712027e-05, "epoch": 0.29427312775330394, "percentage": 14.71, "elapsed_time": "1:13:46", "remaining_time": "7:07:41"} +{"current_steps": 836, "total_steps": 5676, "loss": 0.6739218235015869, "lr": 1.9489095187170218e-05, "epoch": 0.29462555066079293, "percentage": 14.73, "elapsed_time": "1:13:51", "remaining_time": "7:07:35"} +{"current_steps": 837, "total_steps": 5676, "loss": 0.8632504940032959, "lr": 1.9487255067375907e-05, "epoch": 0.2949779735682819, "percentage": 14.75, "elapsed_time": "1:13:56", "remaining_time": "7:07:26"} +{"current_steps": 838, "total_steps": 5676, "loss": 0.6615850925445557, "lr": 1.9485411726953753e-05, "epoch": 0.2953303964757709, "percentage": 14.76, "elapsed_time": "1:14:01", "remaining_time": "7:07:23"} +{"current_steps": 839, "total_steps": 5676, "loss": 0.8647087812423706, "lr": 1.9483565166529515e-05, "epoch": 0.2956828193832599, "percentage": 14.78, "elapsed_time": "1:14:07", "remaining_time": "7:07:18"} +{"current_steps": 840, "total_steps": 5676, "loss": 0.5152087807655334, "lr": 1.9481715386730044e-05, "epoch": 0.2960352422907489, "percentage": 14.8, "elapsed_time": "1:14:13", "remaining_time": "7:07:17"} +{"current_steps": 841, "total_steps": 5676, "loss": 0.7942806482315063, "lr": 1.9479862388183283e-05, "epoch": 0.2963876651982379, "percentage": 14.82, "elapsed_time": "1:14:17", "remaining_time": "7:07:08"} +{"current_steps": 842, "total_steps": 5676, "loss": 0.6364283561706543, "lr": 1.947800617151826e-05, "epoch": 0.2967400881057269, "percentage": 14.83, "elapsed_time": 
"1:14:22", "remaining_time": "7:06:57"} +{"current_steps": 843, "total_steps": 5676, "loss": 0.8278179168701172, "lr": 1.9476146737365112e-05, "epoch": 0.2970925110132159, "percentage": 14.85, "elapsed_time": "1:14:27", "remaining_time": "7:06:52"} +{"current_steps": 844, "total_steps": 5676, "loss": 0.7369956970214844, "lr": 1.9474284086355057e-05, "epoch": 0.29744493392070487, "percentage": 14.87, "elapsed_time": "1:14:32", "remaining_time": "7:06:47"} +{"current_steps": 845, "total_steps": 5676, "loss": 0.6879928112030029, "lr": 1.9472418219120403e-05, "epoch": 0.29779735682819386, "percentage": 14.89, "elapsed_time": "1:14:38", "remaining_time": "7:06:41"} +{"current_steps": 846, "total_steps": 5676, "loss": 0.8312973976135254, "lr": 1.9470549136294554e-05, "epoch": 0.2981497797356828, "percentage": 14.9, "elapsed_time": "1:14:42", "remaining_time": "7:06:32"} +{"current_steps": 847, "total_steps": 5676, "loss": 0.8102964162826538, "lr": 1.946867683851201e-05, "epoch": 0.2985022026431718, "percentage": 14.92, "elapsed_time": "1:14:47", "remaining_time": "7:06:23"} +{"current_steps": 848, "total_steps": 5676, "loss": 0.6136792898178101, "lr": 1.9466801326408355e-05, "epoch": 0.2988546255506608, "percentage": 14.94, "elapsed_time": "1:14:51", "remaining_time": "7:06:09"} +{"current_steps": 849, "total_steps": 5676, "loss": 0.6388760805130005, "lr": 1.946492260062027e-05, "epoch": 0.29920704845814977, "percentage": 14.96, "elapsed_time": "1:14:55", "remaining_time": "7:06:00"} +{"current_steps": 850, "total_steps": 5676, "loss": 0.6443628072738647, "lr": 1.9463040661785516e-05, "epoch": 0.29955947136563876, "percentage": 14.98, "elapsed_time": "1:15:00", "remaining_time": "7:05:54"} +{"current_steps": 851, "total_steps": 5676, "loss": 0.7763667702674866, "lr": 1.9461155510542962e-05, "epoch": 0.29991189427312775, "percentage": 14.99, "elapsed_time": "1:15:06", "remaining_time": "7:05:52"} +{"current_steps": 852, "total_steps": 5676, "loss": 0.8040921688079834, 
"lr": 1.9459267147532555e-05, "epoch": 0.30026431718061675, "percentage": 15.01, "elapsed_time": "1:15:12", "remaining_time": "7:05:51"} +{"current_steps": 853, "total_steps": 5676, "loss": 0.6271079778671265, "lr": 1.9457375573395334e-05, "epoch": 0.30061674008810574, "percentage": 15.03, "elapsed_time": "1:15:17", "remaining_time": "7:05:44"} +{"current_steps": 854, "total_steps": 5676, "loss": 0.6970022916793823, "lr": 1.945548078877343e-05, "epoch": 0.30096916299559473, "percentage": 15.05, "elapsed_time": "1:15:22", "remaining_time": "7:05:35"} +{"current_steps": 855, "total_steps": 5676, "loss": 0.8283002972602844, "lr": 1.9453582794310063e-05, "epoch": 0.3013215859030837, "percentage": 15.06, "elapsed_time": "1:15:28", "remaining_time": "7:05:32"} +{"current_steps": 856, "total_steps": 5676, "loss": 0.7989551424980164, "lr": 1.9451681590649545e-05, "epoch": 0.3016740088105727, "percentage": 15.08, "elapsed_time": "1:15:33", "remaining_time": "7:05:28"} +{"current_steps": 857, "total_steps": 5676, "loss": 0.7000687122344971, "lr": 1.9449777178437274e-05, "epoch": 0.30202643171806165, "percentage": 15.1, "elapsed_time": "1:15:39", "remaining_time": "7:05:25"} +{"current_steps": 858, "total_steps": 5676, "loss": 0.8005126714706421, "lr": 1.944786955831974e-05, "epoch": 0.30237885462555064, "percentage": 15.12, "elapsed_time": "1:15:43", "remaining_time": "7:05:15"} +{"current_steps": 859, "total_steps": 5676, "loss": 0.7060712575912476, "lr": 1.9445958730944515e-05, "epoch": 0.30273127753303963, "percentage": 15.13, "elapsed_time": "1:15:48", "remaining_time": "7:05:07"} +{"current_steps": 860, "total_steps": 5676, "loss": 0.6979726552963257, "lr": 1.9444044696960277e-05, "epoch": 0.3030837004405286, "percentage": 15.15, "elapsed_time": "1:15:54", "remaining_time": "7:05:03"} +{"current_steps": 861, "total_steps": 5676, "loss": 0.7916465401649475, "lr": 1.9442127457016768e-05, "epoch": 0.3034361233480176, "percentage": 15.17, "elapsed_time": "1:16:00", 
"remaining_time": "7:05:01"} +{"current_steps": 862, "total_steps": 5676, "loss": 0.6980502009391785, "lr": 1.944020701176484e-05, "epoch": 0.3037885462555066, "percentage": 15.19, "elapsed_time": "1:16:05", "remaining_time": "7:04:55"} +{"current_steps": 863, "total_steps": 5676, "loss": 0.8479218482971191, "lr": 1.943828336185642e-05, "epoch": 0.3041409691629956, "percentage": 15.2, "elapsed_time": "1:16:09", "remaining_time": "7:04:42"} +{"current_steps": 864, "total_steps": 5676, "loss": 0.8374297022819519, "lr": 1.9436356507944532e-05, "epoch": 0.3044933920704846, "percentage": 15.22, "elapsed_time": "1:16:14", "remaining_time": "7:04:35"} +{"current_steps": 865, "total_steps": 5676, "loss": 0.6871248483657837, "lr": 1.943442645068328e-05, "epoch": 0.3048458149779736, "percentage": 15.24, "elapsed_time": "1:16:20", "remaining_time": "7:04:36"} +{"current_steps": 866, "total_steps": 5676, "loss": 0.92267906665802, "lr": 1.9432493190727854e-05, "epoch": 0.3051982378854626, "percentage": 15.26, "elapsed_time": "1:16:25", "remaining_time": "7:04:27"} +{"current_steps": 867, "total_steps": 5676, "loss": 0.7068654298782349, "lr": 1.9430556728734543e-05, "epoch": 0.30555066079295157, "percentage": 15.27, "elapsed_time": "1:16:29", "remaining_time": "7:04:18"} +{"current_steps": 868, "total_steps": 5676, "loss": 0.830272912979126, "lr": 1.942861706536071e-05, "epoch": 0.3059030837004405, "percentage": 15.29, "elapsed_time": "1:16:34", "remaining_time": "7:04:11"} +{"current_steps": 869, "total_steps": 5676, "loss": 0.7996113300323486, "lr": 1.9426674201264814e-05, "epoch": 0.3062555066079295, "percentage": 15.31, "elapsed_time": "1:16:39", "remaining_time": "7:04:04"} +{"current_steps": 870, "total_steps": 5676, "loss": 0.7519441843032837, "lr": 1.9424728137106398e-05, "epoch": 0.3066079295154185, "percentage": 15.33, "elapsed_time": "1:16:44", "remaining_time": "7:03:57"} +{"current_steps": 871, "total_steps": 5676, "loss": 0.5812790393829346, "lr": 
1.9422778873546084e-05, "epoch": 0.3069603524229075, "percentage": 15.35, "elapsed_time": "1:16:50", "remaining_time": "7:03:53"} +{"current_steps": 872, "total_steps": 5676, "loss": 0.5953323841094971, "lr": 1.9420826411245595e-05, "epoch": 0.30731277533039647, "percentage": 15.36, "elapsed_time": "1:16:56", "remaining_time": "7:03:52"} +{"current_steps": 873, "total_steps": 5676, "loss": 0.8307937979698181, "lr": 1.941887075086772e-05, "epoch": 0.30766519823788546, "percentage": 15.38, "elapsed_time": "1:17:00", "remaining_time": "7:03:41"} +{"current_steps": 874, "total_steps": 5676, "loss": 0.7753443121910095, "lr": 1.9416911893076358e-05, "epoch": 0.30801762114537445, "percentage": 15.4, "elapsed_time": "1:17:05", "remaining_time": "7:03:33"} +{"current_steps": 875, "total_steps": 5676, "loss": 0.8803520798683167, "lr": 1.9414949838536468e-05, "epoch": 0.30837004405286345, "percentage": 15.42, "elapsed_time": "1:17:10", "remaining_time": "7:03:26"} +{"current_steps": 876, "total_steps": 5676, "loss": 0.6811587810516357, "lr": 1.9412984587914115e-05, "epoch": 0.30872246696035244, "percentage": 15.43, "elapsed_time": "1:17:17", "remaining_time": "7:03:29"} +{"current_steps": 877, "total_steps": 5676, "loss": 0.802099347114563, "lr": 1.9411016141876438e-05, "epoch": 0.30907488986784143, "percentage": 15.45, "elapsed_time": "1:17:21", "remaining_time": "7:03:16"} +{"current_steps": 878, "total_steps": 5676, "loss": 0.7325229644775391, "lr": 1.940904450109166e-05, "epoch": 0.3094273127753304, "percentage": 15.47, "elapsed_time": "1:17:26", "remaining_time": "7:03:11"} +{"current_steps": 879, "total_steps": 5676, "loss": 0.6515973210334778, "lr": 1.9407069666229097e-05, "epoch": 0.30977973568281936, "percentage": 15.49, "elapsed_time": "1:17:32", "remaining_time": "7:03:10"} +{"current_steps": 880, "total_steps": 5676, "loss": 0.7314589619636536, "lr": 1.9405091637959138e-05, "epoch": 0.31013215859030835, "percentage": 15.5, "elapsed_time": "1:17:37", 
"remaining_time": "7:03:05"} +{"current_steps": 881, "total_steps": 5676, "loss": 0.6668078303337097, "lr": 1.9403110416953267e-05, "epoch": 0.31048458149779734, "percentage": 15.52, "elapsed_time": "1:17:42", "remaining_time": "7:02:57"} +{"current_steps": 882, "total_steps": 5676, "loss": 0.693236231803894, "lr": 1.9401126003884047e-05, "epoch": 0.31083700440528633, "percentage": 15.54, "elapsed_time": "1:17:47", "remaining_time": "7:02:51"} +{"current_steps": 883, "total_steps": 5676, "loss": 0.8242754936218262, "lr": 1.939913839942512e-05, "epoch": 0.3111894273127753, "percentage": 15.56, "elapsed_time": "1:17:52", "remaining_time": "7:02:44"} +{"current_steps": 884, "total_steps": 5676, "loss": 0.7776592373847961, "lr": 1.939714760425122e-05, "epoch": 0.3115418502202643, "percentage": 15.57, "elapsed_time": "1:17:59", "remaining_time": "7:02:45"} +{"current_steps": 885, "total_steps": 5676, "loss": 0.7023555636405945, "lr": 1.9395153619038158e-05, "epoch": 0.3118942731277533, "percentage": 15.59, "elapsed_time": "1:18:04", "remaining_time": "7:02:37"} +{"current_steps": 886, "total_steps": 5676, "loss": 0.690382182598114, "lr": 1.939315644446283e-05, "epoch": 0.3122466960352423, "percentage": 15.61, "elapsed_time": "1:18:09", "remaining_time": "7:02:30"} +{"current_steps": 887, "total_steps": 5676, "loss": 0.7590082287788391, "lr": 1.9391156081203214e-05, "epoch": 0.3125991189427313, "percentage": 15.63, "elapsed_time": "1:18:14", "remaining_time": "7:02:24"} +{"current_steps": 888, "total_steps": 5676, "loss": 0.7378168702125549, "lr": 1.9389152529938377e-05, "epoch": 0.3129515418502203, "percentage": 15.64, "elapsed_time": "1:18:18", "remaining_time": "7:02:14"} +{"current_steps": 889, "total_steps": 5676, "loss": 0.7036890983581543, "lr": 1.938714579134845e-05, "epoch": 0.3133039647577093, "percentage": 15.66, "elapsed_time": "1:18:24", "remaining_time": "7:02:10"} +{"current_steps": 890, "total_steps": 5676, "loss": 0.8881829977035522, "lr": 
1.938513586611467e-05, "epoch": 0.3136563876651982, "percentage": 15.68, "elapsed_time": "1:18:28", "remaining_time": "7:01:59"} +{"current_steps": 891, "total_steps": 5676, "loss": 0.7467600107192993, "lr": 1.9383122754919342e-05, "epoch": 0.3140088105726872, "percentage": 15.7, "elapsed_time": "1:18:34", "remaining_time": "7:01:59"} +{"current_steps": 892, "total_steps": 5676, "loss": 0.9358077049255371, "lr": 1.938110645844585e-05, "epoch": 0.3143612334801762, "percentage": 15.72, "elapsed_time": "1:18:40", "remaining_time": "7:01:58"} +{"current_steps": 893, "total_steps": 5676, "loss": 0.7751256227493286, "lr": 1.9379086977378664e-05, "epoch": 0.3147136563876652, "percentage": 15.73, "elapsed_time": "1:18:45", "remaining_time": "7:01:49"} +{"current_steps": 894, "total_steps": 5676, "loss": 0.8020666837692261, "lr": 1.9377064312403338e-05, "epoch": 0.3150660792951542, "percentage": 15.75, "elapsed_time": "1:18:49", "remaining_time": "7:01:36"} +{"current_steps": 895, "total_steps": 5676, "loss": 0.7251513004302979, "lr": 1.9375038464206507e-05, "epoch": 0.31541850220264317, "percentage": 15.77, "elapsed_time": "1:18:54", "remaining_time": "7:01:30"} +{"current_steps": 896, "total_steps": 5676, "loss": 0.7163990139961243, "lr": 1.9373009433475874e-05, "epoch": 0.31577092511013216, "percentage": 15.79, "elapsed_time": "1:19:00", "remaining_time": "7:01:27"} +{"current_steps": 897, "total_steps": 5676, "loss": 0.7208842039108276, "lr": 1.937097722090024e-05, "epoch": 0.31612334801762115, "percentage": 15.8, "elapsed_time": "1:19:05", "remaining_time": "7:01:22"} +{"current_steps": 898, "total_steps": 5676, "loss": 0.7660849690437317, "lr": 1.9368941827169475e-05, "epoch": 0.31647577092511014, "percentage": 15.82, "elapsed_time": "1:19:10", "remaining_time": "7:01:17"} +{"current_steps": 899, "total_steps": 5676, "loss": 0.7017598152160645, "lr": 1.9366903252974532e-05, "epoch": 0.31682819383259914, "percentage": 15.84, "elapsed_time": "1:19:14", "remaining_time": 
"7:01:04"} +{"current_steps": 900, "total_steps": 5676, "loss": 0.6831692457199097, "lr": 1.9364861499007443e-05, "epoch": 0.31718061674008813, "percentage": 15.86, "elapsed_time": "1:19:21", "remaining_time": "7:01:05"} +{"current_steps": 901, "total_steps": 5676, "loss": 0.6555520296096802, "lr": 1.936281656596132e-05, "epoch": 0.3175330396475771, "percentage": 15.87, "elapsed_time": "1:19:30", "remaining_time": "7:01:20"} +{"current_steps": 902, "total_steps": 5676, "loss": 0.7401334047317505, "lr": 1.9360768454530356e-05, "epoch": 0.31788546255506606, "percentage": 15.89, "elapsed_time": "1:19:35", "remaining_time": "7:01:14"} +{"current_steps": 903, "total_steps": 5676, "loss": 0.7415893077850342, "lr": 1.935871716540982e-05, "epoch": 0.31823788546255505, "percentage": 15.91, "elapsed_time": "1:19:40", "remaining_time": "7:01:07"} +{"current_steps": 904, "total_steps": 5676, "loss": 0.8254752159118652, "lr": 1.935666269929606e-05, "epoch": 0.31859030837004404, "percentage": 15.93, "elapsed_time": "1:19:45", "remaining_time": "7:01:02"} +{"current_steps": 905, "total_steps": 5676, "loss": 0.708149254322052, "lr": 1.9354605056886505e-05, "epoch": 0.31894273127753303, "percentage": 15.94, "elapsed_time": "1:19:52", "remaining_time": "7:01:04"} +{"current_steps": 906, "total_steps": 5676, "loss": 0.8084006905555725, "lr": 1.9352544238879654e-05, "epoch": 0.319295154185022, "percentage": 15.96, "elapsed_time": "1:19:58", "remaining_time": "7:01:01"} +{"current_steps": 907, "total_steps": 5676, "loss": 0.8039542436599731, "lr": 1.93504802459751e-05, "epoch": 0.319647577092511, "percentage": 15.98, "elapsed_time": "1:20:03", "remaining_time": "7:00:58"} +{"current_steps": 908, "total_steps": 5676, "loss": 0.7563241720199585, "lr": 1.93484130788735e-05, "epoch": 0.32, "percentage": 16.0, "elapsed_time": "1:20:08", "remaining_time": "7:00:52"} +{"current_steps": 909, "total_steps": 5676, "loss": 0.7972971200942993, "lr": 1.9346342738276593e-05, "epoch": 
0.320352422907489, "percentage": 16.01, "elapsed_time": "1:20:14", "remaining_time": "7:00:47"} +{"current_steps": 910, "total_steps": 5676, "loss": 0.6693121790885925, "lr": 1.93442692248872e-05, "epoch": 0.320704845814978, "percentage": 16.03, "elapsed_time": "1:20:18", "remaining_time": "7:00:37"} +{"current_steps": 911, "total_steps": 5676, "loss": 0.6597858667373657, "lr": 1.9342192539409203e-05, "epoch": 0.321057268722467, "percentage": 16.05, "elapsed_time": "1:20:23", "remaining_time": "7:00:31"} +{"current_steps": 912, "total_steps": 5676, "loss": 0.6771499514579773, "lr": 1.934011268254758e-05, "epoch": 0.321409691629956, "percentage": 16.07, "elapsed_time": "1:20:28", "remaining_time": "7:00:22"} +{"current_steps": 913, "total_steps": 5676, "loss": 0.6903397440910339, "lr": 1.9338029655008375e-05, "epoch": 0.3217621145374449, "percentage": 16.09, "elapsed_time": "1:20:33", "remaining_time": "7:00:15"} +{"current_steps": 914, "total_steps": 5676, "loss": 0.6287999153137207, "lr": 1.9335943457498717e-05, "epoch": 0.3221145374449339, "percentage": 16.1, "elapsed_time": "1:20:39", "remaining_time": "7:00:16"} +{"current_steps": 915, "total_steps": 5676, "loss": 0.7199264764785767, "lr": 1.93338540907268e-05, "epoch": 0.3224669603524229, "percentage": 16.12, "elapsed_time": "1:20:44", "remaining_time": "7:00:07"} +{"current_steps": 916, "total_steps": 5676, "loss": 0.6960160732269287, "lr": 1.9331761555401896e-05, "epoch": 0.3228193832599119, "percentage": 16.14, "elapsed_time": "1:20:49", "remaining_time": "7:00:01"} +{"current_steps": 917, "total_steps": 5676, "loss": 0.8981958627700806, "lr": 1.932966585223436e-05, "epoch": 0.3231718061674009, "percentage": 16.16, "elapsed_time": "1:20:54", "remaining_time": "6:59:51"} +{"current_steps": 918, "total_steps": 5676, "loss": 0.786432147026062, "lr": 1.932756698193562e-05, "epoch": 0.32352422907488987, "percentage": 16.17, "elapsed_time": "1:20:59", "remaining_time": "6:59:47"} +{"current_steps": 919, 
"total_steps": 5676, "loss": 0.7260904312133789, "lr": 1.9325464945218172e-05, "epoch": 0.32387665198237886, "percentage": 16.19, "elapsed_time": "1:21:04", "remaining_time": "6:59:42"} +{"current_steps": 920, "total_steps": 5676, "loss": 0.715835452079773, "lr": 1.9323359742795595e-05, "epoch": 0.32422907488986785, "percentage": 16.21, "elapsed_time": "1:21:10", "remaining_time": "6:59:39"} +{"current_steps": 921, "total_steps": 5676, "loss": 0.6312157511711121, "lr": 1.932125137538254e-05, "epoch": 0.32458149779735684, "percentage": 16.23, "elapsed_time": "1:21:14", "remaining_time": "6:59:27"} +{"current_steps": 922, "total_steps": 5676, "loss": 0.7565821409225464, "lr": 1.931913984369473e-05, "epoch": 0.32493392070484584, "percentage": 16.24, "elapsed_time": "1:21:20", "remaining_time": "6:59:22"} +{"current_steps": 923, "total_steps": 5676, "loss": 0.6866531372070312, "lr": 1.931702514844896e-05, "epoch": 0.3252863436123348, "percentage": 16.26, "elapsed_time": "1:21:25", "remaining_time": "6:59:18"} +{"current_steps": 924, "total_steps": 5676, "loss": 0.879021167755127, "lr": 1.9314907290363117e-05, "epoch": 0.32563876651982376, "percentage": 16.28, "elapsed_time": "1:21:31", "remaining_time": "6:59:14"} +{"current_steps": 925, "total_steps": 5676, "loss": 0.6972150802612305, "lr": 1.9312786270156135e-05, "epoch": 0.32599118942731276, "percentage": 16.3, "elapsed_time": "1:21:35", "remaining_time": "6:59:02"} +{"current_steps": 926, "total_steps": 5676, "loss": 0.8735189437866211, "lr": 1.9310662088548042e-05, "epoch": 0.32634361233480175, "percentage": 16.31, "elapsed_time": "1:21:39", "remaining_time": "6:58:53"} +{"current_steps": 927, "total_steps": 5676, "loss": 0.6114254593849182, "lr": 1.930853474625993e-05, "epoch": 0.32669603524229074, "percentage": 16.33, "elapsed_time": "1:21:45", "remaining_time": "6:58:51"} +{"current_steps": 928, "total_steps": 5676, "loss": 0.8032322525978088, "lr": 1.930640424401396e-05, "epoch": 0.32704845814977973, 
"percentage": 16.35, "elapsed_time": "1:21:51", "remaining_time": "6:58:49"} +{"current_steps": 929, "total_steps": 5676, "loss": 0.7391160726547241, "lr": 1.9304270582533376e-05, "epoch": 0.3274008810572687, "percentage": 16.37, "elapsed_time": "1:21:55", "remaining_time": "6:58:36"} +{"current_steps": 930, "total_steps": 5676, "loss": 0.7055366039276123, "lr": 1.930213376254249e-05, "epoch": 0.3277533039647577, "percentage": 16.38, "elapsed_time": "1:22:02", "remaining_time": "6:58:38"} +{"current_steps": 931, "total_steps": 5676, "loss": 0.671670138835907, "lr": 1.9299993784766684e-05, "epoch": 0.3281057268722467, "percentage": 16.4, "elapsed_time": "1:22:07", "remaining_time": "6:58:34"} +{"current_steps": 932, "total_steps": 5676, "loss": 0.7486976385116577, "lr": 1.9297850649932416e-05, "epoch": 0.3284581497797357, "percentage": 16.42, "elapsed_time": "1:22:13", "remaining_time": "6:58:31"} +{"current_steps": 933, "total_steps": 5676, "loss": 0.8767625093460083, "lr": 1.929570435876721e-05, "epoch": 0.3288105726872247, "percentage": 16.44, "elapsed_time": "1:22:19", "remaining_time": "6:58:32"} +{"current_steps": 934, "total_steps": 5676, "loss": 0.6841862797737122, "lr": 1.929355491199967e-05, "epoch": 0.3291629955947137, "percentage": 16.46, "elapsed_time": "1:22:25", "remaining_time": "6:58:29"} +{"current_steps": 935, "total_steps": 5676, "loss": 0.7745054960250854, "lr": 1.929140231035946e-05, "epoch": 0.3295154185022026, "percentage": 16.47, "elapsed_time": "1:22:31", "remaining_time": "6:58:24"} +{"current_steps": 936, "total_steps": 5676, "loss": 0.5879434943199158, "lr": 1.928924655457733e-05, "epoch": 0.3298678414096916, "percentage": 16.49, "elapsed_time": "1:22:37", "remaining_time": "6:58:25"} +{"current_steps": 937, "total_steps": 5676, "loss": 0.8484170436859131, "lr": 1.9287087645385084e-05, "epoch": 0.3302202643171806, "percentage": 16.51, "elapsed_time": "1:22:41", "remaining_time": "6:58:15"} +{"current_steps": 938, "total_steps": 5676, 
"loss": 0.6518877148628235, "lr": 1.9284925583515604e-05, "epoch": 0.3305726872246696, "percentage": 16.53, "elapsed_time": "1:22:46", "remaining_time": "6:58:08"} +{"current_steps": 939, "total_steps": 5676, "loss": 0.7694787383079529, "lr": 1.928276036970285e-05, "epoch": 0.3309251101321586, "percentage": 16.54, "elapsed_time": "1:22:53", "remaining_time": "6:58:10"} +{"current_steps": 940, "total_steps": 5676, "loss": 0.6893239617347717, "lr": 1.928059200468184e-05, "epoch": 0.3312775330396476, "percentage": 16.56, "elapsed_time": "1:22:59", "remaining_time": "6:58:09"} +{"current_steps": 941, "total_steps": 5676, "loss": 0.7731181383132935, "lr": 1.927842048918867e-05, "epoch": 0.33162995594713657, "percentage": 16.58, "elapsed_time": "1:23:04", "remaining_time": "6:58:03"} +{"current_steps": 942, "total_steps": 5676, "loss": 0.652579665184021, "lr": 1.9276245823960495e-05, "epoch": 0.33198237885462556, "percentage": 16.6, "elapsed_time": "1:23:09", "remaining_time": "6:57:52"} +{"current_steps": 943, "total_steps": 5676, "loss": 0.7504575252532959, "lr": 1.927406800973555e-05, "epoch": 0.33233480176211455, "percentage": 16.61, "elapsed_time": "1:23:13", "remaining_time": "6:57:44"} +{"current_steps": 944, "total_steps": 5676, "loss": 0.6199444532394409, "lr": 1.927188704725314e-05, "epoch": 0.33268722466960354, "percentage": 16.63, "elapsed_time": "1:23:20", "remaining_time": "6:57:47"} +{"current_steps": 945, "total_steps": 5676, "loss": 0.7452073693275452, "lr": 1.9269702937253623e-05, "epoch": 0.33303964757709253, "percentage": 16.65, "elapsed_time": "1:23:26", "remaining_time": "6:57:43"} +{"current_steps": 946, "total_steps": 5676, "loss": 0.7538012266159058, "lr": 1.926751568047845e-05, "epoch": 0.33339207048458147, "percentage": 16.67, "elapsed_time": "1:23:31", "remaining_time": "6:57:36"} +{"current_steps": 947, "total_steps": 5676, "loss": 0.6670408248901367, "lr": 1.9265325277670114e-05, "epoch": 0.33374449339207046, "percentage": 16.68, 
"elapsed_time": "1:23:36", "remaining_time": "6:57:31"} +{"current_steps": 948, "total_steps": 5676, "loss": 0.8060495853424072, "lr": 1.926313172957219e-05, "epoch": 0.33409691629955945, "percentage": 16.7, "elapsed_time": "1:23:42", "remaining_time": "6:57:26"} +{"current_steps": 949, "total_steps": 5676, "loss": 0.7494044303894043, "lr": 1.926093503692933e-05, "epoch": 0.33444933920704845, "percentage": 16.72, "elapsed_time": "1:23:47", "remaining_time": "6:57:20"} +{"current_steps": 950, "total_steps": 5676, "loss": 0.5751914978027344, "lr": 1.9258735200487235e-05, "epoch": 0.33480176211453744, "percentage": 16.74, "elapsed_time": "1:23:52", "remaining_time": "6:57:16"} +{"current_steps": 951, "total_steps": 5676, "loss": 0.7234281301498413, "lr": 1.9256532220992683e-05, "epoch": 0.33515418502202643, "percentage": 16.75, "elapsed_time": "1:23:58", "remaining_time": "6:57:15"} +{"current_steps": 952, "total_steps": 5676, "loss": 0.7721251249313354, "lr": 1.9254326099193515e-05, "epoch": 0.3355066079295154, "percentage": 16.77, "elapsed_time": "1:24:03", "remaining_time": "6:57:08"} +{"current_steps": 953, "total_steps": 5676, "loss": 0.7240835428237915, "lr": 1.925211683583864e-05, "epoch": 0.3358590308370044, "percentage": 16.79, "elapsed_time": "1:24:08", "remaining_time": "6:56:59"} +{"current_steps": 954, "total_steps": 5676, "loss": 0.6622776985168457, "lr": 1.9249904431678037e-05, "epoch": 0.3362114537444934, "percentage": 16.81, "elapsed_time": "1:24:12", "remaining_time": "6:56:48"} +{"current_steps": 955, "total_steps": 5676, "loss": 0.9682766199111938, "lr": 1.9247688887462747e-05, "epoch": 0.3365638766519824, "percentage": 16.83, "elapsed_time": "1:24:16", "remaining_time": "6:56:36"} +{"current_steps": 956, "total_steps": 5676, "loss": 0.8363134860992432, "lr": 1.9245470203944878e-05, "epoch": 0.3369162995594714, "percentage": 16.84, "elapsed_time": "1:24:21", "remaining_time": "6:56:30"} +{"current_steps": 957, "total_steps": 5676, "loss": 
0.6530857086181641, "lr": 1.9243248381877605e-05, "epoch": 0.3372687224669604, "percentage": 16.86, "elapsed_time": "1:24:27", "remaining_time": "6:56:29"} +{"current_steps": 958, "total_steps": 5676, "loss": 0.5186585187911987, "lr": 1.924102342201517e-05, "epoch": 0.3376211453744493, "percentage": 16.88, "elapsed_time": "1:24:33", "remaining_time": "6:56:26"} +{"current_steps": 959, "total_steps": 5676, "loss": 0.6729516983032227, "lr": 1.9238795325112867e-05, "epoch": 0.3379735682819383, "percentage": 16.9, "elapsed_time": "1:24:38", "remaining_time": "6:56:20"} +{"current_steps": 960, "total_steps": 5676, "loss": 0.6991842985153198, "lr": 1.9236564091927083e-05, "epoch": 0.3383259911894273, "percentage": 16.91, "elapsed_time": "1:24:43", "remaining_time": "6:56:14"} +{"current_steps": 961, "total_steps": 5676, "loss": 0.7738245725631714, "lr": 1.9234329723215235e-05, "epoch": 0.3386784140969163, "percentage": 16.93, "elapsed_time": "1:24:47", "remaining_time": "6:56:03"} +{"current_steps": 962, "total_steps": 5676, "loss": 0.7027466893196106, "lr": 1.923209221973583e-05, "epoch": 0.3390308370044053, "percentage": 16.95, "elapsed_time": "1:24:54", "remaining_time": "6:56:05"} +{"current_steps": 963, "total_steps": 5676, "loss": 0.7868508696556091, "lr": 1.922985158224843e-05, "epoch": 0.3393832599118943, "percentage": 16.97, "elapsed_time": "1:24:59", "remaining_time": "6:55:58"} +{"current_steps": 964, "total_steps": 5676, "loss": 0.7499512434005737, "lr": 1.9227607811513662e-05, "epoch": 0.33973568281938327, "percentage": 16.98, "elapsed_time": "1:25:03", "remaining_time": "6:55:47"} +{"current_steps": 965, "total_steps": 5676, "loss": 0.6662228107452393, "lr": 1.9225360908293217e-05, "epoch": 0.34008810572687226, "percentage": 17.0, "elapsed_time": "1:25:08", "remaining_time": "6:55:37"} +{"current_steps": 966, "total_steps": 5676, "loss": 0.8570939302444458, "lr": 1.9223110873349847e-05, "epoch": 0.34044052863436125, "percentage": 17.02, "elapsed_time": 
"1:25:13", "remaining_time": "6:55:30"} +{"current_steps": 967, "total_steps": 5676, "loss": 0.7497669458389282, "lr": 1.9220857707447372e-05, "epoch": 0.34079295154185024, "percentage": 17.04, "elapsed_time": "1:25:19", "remaining_time": "6:55:29"} +{"current_steps": 968, "total_steps": 5676, "loss": 0.7356737852096558, "lr": 1.9218601411350663e-05, "epoch": 0.34114537444933923, "percentage": 17.05, "elapsed_time": "1:25:25", "remaining_time": "6:55:26"} +{"current_steps": 969, "total_steps": 5676, "loss": 0.7880491018295288, "lr": 1.9216341985825672e-05, "epoch": 0.34149779735682817, "percentage": 17.07, "elapsed_time": "1:25:30", "remaining_time": "6:55:19"} +{"current_steps": 970, "total_steps": 5676, "loss": 0.734922468662262, "lr": 1.92140794316394e-05, "epoch": 0.34185022026431716, "percentage": 17.09, "elapsed_time": "1:25:34", "remaining_time": "6:55:08"} +{"current_steps": 971, "total_steps": 5676, "loss": 0.6710363626480103, "lr": 1.9211813749559916e-05, "epoch": 0.34220264317180615, "percentage": 17.11, "elapsed_time": "1:25:39", "remaining_time": "6:55:01"} +{"current_steps": 972, "total_steps": 5676, "loss": 0.7300584316253662, "lr": 1.920954494035634e-05, "epoch": 0.34255506607929515, "percentage": 17.12, "elapsed_time": "1:25:44", "remaining_time": "6:54:57"} +{"current_steps": 973, "total_steps": 5676, "loss": 0.8584152460098267, "lr": 1.9207273004798873e-05, "epoch": 0.34290748898678414, "percentage": 17.14, "elapsed_time": "1:25:50", "remaining_time": "6:54:56"} +{"current_steps": 974, "total_steps": 5676, "loss": 0.7307419776916504, "lr": 1.9204997943658764e-05, "epoch": 0.34325991189427313, "percentage": 17.16, "elapsed_time": "1:25:57", "remaining_time": "6:54:56"} +{"current_steps": 975, "total_steps": 5676, "loss": 0.6004960536956787, "lr": 1.920271975770832e-05, "epoch": 0.3436123348017621, "percentage": 17.18, "elapsed_time": "1:26:01", "remaining_time": "6:54:47"} +{"current_steps": 976, "total_steps": 5676, "loss": 0.7951763868331909, 
"lr": 1.920043844772092e-05, "epoch": 0.3439647577092511, "percentage": 17.2, "elapsed_time": "1:26:06", "remaining_time": "6:54:39"} +{"current_steps": 977, "total_steps": 5676, "loss": 0.6835082769393921, "lr": 1.919815401447099e-05, "epoch": 0.3443171806167401, "percentage": 17.21, "elapsed_time": "1:26:10", "remaining_time": "6:54:27"} +{"current_steps": 978, "total_steps": 5676, "loss": 0.7556526064872742, "lr": 1.9195866458734034e-05, "epoch": 0.3446696035242291, "percentage": 17.23, "elapsed_time": "1:26:15", "remaining_time": "6:54:21"} +{"current_steps": 979, "total_steps": 5676, "loss": 0.6918114423751831, "lr": 1.91935757812866e-05, "epoch": 0.3450220264317181, "percentage": 17.25, "elapsed_time": "1:26:22", "remaining_time": "6:54:24"} +{"current_steps": 980, "total_steps": 5676, "loss": 0.8197037577629089, "lr": 1.9191281982906304e-05, "epoch": 0.345374449339207, "percentage": 17.27, "elapsed_time": "1:26:26", "remaining_time": "6:54:13"} +{"current_steps": 981, "total_steps": 5676, "loss": 0.833138644695282, "lr": 1.9188985064371818e-05, "epoch": 0.345726872246696, "percentage": 17.28, "elapsed_time": "1:26:31", "remaining_time": "6:54:08"} +{"current_steps": 982, "total_steps": 5676, "loss": 0.6593397855758667, "lr": 1.9186685026462874e-05, "epoch": 0.346079295154185, "percentage": 17.3, "elapsed_time": "1:26:36", "remaining_time": "6:53:59"} +{"current_steps": 983, "total_steps": 5676, "loss": 0.7535643577575684, "lr": 1.918438186996026e-05, "epoch": 0.346431718061674, "percentage": 17.32, "elapsed_time": "1:26:41", "remaining_time": "6:53:50"} +{"current_steps": 984, "total_steps": 5676, "loss": 0.6959745287895203, "lr": 1.9182075595645836e-05, "epoch": 0.346784140969163, "percentage": 17.34, "elapsed_time": "1:26:46", "remaining_time": "6:53:44"} +{"current_steps": 985, "total_steps": 5676, "loss": 0.7349518537521362, "lr": 1.91797662043025e-05, "epoch": 0.347136563876652, "percentage": 17.35, "elapsed_time": "1:26:51", "remaining_time": 
"6:53:41"} +{"current_steps": 986, "total_steps": 5676, "loss": 0.7677974700927734, "lr": 1.9177453696714224e-05, "epoch": 0.347488986784141, "percentage": 17.37, "elapsed_time": "1:26:57", "remaining_time": "6:53:37"} +{"current_steps": 987, "total_steps": 5676, "loss": 0.7302255630493164, "lr": 1.917513807366603e-05, "epoch": 0.34784140969162997, "percentage": 17.39, "elapsed_time": "1:27:02", "remaining_time": "6:53:30"} +{"current_steps": 988, "total_steps": 5676, "loss": 0.838138222694397, "lr": 1.9172819335944003e-05, "epoch": 0.34819383259911896, "percentage": 17.41, "elapsed_time": "1:27:09", "remaining_time": "6:53:31"} +{"current_steps": 989, "total_steps": 5676, "loss": 0.8018180131912231, "lr": 1.9170497484335276e-05, "epoch": 0.34854625550660795, "percentage": 17.42, "elapsed_time": "1:27:13", "remaining_time": "6:53:23"} +{"current_steps": 990, "total_steps": 5676, "loss": 0.8085787296295166, "lr": 1.9168172519628056e-05, "epoch": 0.34889867841409694, "percentage": 17.44, "elapsed_time": "1:27:19", "remaining_time": "6:53:18"} +{"current_steps": 991, "total_steps": 5676, "loss": 0.8419004082679749, "lr": 1.9165844442611584e-05, "epoch": 0.3492511013215859, "percentage": 17.46, "elapsed_time": "1:27:24", "remaining_time": "6:53:11"} +{"current_steps": 992, "total_steps": 5676, "loss": 0.8255139589309692, "lr": 1.916351325407618e-05, "epoch": 0.34960352422907487, "percentage": 17.48, "elapsed_time": "1:27:28", "remaining_time": "6:53:02"} +{"current_steps": 993, "total_steps": 5676, "loss": 0.7588528990745544, "lr": 1.9161178954813203e-05, "epoch": 0.34995594713656386, "percentage": 17.49, "elapsed_time": "1:27:33", "remaining_time": "6:52:57"} +{"current_steps": 994, "total_steps": 5676, "loss": 0.7057096362113953, "lr": 1.9158841545615076e-05, "epoch": 0.35030837004405285, "percentage": 17.51, "elapsed_time": "1:27:39", "remaining_time": "6:52:52"} +{"current_steps": 995, "total_steps": 5676, "loss": 0.6913125514984131, "lr": 1.915650102727528e-05, 
"epoch": 0.35066079295154184, "percentage": 17.53, "elapsed_time": "1:27:44", "remaining_time": "6:52:47"} +{"current_steps": 996, "total_steps": 5676, "loss": 0.7622898817062378, "lr": 1.9154157400588348e-05, "epoch": 0.35101321585903084, "percentage": 17.55, "elapsed_time": "1:27:49", "remaining_time": "6:52:41"} +{"current_steps": 997, "total_steps": 5676, "loss": 0.6918702125549316, "lr": 1.915181066634986e-05, "epoch": 0.3513656387665198, "percentage": 17.57, "elapsed_time": "1:27:54", "remaining_time": "6:52:33"} +{"current_steps": 998, "total_steps": 5676, "loss": 0.8801462650299072, "lr": 1.914946082535647e-05, "epoch": 0.3517180616740088, "percentage": 17.58, "elapsed_time": "1:28:00", "remaining_time": "6:52:29"} +{"current_steps": 999, "total_steps": 5676, "loss": 0.7901172637939453, "lr": 1.9147107878405873e-05, "epoch": 0.3520704845814978, "percentage": 17.6, "elapsed_time": "1:28:05", "remaining_time": "6:52:23"} +{"current_steps": 1000, "total_steps": 5676, "loss": 0.7308447360992432, "lr": 1.9144751826296818e-05, "epoch": 0.3524229074889868, "percentage": 17.62, "elapsed_time": "1:28:09", "remaining_time": "6:52:14"} +{"current_steps": 1001, "total_steps": 5676, "loss": 0.5733275413513184, "lr": 1.9142392669829114e-05, "epoch": 0.3527753303964758, "percentage": 17.64, "elapsed_time": "1:28:19", "remaining_time": "6:52:28"} +{"current_steps": 1002, "total_steps": 5676, "loss": 0.7251306772232056, "lr": 1.9140030409803622e-05, "epoch": 0.35312775330396473, "percentage": 17.65, "elapsed_time": "1:28:23", "remaining_time": "6:52:19"} +{"current_steps": 1003, "total_steps": 5676, "loss": 0.7983027696609497, "lr": 1.913766504702225e-05, "epoch": 0.3534801762114537, "percentage": 17.67, "elapsed_time": "1:28:28", "remaining_time": "6:52:13"} +{"current_steps": 1004, "total_steps": 5676, "loss": 0.7464017868041992, "lr": 1.9135296582287973e-05, "epoch": 0.3538325991189427, "percentage": 17.69, "elapsed_time": "1:28:32", "remaining_time": "6:52:02"} 
+{"current_steps": 1005, "total_steps": 5676, "loss": 0.7333002686500549, "lr": 1.9132925016404805e-05, "epoch": 0.3541850220264317, "percentage": 17.71, "elapsed_time": "1:28:38", "remaining_time": "6:52:01"} +{"current_steps": 1006, "total_steps": 5676, "loss": 0.729085385799408, "lr": 1.9130550350177823e-05, "epoch": 0.3545374449339207, "percentage": 17.72, "elapsed_time": "1:28:45", "remaining_time": "6:52:03"} +{"current_steps": 1007, "total_steps": 5676, "loss": 0.7599227428436279, "lr": 1.9128172584413148e-05, "epoch": 0.3548898678414097, "percentage": 17.74, "elapsed_time": "1:28:51", "remaining_time": "6:51:59"} +{"current_steps": 1008, "total_steps": 5676, "loss": 0.8110464811325073, "lr": 1.9125791719917962e-05, "epoch": 0.3552422907488987, "percentage": 17.76, "elapsed_time": "1:28:55", "remaining_time": "6:51:50"} +{"current_steps": 1009, "total_steps": 5676, "loss": 0.7431697845458984, "lr": 1.912340775750049e-05, "epoch": 0.3555947136563877, "percentage": 17.78, "elapsed_time": "1:29:01", "remaining_time": "6:51:44"} +{"current_steps": 1010, "total_steps": 5676, "loss": 0.7833640575408936, "lr": 1.9121020697970016e-05, "epoch": 0.35594713656387666, "percentage": 17.79, "elapsed_time": "1:29:06", "remaining_time": "6:51:38"} +{"current_steps": 1011, "total_steps": 5676, "loss": 0.7693058252334595, "lr": 1.9118630542136874e-05, "epoch": 0.35629955947136566, "percentage": 17.81, "elapsed_time": "1:29:12", "remaining_time": "6:51:37"} +{"current_steps": 1012, "total_steps": 5676, "loss": 0.7724676132202148, "lr": 1.9116237290812445e-05, "epoch": 0.35665198237885465, "percentage": 17.83, "elapsed_time": "1:29:19", "remaining_time": "6:51:38"} +{"current_steps": 1013, "total_steps": 5676, "loss": 0.6024055480957031, "lr": 1.911384094480916e-05, "epoch": 0.3570044052863436, "percentage": 17.85, "elapsed_time": "1:29:24", "remaining_time": "6:51:31"} +{"current_steps": 1014, "total_steps": 5676, "loss": 0.7710703611373901, "lr": 1.9111441504940514e-05, 
"epoch": 0.3573568281938326, "percentage": 17.86, "elapsed_time": "1:29:30", "remaining_time": "6:51:30"} +{"current_steps": 1015, "total_steps": 5676, "loss": 0.7591651678085327, "lr": 1.910903897202103e-05, "epoch": 0.35770925110132157, "percentage": 17.88, "elapsed_time": "1:29:35", "remaining_time": "6:51:27"} +{"current_steps": 1016, "total_steps": 5676, "loss": 0.7721874713897705, "lr": 1.9106633346866302e-05, "epoch": 0.35806167400881056, "percentage": 17.9, "elapsed_time": "1:29:42", "remaining_time": "6:51:27"} +{"current_steps": 1017, "total_steps": 5676, "loss": 0.6767420172691345, "lr": 1.910422463029296e-05, "epoch": 0.35841409691629955, "percentage": 17.92, "elapsed_time": "1:29:48", "remaining_time": "6:51:24"} +{"current_steps": 1018, "total_steps": 5676, "loss": 0.6704902648925781, "lr": 1.910181282311869e-05, "epoch": 0.35876651982378854, "percentage": 17.94, "elapsed_time": "1:29:53", "remaining_time": "6:51:17"} +{"current_steps": 1019, "total_steps": 5676, "loss": 0.8871079683303833, "lr": 1.9099397926162227e-05, "epoch": 0.35911894273127754, "percentage": 17.95, "elapsed_time": "1:29:57", "remaining_time": "6:51:09"} +{"current_steps": 1020, "total_steps": 5676, "loss": 0.7222549319267273, "lr": 1.909697994024335e-05, "epoch": 0.3594713656387665, "percentage": 17.97, "elapsed_time": "1:30:02", "remaining_time": "6:50:59"} +{"current_steps": 1021, "total_steps": 5676, "loss": 0.7443021535873413, "lr": 1.9094558866182892e-05, "epoch": 0.3598237885462555, "percentage": 17.99, "elapsed_time": "1:30:08", "remaining_time": "6:50:56"} +{"current_steps": 1022, "total_steps": 5676, "loss": 0.7698349952697754, "lr": 1.9092134704802735e-05, "epoch": 0.3601762114537445, "percentage": 18.01, "elapsed_time": "1:30:12", "remaining_time": "6:50:48"} +{"current_steps": 1023, "total_steps": 5676, "loss": 0.863248348236084, "lr": 1.9089707456925798e-05, "epoch": 0.3605286343612335, "percentage": 18.02, "elapsed_time": "1:30:17", "remaining_time": "6:50:41"} 
+{"current_steps": 1024, "total_steps": 5676, "loss": 0.7036338448524475, "lr": 1.9087277123376068e-05, "epoch": 0.3608810572687225, "percentage": 18.04, "elapsed_time": "1:30:22", "remaining_time": "6:50:35"} +{"current_steps": 1025, "total_steps": 5676, "loss": 0.7427274584770203, "lr": 1.9084843704978558e-05, "epoch": 0.36123348017621143, "percentage": 18.06, "elapsed_time": "1:30:27", "remaining_time": "6:50:27"} +{"current_steps": 1026, "total_steps": 5676, "loss": 0.6548313498497009, "lr": 1.908240720255934e-05, "epoch": 0.3615859030837004, "percentage": 18.08, "elapsed_time": "1:30:33", "remaining_time": "6:50:23"} +{"current_steps": 1027, "total_steps": 5676, "loss": 0.7586454749107361, "lr": 1.9079967616945534e-05, "epoch": 0.3619383259911894, "percentage": 18.09, "elapsed_time": "1:30:38", "remaining_time": "6:50:21"} +{"current_steps": 1028, "total_steps": 5676, "loss": 0.6954889297485352, "lr": 1.90775249489653e-05, "epoch": 0.3622907488986784, "percentage": 18.11, "elapsed_time": "1:30:44", "remaining_time": "6:50:15"} +{"current_steps": 1029, "total_steps": 5676, "loss": 0.8798770904541016, "lr": 1.907507919944785e-05, "epoch": 0.3626431718061674, "percentage": 18.13, "elapsed_time": "1:30:49", "remaining_time": "6:50:09"} +{"current_steps": 1030, "total_steps": 5676, "loss": 0.6600694060325623, "lr": 1.9072630369223433e-05, "epoch": 0.3629955947136564, "percentage": 18.15, "elapsed_time": "1:30:54", "remaining_time": "6:50:05"} +{"current_steps": 1031, "total_steps": 5676, "loss": 0.6830897927284241, "lr": 1.9070178459123366e-05, "epoch": 0.3633480176211454, "percentage": 18.16, "elapsed_time": "1:31:00", "remaining_time": "6:49:59"} +{"current_steps": 1032, "total_steps": 5676, "loss": 0.6283613443374634, "lr": 1.906772346997998e-05, "epoch": 0.36370044052863437, "percentage": 18.18, "elapsed_time": "1:31:06", "remaining_time": "6:50:00"} +{"current_steps": 1033, "total_steps": 5676, "loss": 0.6451754570007324, "lr": 1.9065265402626676e-05, "epoch": 
0.36405286343612336, "percentage": 18.2, "elapsed_time": "1:31:11", "remaining_time": "6:49:54"} +{"current_steps": 1034, "total_steps": 5676, "loss": 0.7949883937835693, "lr": 1.9062804257897887e-05, "epoch": 0.36440528634361236, "percentage": 18.22, "elapsed_time": "1:31:17", "remaining_time": "6:49:49"} +{"current_steps": 1035, "total_steps": 5676, "loss": 0.625343918800354, "lr": 1.90603400366291e-05, "epoch": 0.36475770925110135, "percentage": 18.23, "elapsed_time": "1:31:22", "remaining_time": "6:49:43"} +{"current_steps": 1036, "total_steps": 5676, "loss": 0.8398839235305786, "lr": 1.9057872739656843e-05, "epoch": 0.3651101321585903, "percentage": 18.25, "elapsed_time": "1:31:28", "remaining_time": "6:49:39"} +{"current_steps": 1037, "total_steps": 5676, "loss": 0.8628166913986206, "lr": 1.9055402367818673e-05, "epoch": 0.3654625550660793, "percentage": 18.27, "elapsed_time": "1:31:32", "remaining_time": "6:49:31"} +{"current_steps": 1038, "total_steps": 5676, "loss": 0.7494110465049744, "lr": 1.905292892195322e-05, "epoch": 0.36581497797356827, "percentage": 18.29, "elapsed_time": "1:31:37", "remaining_time": "6:49:25"} +{"current_steps": 1039, "total_steps": 5676, "loss": 0.7695099115371704, "lr": 1.9050452402900134e-05, "epoch": 0.36616740088105726, "percentage": 18.31, "elapsed_time": "1:31:42", "remaining_time": "6:49:17"} +{"current_steps": 1040, "total_steps": 5676, "loss": 0.8067067861557007, "lr": 1.904797281150012e-05, "epoch": 0.36651982378854625, "percentage": 18.32, "elapsed_time": "1:31:48", "remaining_time": "6:49:13"} +{"current_steps": 1041, "total_steps": 5676, "loss": 0.7542074918746948, "lr": 1.9045490148594917e-05, "epoch": 0.36687224669603524, "percentage": 18.34, "elapsed_time": "1:31:53", "remaining_time": "6:49:09"} +{"current_steps": 1042, "total_steps": 5676, "loss": 0.7027335166931152, "lr": 1.9043004415027314e-05, "epoch": 0.36722466960352423, "percentage": 18.36, "elapsed_time": "1:31:57", "remaining_time": "6:48:58"} 
+{"current_steps": 1043, "total_steps": 5676, "loss": 0.7779253721237183, "lr": 1.9040515611641142e-05, "epoch": 0.3675770925110132, "percentage": 18.38, "elapsed_time": "1:32:03", "remaining_time": "6:48:55"} +{"current_steps": 1044, "total_steps": 5676, "loss": 0.6840049028396606, "lr": 1.9038023739281275e-05, "epoch": 0.3679295154185022, "percentage": 18.39, "elapsed_time": "1:32:07", "remaining_time": "6:48:43"} +{"current_steps": 1045, "total_steps": 5676, "loss": 0.6183794736862183, "lr": 1.903552879879362e-05, "epoch": 0.3682819383259912, "percentage": 18.41, "elapsed_time": "1:32:13", "remaining_time": "6:48:42"} +{"current_steps": 1046, "total_steps": 5676, "loss": 0.7770168781280518, "lr": 1.9033030791025127e-05, "epoch": 0.3686343612334802, "percentage": 18.43, "elapsed_time": "1:32:18", "remaining_time": "6:48:37"} +{"current_steps": 1047, "total_steps": 5676, "loss": 0.7192036509513855, "lr": 1.9030529716823806e-05, "epoch": 0.36898678414096914, "percentage": 18.45, "elapsed_time": "1:32:23", "remaining_time": "6:48:27"} +{"current_steps": 1048, "total_steps": 5676, "loss": 0.6604419946670532, "lr": 1.9028025577038688e-05, "epoch": 0.36933920704845813, "percentage": 18.46, "elapsed_time": "1:32:29", "remaining_time": "6:48:27"} +{"current_steps": 1049, "total_steps": 5676, "loss": 0.7999060153961182, "lr": 1.9025518372519847e-05, "epoch": 0.3696916299559471, "percentage": 18.48, "elapsed_time": "1:32:35", "remaining_time": "6:48:22"} +{"current_steps": 1050, "total_steps": 5676, "loss": 0.7487536668777466, "lr": 1.9023008104118404e-05, "epoch": 0.3700440528634361, "percentage": 18.5, "elapsed_time": "1:32:40", "remaining_time": "6:48:18"} +{"current_steps": 1051, "total_steps": 5676, "loss": 0.7786455154418945, "lr": 1.9020494772686513e-05, "epoch": 0.3703964757709251, "percentage": 18.52, "elapsed_time": "1:32:46", "remaining_time": "6:48:15"} +{"current_steps": 1052, "total_steps": 5676, "loss": 0.7592626214027405, "lr": 1.9017978379077378e-05, 
"epoch": 0.3707488986784141, "percentage": 18.53, "elapsed_time": "1:32:50", "remaining_time": "6:48:06"} +{"current_steps": 1053, "total_steps": 5676, "loss": 0.774850606918335, "lr": 1.901545892414523e-05, "epoch": 0.3711013215859031, "percentage": 18.55, "elapsed_time": "1:32:56", "remaining_time": "6:48:03"} +{"current_steps": 1054, "total_steps": 5676, "loss": 0.5430009365081787, "lr": 1.901293640874535e-05, "epoch": 0.3714537444933921, "percentage": 18.57, "elapsed_time": "1:33:01", "remaining_time": "6:47:57"} +{"current_steps": 1055, "total_steps": 5676, "loss": 0.7459923624992371, "lr": 1.9010410833734053e-05, "epoch": 0.37180616740088107, "percentage": 18.59, "elapsed_time": "1:33:06", "remaining_time": "6:47:49"} +{"current_steps": 1056, "total_steps": 5676, "loss": 0.6372017860412598, "lr": 1.9007882199968692e-05, "epoch": 0.37215859030837006, "percentage": 18.6, "elapsed_time": "1:33:11", "remaining_time": "6:47:41"} +{"current_steps": 1057, "total_steps": 5676, "loss": 0.6773583292961121, "lr": 1.900535050830766e-05, "epoch": 0.37251101321585905, "percentage": 18.62, "elapsed_time": "1:33:16", "remaining_time": "6:47:34"} +{"current_steps": 1058, "total_steps": 5676, "loss": 0.8431004285812378, "lr": 1.900281575961039e-05, "epoch": 0.372863436123348, "percentage": 18.64, "elapsed_time": "1:33:20", "remaining_time": "6:47:25"} +{"current_steps": 1059, "total_steps": 5676, "loss": 0.6361340284347534, "lr": 1.9000277954737342e-05, "epoch": 0.373215859030837, "percentage": 18.66, "elapsed_time": "1:33:25", "remaining_time": "6:47:18"} +{"current_steps": 1060, "total_steps": 5676, "loss": 0.7153787612915039, "lr": 1.8997737094550033e-05, "epoch": 0.373568281938326, "percentage": 18.68, "elapsed_time": "1:33:30", "remaining_time": "6:47:10"} +{"current_steps": 1061, "total_steps": 5676, "loss": 0.7244935631752014, "lr": 1.8995193179911e-05, "epoch": 0.37392070484581497, "percentage": 18.69, "elapsed_time": "1:33:34", "remaining_time": "6:47:00"} 
+{"current_steps": 1062, "total_steps": 5676, "loss": 0.6648202538490295, "lr": 1.8992646211683817e-05, "epoch": 0.37427312775330396, "percentage": 18.71, "elapsed_time": "1:33:39", "remaining_time": "6:46:56"} +{"current_steps": 1063, "total_steps": 5676, "loss": 0.6528836488723755, "lr": 1.8990096190733113e-05, "epoch": 0.37462555066079295, "percentage": 18.73, "elapsed_time": "1:33:44", "remaining_time": "6:46:47"} +{"current_steps": 1064, "total_steps": 5676, "loss": 0.6749341487884521, "lr": 1.8987543117924532e-05, "epoch": 0.37497797356828194, "percentage": 18.75, "elapsed_time": "1:33:48", "remaining_time": "6:46:39"} +{"current_steps": 1065, "total_steps": 5676, "loss": 0.7402256727218628, "lr": 1.8984986994124766e-05, "epoch": 0.37533039647577093, "percentage": 18.76, "elapsed_time": "1:33:53", "remaining_time": "6:46:29"} +{"current_steps": 1066, "total_steps": 5676, "loss": 0.5638695955276489, "lr": 1.898242782020154e-05, "epoch": 0.3756828193832599, "percentage": 18.78, "elapsed_time": "1:33:59", "remaining_time": "6:46:29"} +{"current_steps": 1067, "total_steps": 5676, "loss": 0.829822838306427, "lr": 1.897986559702361e-05, "epoch": 0.3760352422907489, "percentage": 18.8, "elapsed_time": "1:34:05", "remaining_time": "6:46:28"} +{"current_steps": 1068, "total_steps": 5676, "loss": 0.6796025037765503, "lr": 1.8977300325460774e-05, "epoch": 0.3763876651982379, "percentage": 18.82, "elapsed_time": "1:34:10", "remaining_time": "6:46:17"} +{"current_steps": 1069, "total_steps": 5676, "loss": 0.8584038615226746, "lr": 1.897473200638386e-05, "epoch": 0.37674008810572684, "percentage": 18.83, "elapsed_time": "1:34:15", "remaining_time": "6:46:15"} +{"current_steps": 1070, "total_steps": 5676, "loss": 0.8069149255752563, "lr": 1.897216064066474e-05, "epoch": 0.37709251101321584, "percentage": 18.85, "elapsed_time": "1:34:21", "remaining_time": "6:46:11"} +{"current_steps": 1071, "total_steps": 5676, "loss": 0.5762223601341248, "lr": 1.89695862291763e-05, 
"epoch": 0.37744493392070483, "percentage": 18.87, "elapsed_time": "1:34:26", "remaining_time": "6:46:02"} +{"current_steps": 1072, "total_steps": 5676, "loss": 0.6626466512680054, "lr": 1.8967008772792483e-05, "epoch": 0.3777973568281938, "percentage": 18.89, "elapsed_time": "1:34:30", "remaining_time": "6:45:54"} +{"current_steps": 1073, "total_steps": 5676, "loss": 0.6260244250297546, "lr": 1.896442827238825e-05, "epoch": 0.3781497797356828, "percentage": 18.9, "elapsed_time": "1:34:37", "remaining_time": "6:45:55"} +{"current_steps": 1074, "total_steps": 5676, "loss": 0.8090343475341797, "lr": 1.8961844728839602e-05, "epoch": 0.3785022026431718, "percentage": 18.92, "elapsed_time": "1:34:42", "remaining_time": "6:45:48"} +{"current_steps": 1075, "total_steps": 5676, "loss": 0.66957026720047, "lr": 1.8959258143023575e-05, "epoch": 0.3788546255506608, "percentage": 18.94, "elapsed_time": "1:34:47", "remaining_time": "6:45:44"} +{"current_steps": 1076, "total_steps": 5676, "loss": 0.7103087306022644, "lr": 1.8956668515818223e-05, "epoch": 0.3792070484581498, "percentage": 18.96, "elapsed_time": "1:34:53", "remaining_time": "6:45:42"} +{"current_steps": 1077, "total_steps": 5676, "loss": 0.7469112277030945, "lr": 1.895407584810266e-05, "epoch": 0.3795594713656388, "percentage": 18.97, "elapsed_time": "1:34:58", "remaining_time": "6:45:33"} +{"current_steps": 1078, "total_steps": 5676, "loss": 0.8252213001251221, "lr": 1.8951480140757003e-05, "epoch": 0.37991189427312777, "percentage": 18.99, "elapsed_time": "1:35:04", "remaining_time": "6:45:30"} +{"current_steps": 1079, "total_steps": 5676, "loss": 0.7204562425613403, "lr": 1.8948881394662417e-05, "epoch": 0.38026431718061676, "percentage": 19.01, "elapsed_time": "1:35:08", "remaining_time": "6:45:21"} +{"current_steps": 1080, "total_steps": 5676, "loss": 0.7325669527053833, "lr": 1.89462796107011e-05, "epoch": 0.38061674008810575, "percentage": 19.03, "elapsed_time": "1:35:13", "remaining_time": "6:45:15"} 
+{"current_steps": 1081, "total_steps": 5676, "loss": 0.738972008228302, "lr": 1.8943674789756276e-05, "epoch": 0.3809691629955947, "percentage": 19.05, "elapsed_time": "1:35:19", "remaining_time": "6:45:12"} +{"current_steps": 1082, "total_steps": 5676, "loss": 0.7468631267547607, "lr": 1.8941066932712194e-05, "epoch": 0.3813215859030837, "percentage": 19.06, "elapsed_time": "1:35:25", "remaining_time": "6:45:11"} +{"current_steps": 1083, "total_steps": 5676, "loss": 0.6479831337928772, "lr": 1.893845604045415e-05, "epoch": 0.3816740088105727, "percentage": 19.08, "elapsed_time": "1:35:30", "remaining_time": "6:45:01"} +{"current_steps": 1084, "total_steps": 5676, "loss": 0.7615871429443359, "lr": 1.893584211386845e-05, "epoch": 0.38202643171806167, "percentage": 19.1, "elapsed_time": "1:35:36", "remaining_time": "6:44:59"} +{"current_steps": 1085, "total_steps": 5676, "loss": 0.6934449076652527, "lr": 1.8933225153842446e-05, "epoch": 0.38237885462555066, "percentage": 19.12, "elapsed_time": "1:35:41", "remaining_time": "6:44:54"} +{"current_steps": 1086, "total_steps": 5676, "loss": 0.5267079472541809, "lr": 1.8930605161264517e-05, "epoch": 0.38273127753303965, "percentage": 19.13, "elapsed_time": "1:35:45", "remaining_time": "6:44:44"} +{"current_steps": 1087, "total_steps": 5676, "loss": 0.7309125661849976, "lr": 1.892798213702407e-05, "epoch": 0.38308370044052864, "percentage": 19.15, "elapsed_time": "1:35:50", "remaining_time": "6:44:37"} +{"current_steps": 1088, "total_steps": 5676, "loss": 0.8133678436279297, "lr": 1.892535608201153e-05, "epoch": 0.38343612334801763, "percentage": 19.17, "elapsed_time": "1:35:56", "remaining_time": "6:44:35"} +{"current_steps": 1089, "total_steps": 5676, "loss": 0.6097027063369751, "lr": 1.892272699711837e-05, "epoch": 0.3837885462555066, "percentage": 19.19, "elapsed_time": "1:36:02", "remaining_time": "6:44:30"} +{"current_steps": 1090, "total_steps": 5676, "loss": 0.70456862449646, "lr": 1.8920094883237082e-05, "epoch": 
0.3841409691629956, "percentage": 19.2, "elapsed_time": "1:36:07", "remaining_time": "6:44:25"} +{"current_steps": 1091, "total_steps": 5676, "loss": 0.7236523628234863, "lr": 1.8917459741261183e-05, "epoch": 0.3844933920704846, "percentage": 19.22, "elapsed_time": "1:36:12", "remaining_time": "6:44:18"} +{"current_steps": 1092, "total_steps": 5676, "loss": 0.7251272201538086, "lr": 1.8914821572085224e-05, "epoch": 0.38484581497797354, "percentage": 19.24, "elapsed_time": "1:36:18", "remaining_time": "6:44:15"} +{"current_steps": 1093, "total_steps": 5676, "loss": 0.7381070852279663, "lr": 1.8912180376604777e-05, "epoch": 0.38519823788546254, "percentage": 19.26, "elapsed_time": "1:36:23", "remaining_time": "6:44:08"} +{"current_steps": 1094, "total_steps": 5676, "loss": 0.6654129028320312, "lr": 1.8909536155716458e-05, "epoch": 0.3855506607929515, "percentage": 19.27, "elapsed_time": "1:36:27", "remaining_time": "6:43:58"} +{"current_steps": 1095, "total_steps": 5676, "loss": 0.7823128700256348, "lr": 1.8906888910317883e-05, "epoch": 0.3859030837004405, "percentage": 19.29, "elapsed_time": "1:36:33", "remaining_time": "6:43:59"} +{"current_steps": 1096, "total_steps": 5676, "loss": 0.5988126993179321, "lr": 1.8904238641307718e-05, "epoch": 0.3862555066079295, "percentage": 19.31, "elapsed_time": "1:36:40", "remaining_time": "6:43:58"} +{"current_steps": 1097, "total_steps": 5676, "loss": 0.7671465873718262, "lr": 1.8901585349585643e-05, "epoch": 0.3866079295154185, "percentage": 19.33, "elapsed_time": "1:36:45", "remaining_time": "6:43:52"} +{"current_steps": 1098, "total_steps": 5676, "loss": 0.7878838777542114, "lr": 1.889892903605237e-05, "epoch": 0.3869603524229075, "percentage": 19.34, "elapsed_time": "1:36:50", "remaining_time": "6:43:46"} +{"current_steps": 1099, "total_steps": 5676, "loss": 0.72254878282547, "lr": 1.8896269701609634e-05, "epoch": 0.3873127753303965, "percentage": 19.36, "elapsed_time": "1:36:56", "remaining_time": "6:43:42"} 
+{"current_steps": 1100, "total_steps": 5676, "loss": 0.6796868443489075, "lr": 1.8893607347160198e-05, "epoch": 0.3876651982378855, "percentage": 19.38, "elapsed_time": "1:37:01", "remaining_time": "6:43:39"} +{"current_steps": 1101, "total_steps": 5676, "loss": 0.6378471851348877, "lr": 1.8890941973607843e-05, "epoch": 0.38801762114537447, "percentage": 19.4, "elapsed_time": "1:37:11", "remaining_time": "6:43:52"} +{"current_steps": 1102, "total_steps": 5676, "loss": 0.8473032712936401, "lr": 1.888827358185739e-05, "epoch": 0.38837004405286346, "percentage": 19.42, "elapsed_time": "1:37:16", "remaining_time": "6:43:46"} +{"current_steps": 1103, "total_steps": 5676, "loss": 0.8272742033004761, "lr": 1.8885602172814667e-05, "epoch": 0.3887224669603524, "percentage": 19.43, "elapsed_time": "1:37:20", "remaining_time": "6:43:36"} +{"current_steps": 1104, "total_steps": 5676, "loss": 0.7244507670402527, "lr": 1.8882927747386533e-05, "epoch": 0.3890748898678414, "percentage": 19.45, "elapsed_time": "1:37:26", "remaining_time": "6:43:30"} +{"current_steps": 1105, "total_steps": 5676, "loss": 0.5764014720916748, "lr": 1.888025030648088e-05, "epoch": 0.3894273127753304, "percentage": 19.47, "elapsed_time": "1:37:32", "remaining_time": "6:43:29"} +{"current_steps": 1106, "total_steps": 5676, "loss": 0.6944009065628052, "lr": 1.887756985100661e-05, "epoch": 0.3897797356828194, "percentage": 19.49, "elapsed_time": "1:37:38", "remaining_time": "6:43:27"} +{"current_steps": 1107, "total_steps": 5676, "loss": 0.7096902132034302, "lr": 1.8874886381873657e-05, "epoch": 0.39013215859030836, "percentage": 19.5, "elapsed_time": "1:37:44", "remaining_time": "6:43:24"} +{"current_steps": 1108, "total_steps": 5676, "loss": 0.6304805278778076, "lr": 1.8872199899992973e-05, "epoch": 0.39048458149779736, "percentage": 19.52, "elapsed_time": "1:37:51", "remaining_time": "6:43:25"} +{"current_steps": 1109, "total_steps": 5676, "loss": 0.7091327905654907, "lr": 1.8869510406276538e-05, 
"epoch": 0.39083700440528635, "percentage": 19.54, "elapsed_time": "1:37:54", "remaining_time": "6:43:13"} +{"current_steps": 1110, "total_steps": 5676, "loss": 0.6575565338134766, "lr": 1.886681790163735e-05, "epoch": 0.39118942731277534, "percentage": 19.56, "elapsed_time": "1:38:00", "remaining_time": "6:43:07"} +{"current_steps": 1111, "total_steps": 5676, "loss": 0.837468147277832, "lr": 1.8864122386989426e-05, "epoch": 0.39154185022026433, "percentage": 19.57, "elapsed_time": "1:38:04", "remaining_time": "6:42:57"} +{"current_steps": 1112, "total_steps": 5676, "loss": 0.6861380338668823, "lr": 1.8861423863247816e-05, "epoch": 0.3918942731277533, "percentage": 19.59, "elapsed_time": "1:38:09", "remaining_time": "6:42:53"} +{"current_steps": 1113, "total_steps": 5676, "loss": 0.7421156167984009, "lr": 1.8858722331328582e-05, "epoch": 0.3922466960352423, "percentage": 19.61, "elapsed_time": "1:38:15", "remaining_time": "6:42:50"} +{"current_steps": 1114, "total_steps": 5676, "loss": 0.8037575483322144, "lr": 1.8856017792148807e-05, "epoch": 0.39259911894273125, "percentage": 19.63, "elapsed_time": "1:38:21", "remaining_time": "6:42:48"} +{"current_steps": 1115, "total_steps": 5676, "loss": 0.6530179381370544, "lr": 1.8853310246626608e-05, "epoch": 0.39295154185022024, "percentage": 19.64, "elapsed_time": "1:38:27", "remaining_time": "6:42:44"} +{"current_steps": 1116, "total_steps": 5676, "loss": 0.9039478302001953, "lr": 1.88505996956811e-05, "epoch": 0.39330396475770923, "percentage": 19.66, "elapsed_time": "1:38:32", "remaining_time": "6:42:37"} +{"current_steps": 1117, "total_steps": 5676, "loss": 0.7734917998313904, "lr": 1.8847886140232438e-05, "epoch": 0.3936563876651982, "percentage": 19.68, "elapsed_time": "1:38:37", "remaining_time": "6:42:32"} +{"current_steps": 1118, "total_steps": 5676, "loss": 0.7146204113960266, "lr": 1.8845169581201786e-05, "epoch": 0.3940088105726872, "percentage": 19.7, "elapsed_time": "1:38:43", "remaining_time": "6:42:29"} 
+{"current_steps": 1119, "total_steps": 5676, "loss": 0.6427414417266846, "lr": 1.8842450019511337e-05, "epoch": 0.3943612334801762, "percentage": 19.71, "elapsed_time": "1:38:48", "remaining_time": "6:42:24"} +{"current_steps": 1120, "total_steps": 5676, "loss": 0.7241504192352295, "lr": 1.883972745608429e-05, "epoch": 0.3947136563876652, "percentage": 19.73, "elapsed_time": "1:38:54", "remaining_time": "6:42:20"} +{"current_steps": 1121, "total_steps": 5676, "loss": 0.7085466384887695, "lr": 1.8837001891844875e-05, "epoch": 0.3950660792951542, "percentage": 19.75, "elapsed_time": "1:38:59", "remaining_time": "6:42:12"} +{"current_steps": 1122, "total_steps": 5676, "loss": 0.6099711656570435, "lr": 1.8834273327718334e-05, "epoch": 0.3954185022026432, "percentage": 19.77, "elapsed_time": "1:39:04", "remaining_time": "6:42:07"} +{"current_steps": 1123, "total_steps": 5676, "loss": 0.9153809547424316, "lr": 1.8831541764630936e-05, "epoch": 0.3957709251101322, "percentage": 19.79, "elapsed_time": "1:39:10", "remaining_time": "6:42:05"} +{"current_steps": 1124, "total_steps": 5676, "loss": 0.7025514841079712, "lr": 1.8828807203509953e-05, "epoch": 0.39612334801762117, "percentage": 19.8, "elapsed_time": "1:39:15", "remaining_time": "6:41:59"} +{"current_steps": 1125, "total_steps": 5676, "loss": 0.8254855275154114, "lr": 1.882606964528369e-05, "epoch": 0.3964757709251101, "percentage": 19.82, "elapsed_time": "1:39:21", "remaining_time": "6:41:55"} +{"current_steps": 1126, "total_steps": 5676, "loss": 0.6812278032302856, "lr": 1.8823329090881457e-05, "epoch": 0.3968281938325991, "percentage": 19.84, "elapsed_time": "1:39:28", "remaining_time": "6:41:55"} +{"current_steps": 1127, "total_steps": 5676, "loss": 0.6570114493370056, "lr": 1.8820585541233592e-05, "epoch": 0.3971806167400881, "percentage": 19.86, "elapsed_time": "1:39:32", "remaining_time": "6:41:48"} +{"current_steps": 1128, "total_steps": 5676, "loss": 0.636163592338562, "lr": 1.881783899727144e-05, "epoch": 
0.3975330396475771, "percentage": 19.87, "elapsed_time": "1:39:37", "remaining_time": "6:41:39"} +{"current_steps": 1129, "total_steps": 5676, "loss": 0.6744807958602905, "lr": 1.8815089459927373e-05, "epoch": 0.39788546255506607, "percentage": 19.89, "elapsed_time": "1:39:41", "remaining_time": "6:41:31"} +{"current_steps": 1130, "total_steps": 5676, "loss": 0.6739502549171448, "lr": 1.8812336930134768e-05, "epoch": 0.39823788546255506, "percentage": 19.91, "elapsed_time": "1:39:46", "remaining_time": "6:41:23"} +{"current_steps": 1131, "total_steps": 5676, "loss": 0.800058126449585, "lr": 1.8809581408828026e-05, "epoch": 0.39859030837004406, "percentage": 19.93, "elapsed_time": "1:39:52", "remaining_time": "6:41:22"} +{"current_steps": 1132, "total_steps": 5676, "loss": 0.7158734798431396, "lr": 1.880682289694256e-05, "epoch": 0.39894273127753305, "percentage": 19.94, "elapsed_time": "1:39:58", "remaining_time": "6:41:18"} +{"current_steps": 1133, "total_steps": 5676, "loss": 0.7142150402069092, "lr": 1.8804061395414795e-05, "epoch": 0.39929515418502204, "percentage": 19.96, "elapsed_time": "1:40:02", "remaining_time": "6:41:08"} +{"current_steps": 1134, "total_steps": 5676, "loss": 0.7830438613891602, "lr": 1.8801296905182184e-05, "epoch": 0.39964757709251103, "percentage": 19.98, "elapsed_time": "1:40:07", "remaining_time": "6:41:02"} +{"current_steps": 1135, "total_steps": 5676, "loss": 0.7037091255187988, "lr": 1.879852942718318e-05, "epoch": 0.4, "percentage": 20.0, "elapsed_time": "1:40:11", "remaining_time": "6:40:50"} +{"current_steps": 1136, "total_steps": 5676, "loss": 0.7634316682815552, "lr": 1.8795758962357254e-05, "epoch": 0.400352422907489, "percentage": 20.01, "elapsed_time": "1:40:16", "remaining_time": "6:40:45"} +{"current_steps": 1137, "total_steps": 5676, "loss": 0.8569636344909668, "lr": 1.8792985511644895e-05, "epoch": 0.40070484581497795, "percentage": 20.03, "elapsed_time": "1:40:22", "remaining_time": "6:40:43"} +{"current_steps": 1138, 
"total_steps": 5676, "loss": 0.8589881062507629, "lr": 1.8790209075987603e-05, "epoch": 0.40105726872246694, "percentage": 20.05, "elapsed_time": "1:40:27", "remaining_time": "6:40:36"} +{"current_steps": 1139, "total_steps": 5676, "loss": 0.6667177677154541, "lr": 1.8787429656327892e-05, "epoch": 0.40140969162995593, "percentage": 20.07, "elapsed_time": "1:40:33", "remaining_time": "6:40:33"} +{"current_steps": 1140, "total_steps": 5676, "loss": 0.8272922039031982, "lr": 1.8784647253609286e-05, "epoch": 0.4017621145374449, "percentage": 20.08, "elapsed_time": "1:40:39", "remaining_time": "6:40:29"} +{"current_steps": 1141, "total_steps": 5676, "loss": 0.735906720161438, "lr": 1.8781861868776328e-05, "epoch": 0.4021145374449339, "percentage": 20.1, "elapsed_time": "1:40:43", "remaining_time": "6:40:21"} +{"current_steps": 1142, "total_steps": 5676, "loss": 0.7496200799942017, "lr": 1.8779073502774567e-05, "epoch": 0.4024669603524229, "percentage": 20.12, "elapsed_time": "1:40:49", "remaining_time": "6:40:16"} +{"current_steps": 1143, "total_steps": 5676, "loss": 0.741244912147522, "lr": 1.8776282156550563e-05, "epoch": 0.4028193832599119, "percentage": 20.14, "elapsed_time": "1:40:55", "remaining_time": "6:40:13"} +{"current_steps": 1144, "total_steps": 5676, "loss": 0.6074572205543518, "lr": 1.87734878310519e-05, "epoch": 0.4031718061674009, "percentage": 20.16, "elapsed_time": "1:40:58", "remaining_time": "6:40:02"} +{"current_steps": 1145, "total_steps": 5676, "loss": 0.7852963805198669, "lr": 1.8770690527227156e-05, "epoch": 0.4035242290748899, "percentage": 20.17, "elapsed_time": "1:41:04", "remaining_time": "6:39:56"} +{"current_steps": 1146, "total_steps": 5676, "loss": 0.8041664361953735, "lr": 1.8767890246025934e-05, "epoch": 0.4038766519823789, "percentage": 20.19, "elapsed_time": "1:41:09", "remaining_time": "6:39:51"} +{"current_steps": 1147, "total_steps": 5676, "loss": 0.6014564037322998, "lr": 1.876508698839884e-05, "epoch": 0.40422907488986787, 
"percentage": 20.21, "elapsed_time": "1:41:14", "remaining_time": "6:39:45"} +{"current_steps": 1148, "total_steps": 5676, "loss": 0.7389206886291504, "lr": 1.876228075529749e-05, "epoch": 0.4045814977973568, "percentage": 20.23, "elapsed_time": "1:41:19", "remaining_time": "6:39:38"} +{"current_steps": 1149, "total_steps": 5676, "loss": 0.7540062665939331, "lr": 1.875947154767452e-05, "epoch": 0.4049339207048458, "percentage": 20.24, "elapsed_time": "1:41:24", "remaining_time": "6:39:31"} +{"current_steps": 1150, "total_steps": 5676, "loss": 0.6953487396240234, "lr": 1.8756659366483564e-05, "epoch": 0.4052863436123348, "percentage": 20.26, "elapsed_time": "1:41:29", "remaining_time": "6:39:27"} +{"current_steps": 1151, "total_steps": 5676, "loss": 0.6715666055679321, "lr": 1.875384421267927e-05, "epoch": 0.4056387665198238, "percentage": 20.28, "elapsed_time": "1:41:35", "remaining_time": "6:39:24"} +{"current_steps": 1152, "total_steps": 5676, "loss": 0.7763206362724304, "lr": 1.8751026087217294e-05, "epoch": 0.40599118942731277, "percentage": 20.3, "elapsed_time": "1:41:40", "remaining_time": "6:39:16"} +{"current_steps": 1153, "total_steps": 5676, "loss": 0.8445626497268677, "lr": 1.8748204991054304e-05, "epoch": 0.40634361233480176, "percentage": 20.31, "elapsed_time": "1:41:45", "remaining_time": "6:39:10"} +{"current_steps": 1154, "total_steps": 5676, "loss": 0.6789584159851074, "lr": 1.8745380925147976e-05, "epoch": 0.40669603524229075, "percentage": 20.33, "elapsed_time": "1:41:50", "remaining_time": "6:39:05"} +{"current_steps": 1155, "total_steps": 5676, "loss": 0.6301349401473999, "lr": 1.8742553890456986e-05, "epoch": 0.40704845814977975, "percentage": 20.35, "elapsed_time": "1:41:55", "remaining_time": "6:38:59"} +{"current_steps": 1156, "total_steps": 5676, "loss": 0.5968909859657288, "lr": 1.873972388794103e-05, "epoch": 0.40740088105726874, "percentage": 20.37, "elapsed_time": "1:42:02", "remaining_time": "6:38:57"} +{"current_steps": 1157, 
"total_steps": 5676, "loss": 0.759127676486969, "lr": 1.873689091856081e-05, "epoch": 0.40775330396475773, "percentage": 20.38, "elapsed_time": "1:42:07", "remaining_time": "6:38:52"} +{"current_steps": 1158, "total_steps": 5676, "loss": 0.8113895654678345, "lr": 1.873405498327802e-05, "epoch": 0.4081057268722467, "percentage": 20.4, "elapsed_time": "1:42:13", "remaining_time": "6:38:50"} +{"current_steps": 1159, "total_steps": 5676, "loss": 0.6294944286346436, "lr": 1.8731216083055373e-05, "epoch": 0.40845814977973566, "percentage": 20.42, "elapsed_time": "1:42:18", "remaining_time": "6:38:45"} +{"current_steps": 1160, "total_steps": 5676, "loss": 0.7024469375610352, "lr": 1.87283742188566e-05, "epoch": 0.40881057268722465, "percentage": 20.44, "elapsed_time": "1:42:23", "remaining_time": "6:38:39"} +{"current_steps": 1161, "total_steps": 5676, "loss": 0.6593793034553528, "lr": 1.8725529391646413e-05, "epoch": 0.40916299559471364, "percentage": 20.45, "elapsed_time": "1:42:29", "remaining_time": "6:38:33"} +{"current_steps": 1162, "total_steps": 5676, "loss": 0.72177654504776, "lr": 1.8722681602390548e-05, "epoch": 0.40951541850220263, "percentage": 20.47, "elapsed_time": "1:42:33", "remaining_time": "6:38:23"} +{"current_steps": 1163, "total_steps": 5676, "loss": 0.7099393606185913, "lr": 1.8719830852055736e-05, "epoch": 0.4098678414096916, "percentage": 20.49, "elapsed_time": "1:42:38", "remaining_time": "6:38:16"} +{"current_steps": 1164, "total_steps": 5676, "loss": 0.6221687197685242, "lr": 1.871697714160972e-05, "epoch": 0.4102202643171806, "percentage": 20.51, "elapsed_time": "1:42:43", "remaining_time": "6:38:10"} +{"current_steps": 1165, "total_steps": 5676, "loss": 0.7236911058425903, "lr": 1.8714120472021252e-05, "epoch": 0.4105726872246696, "percentage": 20.53, "elapsed_time": "1:42:47", "remaining_time": "6:38:01"} +{"current_steps": 1166, "total_steps": 5676, "loss": 0.6777583360671997, "lr": 1.8711260844260072e-05, "epoch": 0.4109251101321586, 
"percentage": 20.54, "elapsed_time": "1:42:52", "remaining_time": "6:37:54"} +{"current_steps": 1167, "total_steps": 5676, "loss": 0.6408713459968567, "lr": 1.870839825929694e-05, "epoch": 0.4112775330396476, "percentage": 20.56, "elapsed_time": "1:42:58", "remaining_time": "6:37:53"} +{"current_steps": 1168, "total_steps": 5676, "loss": 0.6081968545913696, "lr": 1.870553271810362e-05, "epoch": 0.4116299559471366, "percentage": 20.58, "elapsed_time": "1:43:03", "remaining_time": "6:37:46"} +{"current_steps": 1169, "total_steps": 5676, "loss": 0.7269757986068726, "lr": 1.8702664221652864e-05, "epoch": 0.4119823788546256, "percentage": 20.6, "elapsed_time": "1:43:09", "remaining_time": "6:37:43"} +{"current_steps": 1170, "total_steps": 5676, "loss": 0.6563149094581604, "lr": 1.8699792770918443e-05, "epoch": 0.4123348017621145, "percentage": 20.61, "elapsed_time": "1:43:14", "remaining_time": "6:37:38"} +{"current_steps": 1171, "total_steps": 5676, "loss": 0.6900039911270142, "lr": 1.8696918366875123e-05, "epoch": 0.4126872246696035, "percentage": 20.63, "elapsed_time": "1:43:20", "remaining_time": "6:37:35"} +{"current_steps": 1172, "total_steps": 5676, "loss": 0.6575014591217041, "lr": 1.869404101049868e-05, "epoch": 0.4130396475770925, "percentage": 20.65, "elapsed_time": "1:43:28", "remaining_time": "6:37:37"} +{"current_steps": 1173, "total_steps": 5676, "loss": 0.8178410530090332, "lr": 1.8691160702765878e-05, "epoch": 0.4133920704845815, "percentage": 20.67, "elapsed_time": "1:43:33", "remaining_time": "6:37:31"} +{"current_steps": 1174, "total_steps": 5676, "loss": 0.6247331500053406, "lr": 1.8688277444654495e-05, "epoch": 0.4137444933920705, "percentage": 20.68, "elapsed_time": "1:43:38", "remaining_time": "6:37:27"} +{"current_steps": 1175, "total_steps": 5676, "loss": 0.7220792770385742, "lr": 1.868539123714331e-05, "epoch": 0.41409691629955947, "percentage": 20.7, "elapsed_time": "1:43:43", "remaining_time": "6:37:21"} +{"current_steps": 1176, 
"total_steps": 5676, "loss": 0.6279594302177429, "lr": 1.8682502081212104e-05, "epoch": 0.41444933920704846, "percentage": 20.72, "elapsed_time": "1:43:50", "remaining_time": "6:37:19"} +{"current_steps": 1177, "total_steps": 5676, "loss": 0.8814467787742615, "lr": 1.8679609977841646e-05, "epoch": 0.41480176211453745, "percentage": 20.74, "elapsed_time": "1:43:54", "remaining_time": "6:37:11"} +{"current_steps": 1178, "total_steps": 5676, "loss": 0.6601974368095398, "lr": 1.867671492801372e-05, "epoch": 0.41515418502202645, "percentage": 20.75, "elapsed_time": "1:43:59", "remaining_time": "6:37:06"} +{"current_steps": 1179, "total_steps": 5676, "loss": 0.7004785537719727, "lr": 1.8673816932711107e-05, "epoch": 0.41550660792951544, "percentage": 20.77, "elapsed_time": "1:44:04", "remaining_time": "6:36:59"} +{"current_steps": 1180, "total_steps": 5676, "loss": 0.7409330606460571, "lr": 1.8670915992917586e-05, "epoch": 0.41585903083700443, "percentage": 20.79, "elapsed_time": "1:44:10", "remaining_time": "6:36:54"} +{"current_steps": 1181, "total_steps": 5676, "loss": 0.6698065996170044, "lr": 1.8668012109617933e-05, "epoch": 0.41621145374449336, "percentage": 20.81, "elapsed_time": "1:44:15", "remaining_time": "6:36:49"} +{"current_steps": 1182, "total_steps": 5676, "loss": 0.7420671582221985, "lr": 1.8665105283797927e-05, "epoch": 0.41656387665198236, "percentage": 20.82, "elapsed_time": "1:44:19", "remaining_time": "6:36:39"} +{"current_steps": 1183, "total_steps": 5676, "loss": 0.7719774842262268, "lr": 1.8662195516444345e-05, "epoch": 0.41691629955947135, "percentage": 20.84, "elapsed_time": "1:44:24", "remaining_time": "6:36:31"} +{"current_steps": 1184, "total_steps": 5676, "loss": 0.6206108331680298, "lr": 1.8659282808544966e-05, "epoch": 0.41726872246696034, "percentage": 20.86, "elapsed_time": "1:44:30", "remaining_time": "6:36:30"} +{"current_steps": 1185, "total_steps": 5676, "loss": 0.799741268157959, "lr": 1.865636716108856e-05, "epoch": 
0.41762114537444933, "percentage": 20.88, "elapsed_time": "1:44:37", "remaining_time": "6:36:29"} +{"current_steps": 1186, "total_steps": 5676, "loss": 0.6839771866798401, "lr": 1.8653448575064893e-05, "epoch": 0.4179735682819383, "percentage": 20.89, "elapsed_time": "1:44:42", "remaining_time": "6:36:24"} +{"current_steps": 1187, "total_steps": 5676, "loss": 0.7937930822372437, "lr": 1.8650527051464744e-05, "epoch": 0.4183259911894273, "percentage": 20.91, "elapsed_time": "1:44:47", "remaining_time": "6:36:18"} +{"current_steps": 1188, "total_steps": 5676, "loss": 0.6819020509719849, "lr": 1.8647602591279873e-05, "epoch": 0.4186784140969163, "percentage": 20.93, "elapsed_time": "1:44:53", "remaining_time": "6:36:15"} +{"current_steps": 1189, "total_steps": 5676, "loss": 0.75614994764328, "lr": 1.864467519550305e-05, "epoch": 0.4190308370044053, "percentage": 20.95, "elapsed_time": "1:44:59", "remaining_time": "6:36:11"} +{"current_steps": 1190, "total_steps": 5676, "loss": 0.6966177225112915, "lr": 1.864174486512803e-05, "epoch": 0.4193832599118943, "percentage": 20.97, "elapsed_time": "1:45:05", "remaining_time": "6:36:08"} +{"current_steps": 1191, "total_steps": 5676, "loss": 0.821509838104248, "lr": 1.8638811601149568e-05, "epoch": 0.4197356828193833, "percentage": 20.98, "elapsed_time": "1:45:09", "remaining_time": "6:35:58"} +{"current_steps": 1192, "total_steps": 5676, "loss": 0.5905138254165649, "lr": 1.8635875404563414e-05, "epoch": 0.4200881057268723, "percentage": 21.0, "elapsed_time": "1:45:15", "remaining_time": "6:35:58"} +{"current_steps": 1193, "total_steps": 5676, "loss": 0.6856247186660767, "lr": 1.8632936276366323e-05, "epoch": 0.4204405286343612, "percentage": 21.02, "elapsed_time": "1:45:21", "remaining_time": "6:35:55"} +{"current_steps": 1194, "total_steps": 5676, "loss": 0.745036244392395, "lr": 1.862999421755603e-05, "epoch": 0.4207929515418502, "percentage": 21.04, "elapsed_time": "1:45:27", "remaining_time": "6:35:51"} +{"current_steps": 
1195, "total_steps": 5676, "loss": 0.6503005027770996, "lr": 1.8627049229131276e-05, "epoch": 0.4211453744493392, "percentage": 21.05, "elapsed_time": "1:45:33", "remaining_time": "6:35:48"} +{"current_steps": 1196, "total_steps": 5676, "loss": 0.7498307228088379, "lr": 1.86241013120918e-05, "epoch": 0.4214977973568282, "percentage": 21.07, "elapsed_time": "1:45:37", "remaining_time": "6:35:38"} +{"current_steps": 1197, "total_steps": 5676, "loss": 0.7928652763366699, "lr": 1.862115046743831e-05, "epoch": 0.4218502202643172, "percentage": 21.09, "elapsed_time": "1:45:44", "remaining_time": "6:35:39"} +{"current_steps": 1198, "total_steps": 5676, "loss": 0.6854137182235718, "lr": 1.861819669617254e-05, "epoch": 0.42220264317180617, "percentage": 21.11, "elapsed_time": "1:45:50", "remaining_time": "6:35:37"} +{"current_steps": 1199, "total_steps": 5676, "loss": 0.6196715831756592, "lr": 1.86152399992972e-05, "epoch": 0.42255506607929516, "percentage": 21.12, "elapsed_time": "1:45:56", "remaining_time": "6:35:34"} +{"current_steps": 1200, "total_steps": 5676, "loss": 0.6937464475631714, "lr": 1.8612280377816e-05, "epoch": 0.42290748898678415, "percentage": 21.14, "elapsed_time": "1:46:01", "remaining_time": "6:35:27"} +{"current_steps": 1201, "total_steps": 5676, "loss": 0.7681070566177368, "lr": 1.860931783273363e-05, "epoch": 0.42325991189427314, "percentage": 21.16, "elapsed_time": "1:46:10", "remaining_time": "6:35:35"} +{"current_steps": 1202, "total_steps": 5676, "loss": 0.676302969455719, "lr": 1.860635236505579e-05, "epoch": 0.42361233480176214, "percentage": 21.18, "elapsed_time": "1:46:15", "remaining_time": "6:35:29"} +{"current_steps": 1203, "total_steps": 5676, "loss": 0.6533253192901611, "lr": 1.8603383975789168e-05, "epoch": 0.4239647577092511, "percentage": 21.19, "elapsed_time": "1:46:20", "remaining_time": "6:35:24"} +{"current_steps": 1204, "total_steps": 5676, "loss": 0.689995288848877, "lr": 1.860041266594143e-05, "epoch": 0.42431718061674006, 
"percentage": 21.21, "elapsed_time": "1:46:26", "remaining_time": "6:35:19"} +{"current_steps": 1205, "total_steps": 5676, "loss": 0.8129922747612, "lr": 1.859743843652124e-05, "epoch": 0.42466960352422906, "percentage": 21.23, "elapsed_time": "1:46:32", "remaining_time": "6:35:19"} +{"current_steps": 1206, "total_steps": 5676, "loss": 0.8388077616691589, "lr": 1.859446128853827e-05, "epoch": 0.42502202643171805, "percentage": 21.25, "elapsed_time": "1:46:39", "remaining_time": "6:35:19"} +{"current_steps": 1207, "total_steps": 5676, "loss": 0.8795225024223328, "lr": 1.859148122300316e-05, "epoch": 0.42537444933920704, "percentage": 21.26, "elapsed_time": "1:46:45", "remaining_time": "6:35:14"} +{"current_steps": 1208, "total_steps": 5676, "loss": 0.7340251803398132, "lr": 1.858849824092755e-05, "epoch": 0.42572687224669603, "percentage": 21.28, "elapsed_time": "1:46:50", "remaining_time": "6:35:08"} +{"current_steps": 1209, "total_steps": 5676, "loss": 0.8028355240821838, "lr": 1.8585512343324073e-05, "epoch": 0.426079295154185, "percentage": 21.3, "elapsed_time": "1:46:55", "remaining_time": "6:35:03"} +{"current_steps": 1210, "total_steps": 5676, "loss": 0.8469998836517334, "lr": 1.8582523531206345e-05, "epoch": 0.426431718061674, "percentage": 21.32, "elapsed_time": "1:47:00", "remaining_time": "6:34:57"} +{"current_steps": 1211, "total_steps": 5676, "loss": 0.7562716007232666, "lr": 1.857953180558898e-05, "epoch": 0.426784140969163, "percentage": 21.34, "elapsed_time": "1:47:04", "remaining_time": "6:34:47"} +{"current_steps": 1212, "total_steps": 5676, "loss": 0.7166177034378052, "lr": 1.857653716748757e-05, "epoch": 0.427136563876652, "percentage": 21.35, "elapsed_time": "1:47:10", "remaining_time": "6:34:44"} +{"current_steps": 1213, "total_steps": 5676, "loss": 0.6946159601211548, "lr": 1.85735396179187e-05, "epoch": 0.427488986784141, "percentage": 21.37, "elapsed_time": "1:47:16", "remaining_time": "6:34:41"} +{"current_steps": 1214, "total_steps": 5676, 
"loss": 0.5341482758522034, "lr": 1.8570539157899953e-05, "epoch": 0.42784140969163, "percentage": 21.39, "elapsed_time": "1:47:22", "remaining_time": "6:34:39"} +{"current_steps": 1215, "total_steps": 5676, "loss": 0.8128249645233154, "lr": 1.8567535788449886e-05, "epoch": 0.4281938325991189, "percentage": 21.41, "elapsed_time": "1:47:28", "remaining_time": "6:34:36"} +{"current_steps": 1216, "total_steps": 5676, "loss": 0.7136335372924805, "lr": 1.8564529510588046e-05, "epoch": 0.4285462555066079, "percentage": 21.42, "elapsed_time": "1:47:33", "remaining_time": "6:34:30"} +{"current_steps": 1217, "total_steps": 5676, "loss": 0.6737562417984009, "lr": 1.856152032533498e-05, "epoch": 0.4288986784140969, "percentage": 21.44, "elapsed_time": "1:47:39", "remaining_time": "6:34:27"} +{"current_steps": 1218, "total_steps": 5676, "loss": 0.8102772235870361, "lr": 1.855850823371221e-05, "epoch": 0.4292511013215859, "percentage": 21.46, "elapsed_time": "1:47:45", "remaining_time": "6:34:22"} +{"current_steps": 1219, "total_steps": 5676, "loss": 0.7389130592346191, "lr": 1.855549323674224e-05, "epoch": 0.4296035242290749, "percentage": 21.48, "elapsed_time": "1:47:50", "remaining_time": "6:34:19"} +{"current_steps": 1220, "total_steps": 5676, "loss": 0.9127305746078491, "lr": 1.8552475335448575e-05, "epoch": 0.4299559471365639, "percentage": 21.49, "elapsed_time": "1:47:55", "remaining_time": "6:34:12"} +{"current_steps": 1221, "total_steps": 5676, "loss": 0.7599691152572632, "lr": 1.8549454530855697e-05, "epoch": 0.43030837004405287, "percentage": 21.51, "elapsed_time": "1:48:00", "remaining_time": "6:34:05"} +{"current_steps": 1222, "total_steps": 5676, "loss": 0.8343819379806519, "lr": 1.8546430823989075e-05, "epoch": 0.43066079295154186, "percentage": 21.53, "elapsed_time": "1:48:06", "remaining_time": "6:34:02"} +{"current_steps": 1223, "total_steps": 5676, "loss": 0.7759256362915039, "lr": 1.8543404215875163e-05, "epoch": 0.43101321585903085, "percentage": 21.55, 
"elapsed_time": "1:48:10", "remaining_time": "6:33:53"} +{"current_steps": 1224, "total_steps": 5676, "loss": 0.7803373336791992, "lr": 1.8540374707541398e-05, "epoch": 0.43136563876651984, "percentage": 21.56, "elapsed_time": "1:48:15", "remaining_time": "6:33:47"} +{"current_steps": 1225, "total_steps": 5676, "loss": 0.6292921304702759, "lr": 1.8537342300016208e-05, "epoch": 0.43171806167400884, "percentage": 21.58, "elapsed_time": "1:48:20", "remaining_time": "6:33:40"} +{"current_steps": 1226, "total_steps": 5676, "loss": 0.8495175838470459, "lr": 1.8534306994329e-05, "epoch": 0.43207048458149777, "percentage": 21.6, "elapsed_time": "1:48:26", "remaining_time": "6:33:37"} +{"current_steps": 1227, "total_steps": 5676, "loss": 0.6141406297683716, "lr": 1.8531268791510167e-05, "epoch": 0.43242290748898676, "percentage": 21.62, "elapsed_time": "1:48:32", "remaining_time": "6:33:34"} +{"current_steps": 1228, "total_steps": 5676, "loss": 0.7087793350219727, "lr": 1.8528227692591076e-05, "epoch": 0.43277533039647575, "percentage": 21.63, "elapsed_time": "1:48:38", "remaining_time": "6:33:29"} +{"current_steps": 1229, "total_steps": 5676, "loss": 0.7919498682022095, "lr": 1.8525183698604098e-05, "epoch": 0.43312775330396475, "percentage": 21.65, "elapsed_time": "1:48:43", "remaining_time": "6:33:24"} +{"current_steps": 1230, "total_steps": 5676, "loss": 0.7408226728439331, "lr": 1.8522136810582563e-05, "epoch": 0.43348017621145374, "percentage": 21.67, "elapsed_time": "1:48:48", "remaining_time": "6:33:18"} +{"current_steps": 1231, "total_steps": 5676, "loss": 0.7140083312988281, "lr": 1.85190870295608e-05, "epoch": 0.43383259911894273, "percentage": 21.69, "elapsed_time": "1:48:55", "remaining_time": "6:33:18"} +{"current_steps": 1232, "total_steps": 5676, "loss": 0.7211521863937378, "lr": 1.8516034356574118e-05, "epoch": 0.4341850220264317, "percentage": 21.71, "elapsed_time": "1:49:00", "remaining_time": "6:33:13"} +{"current_steps": 1233, "total_steps": 5676, 
"loss": 0.7103208303451538, "lr": 1.85129787926588e-05, "epoch": 0.4345374449339207, "percentage": 21.72, "elapsed_time": "1:49:07", "remaining_time": "6:33:13"} +{"current_steps": 1234, "total_steps": 5676, "loss": 0.816985011100769, "lr": 1.850992033885211e-05, "epoch": 0.4348898678414097, "percentage": 21.74, "elapsed_time": "1:49:12", "remaining_time": "6:33:06"} +{"current_steps": 1235, "total_steps": 5676, "loss": 0.6678498983383179, "lr": 1.850685899619231e-05, "epoch": 0.4352422907488987, "percentage": 21.76, "elapsed_time": "1:49:17", "remaining_time": "6:33:01"} +{"current_steps": 1236, "total_steps": 5676, "loss": 0.7895394563674927, "lr": 1.8503794765718622e-05, "epoch": 0.4355947136563877, "percentage": 21.78, "elapsed_time": "1:49:23", "remaining_time": "6:32:56"} +{"current_steps": 1237, "total_steps": 5676, "loss": 0.7295971512794495, "lr": 1.8500727648471258e-05, "epoch": 0.4359471365638766, "percentage": 21.79, "elapsed_time": "1:49:28", "remaining_time": "6:32:52"} +{"current_steps": 1238, "total_steps": 5676, "loss": 0.7216300964355469, "lr": 1.849765764549141e-05, "epoch": 0.4362995594713656, "percentage": 21.81, "elapsed_time": "1:49:33", "remaining_time": "6:32:46"} +{"current_steps": 1239, "total_steps": 5676, "loss": 0.8088986873626709, "lr": 1.8494584757821252e-05, "epoch": 0.4366519823788546, "percentage": 21.83, "elapsed_time": "1:49:40", "remaining_time": "6:32:46"} +{"current_steps": 1240, "total_steps": 5676, "loss": 0.7380663156509399, "lr": 1.8491508986503928e-05, "epoch": 0.4370044052863436, "percentage": 21.85, "elapsed_time": "1:49:46", "remaining_time": "6:32:42"} +{"current_steps": 1241, "total_steps": 5676, "loss": 0.8671622276306152, "lr": 1.8488430332583566e-05, "epoch": 0.4373568281938326, "percentage": 21.86, "elapsed_time": "1:49:52", "remaining_time": "6:32:39"} +{"current_steps": 1242, "total_steps": 5676, "loss": 0.6649274826049805, "lr": 1.8485348797105277e-05, "epoch": 0.4377092511013216, "percentage": 21.88, 
"elapsed_time": "1:49:59", "remaining_time": "6:32:39"} +{"current_steps": 1243, "total_steps": 5676, "loss": 0.740972638130188, "lr": 1.848226438111515e-05, "epoch": 0.4380616740088106, "percentage": 21.9, "elapsed_time": "1:50:04", "remaining_time": "6:32:35"} +{"current_steps": 1244, "total_steps": 5676, "loss": 0.6593915820121765, "lr": 1.8479177085660237e-05, "epoch": 0.43841409691629957, "percentage": 21.92, "elapsed_time": "1:50:10", "remaining_time": "6:32:30"} +{"current_steps": 1245, "total_steps": 5676, "loss": 0.792604923248291, "lr": 1.8476086911788588e-05, "epoch": 0.43876651982378856, "percentage": 21.93, "elapsed_time": "1:50:14", "remaining_time": "6:32:22"} +{"current_steps": 1246, "total_steps": 5676, "loss": 0.7521885633468628, "lr": 1.8472993860549216e-05, "epoch": 0.43911894273127755, "percentage": 21.95, "elapsed_time": "1:50:18", "remaining_time": "6:32:12"} +{"current_steps": 1247, "total_steps": 5676, "loss": 0.7242246270179749, "lr": 1.846989793299212e-05, "epoch": 0.43947136563876654, "percentage": 21.97, "elapsed_time": "1:50:23", "remaining_time": "6:32:05"} +{"current_steps": 1248, "total_steps": 5676, "loss": 0.7343394160270691, "lr": 1.846679913016827e-05, "epoch": 0.43982378854625553, "percentage": 21.99, "elapsed_time": "1:50:28", "remaining_time": "6:31:59"} +{"current_steps": 1249, "total_steps": 5676, "loss": 0.747876763343811, "lr": 1.846369745312961e-05, "epoch": 0.44017621145374447, "percentage": 22.0, "elapsed_time": "1:50:34", "remaining_time": "6:31:55"} +{"current_steps": 1250, "total_steps": 5676, "loss": 0.7280946969985962, "lr": 1.8460592902929064e-05, "epoch": 0.44052863436123346, "percentage": 22.02, "elapsed_time": "1:50:39", "remaining_time": "6:31:50"} +{"current_steps": 1251, "total_steps": 5676, "loss": 0.7288519144058228, "lr": 1.845748548062053e-05, "epoch": 0.44088105726872245, "percentage": 22.04, "elapsed_time": "1:50:45", "remaining_time": "6:31:45"} +{"current_steps": 1252, "total_steps": 5676, "loss": 
0.6269914507865906, "lr": 1.8454375187258885e-05, "epoch": 0.44123348017621145, "percentage": 22.06, "elapsed_time": "1:50:50", "remaining_time": "6:31:38"} +{"current_steps": 1253, "total_steps": 5676, "loss": 0.7848949432373047, "lr": 1.8451262023899973e-05, "epoch": 0.44158590308370044, "percentage": 22.08, "elapsed_time": "1:50:55", "remaining_time": "6:31:32"} +{"current_steps": 1254, "total_steps": 5676, "loss": 0.7306517958641052, "lr": 1.8448145991600618e-05, "epoch": 0.44193832599118943, "percentage": 22.09, "elapsed_time": "1:50:59", "remaining_time": "6:31:22"} +{"current_steps": 1255, "total_steps": 5676, "loss": 0.6933906078338623, "lr": 1.8445027091418614e-05, "epoch": 0.4422907488986784, "percentage": 22.11, "elapsed_time": "1:51:05", "remaining_time": "6:31:19"} +{"current_steps": 1256, "total_steps": 5676, "loss": 0.8260579109191895, "lr": 1.8441905324412732e-05, "epoch": 0.4426431718061674, "percentage": 22.13, "elapsed_time": "1:51:10", "remaining_time": "6:31:15"} +{"current_steps": 1257, "total_steps": 5676, "loss": 0.6818344593048096, "lr": 1.8438780691642712e-05, "epoch": 0.4429955947136564, "percentage": 22.15, "elapsed_time": "1:51:17", "remaining_time": "6:31:14"} +{"current_steps": 1258, "total_steps": 5676, "loss": 0.5980014801025391, "lr": 1.8435653194169274e-05, "epoch": 0.4433480176211454, "percentage": 22.16, "elapsed_time": "1:51:22", "remaining_time": "6:31:08"} +{"current_steps": 1259, "total_steps": 5676, "loss": 0.7694655656814575, "lr": 1.8432522833054102e-05, "epoch": 0.4437004405286344, "percentage": 22.18, "elapsed_time": "1:51:26", "remaining_time": "6:31:00"} +{"current_steps": 1260, "total_steps": 5676, "loss": 0.6861646771430969, "lr": 1.842938960935986e-05, "epoch": 0.4440528634361233, "percentage": 22.2, "elapsed_time": "1:51:32", "remaining_time": "6:30:54"} +{"current_steps": 1261, "total_steps": 5676, "loss": 0.7346323728561401, "lr": 1.8426253524150176e-05, "epoch": 0.4444052863436123, "percentage": 22.22, 
"elapsed_time": "1:51:37", "remaining_time": "6:30:48"} +{"current_steps": 1262, "total_steps": 5676, "loss": 0.7116265296936035, "lr": 1.8423114578489657e-05, "epoch": 0.4447577092511013, "percentage": 22.23, "elapsed_time": "1:51:41", "remaining_time": "6:30:38"} +{"current_steps": 1263, "total_steps": 5676, "loss": 0.7148594856262207, "lr": 1.8419972773443877e-05, "epoch": 0.4451101321585903, "percentage": 22.25, "elapsed_time": "1:51:48", "remaining_time": "6:30:38"} +{"current_steps": 1264, "total_steps": 5676, "loss": 0.6629737615585327, "lr": 1.8416828110079378e-05, "epoch": 0.4454625550660793, "percentage": 22.27, "elapsed_time": "1:51:53", "remaining_time": "6:30:32"} +{"current_steps": 1265, "total_steps": 5676, "loss": 0.5734454393386841, "lr": 1.8413680589463677e-05, "epoch": 0.4458149779735683, "percentage": 22.29, "elapsed_time": "1:51:59", "remaining_time": "6:30:30"} +{"current_steps": 1266, "total_steps": 5676, "loss": 0.8129212260246277, "lr": 1.8410530212665258e-05, "epoch": 0.4461674008810573, "percentage": 22.3, "elapsed_time": "1:52:05", "remaining_time": "6:30:26"} +{"current_steps": 1267, "total_steps": 5676, "loss": 0.7408754825592041, "lr": 1.8407376980753578e-05, "epoch": 0.44651982378854627, "percentage": 22.32, "elapsed_time": "1:52:11", "remaining_time": "6:30:23"} +{"current_steps": 1268, "total_steps": 5676, "loss": 0.7315034866333008, "lr": 1.840422089479906e-05, "epoch": 0.44687224669603526, "percentage": 22.34, "elapsed_time": "1:52:17", "remaining_time": "6:30:20"} +{"current_steps": 1269, "total_steps": 5676, "loss": 0.6774684190750122, "lr": 1.8401061955873102e-05, "epoch": 0.44722466960352425, "percentage": 22.36, "elapsed_time": "1:52:22", "remaining_time": "6:30:14"} +{"current_steps": 1270, "total_steps": 5676, "loss": 0.6615294814109802, "lr": 1.8397900165048055e-05, "epoch": 0.44757709251101324, "percentage": 22.37, "elapsed_time": "1:52:27", "remaining_time": "6:30:10"} +{"current_steps": 1271, "total_steps": 5676, 
"loss": 0.6757136583328247, "lr": 1.8394735523397258e-05, "epoch": 0.4479295154185022, "percentage": 22.39, "elapsed_time": "1:52:32", "remaining_time": "6:30:02"} +{"current_steps": 1272, "total_steps": 5676, "loss": 0.6395466327667236, "lr": 1.8391568031995004e-05, "epoch": 0.44828193832599117, "percentage": 22.41, "elapsed_time": "1:52:38", "remaining_time": "6:29:59"} +{"current_steps": 1273, "total_steps": 5676, "loss": 0.6436404585838318, "lr": 1.8388397691916556e-05, "epoch": 0.44863436123348016, "percentage": 22.43, "elapsed_time": "1:52:44", "remaining_time": "6:29:55"} +{"current_steps": 1274, "total_steps": 5676, "loss": 0.6280484199523926, "lr": 1.838522450423815e-05, "epoch": 0.44898678414096915, "percentage": 22.45, "elapsed_time": "1:52:50", "remaining_time": "6:29:52"} +{"current_steps": 1275, "total_steps": 5676, "loss": 0.7485225200653076, "lr": 1.8382048470036983e-05, "epoch": 0.44933920704845814, "percentage": 22.46, "elapsed_time": "1:52:54", "remaining_time": "6:29:45"} +{"current_steps": 1276, "total_steps": 5676, "loss": 0.745079517364502, "lr": 1.8378869590391217e-05, "epoch": 0.44969162995594714, "percentage": 22.48, "elapsed_time": "1:52:59", "remaining_time": "6:29:38"} +{"current_steps": 1277, "total_steps": 5676, "loss": 0.656510591506958, "lr": 1.8375687866379988e-05, "epoch": 0.45004405286343613, "percentage": 22.5, "elapsed_time": "1:53:04", "remaining_time": "6:29:32"} +{"current_steps": 1278, "total_steps": 5676, "loss": 0.7122445106506348, "lr": 1.8372503299083392e-05, "epoch": 0.4503964757709251, "percentage": 22.52, "elapsed_time": "1:53:09", "remaining_time": "6:29:24"} +{"current_steps": 1279, "total_steps": 5676, "loss": 0.7402621507644653, "lr": 1.8369315889582483e-05, "epoch": 0.4507488986784141, "percentage": 22.53, "elapsed_time": "1:53:14", "remaining_time": "6:29:19"} +{"current_steps": 1280, "total_steps": 5676, "loss": 0.79311203956604, "lr": 1.8366125638959292e-05, "epoch": 0.4511013215859031, "percentage": 22.55, 
"elapsed_time": "1:53:19", "remaining_time": "6:29:10"} +{"current_steps": 1281, "total_steps": 5676, "loss": 0.7748456001281738, "lr": 1.8362932548296815e-05, "epoch": 0.4514537444933921, "percentage": 22.57, "elapsed_time": "1:53:23", "remaining_time": "6:29:03"} +{"current_steps": 1282, "total_steps": 5676, "loss": 0.8285728096961975, "lr": 1.8359736618679e-05, "epoch": 0.45180616740088103, "percentage": 22.59, "elapsed_time": "1:53:30", "remaining_time": "6:29:03"} +{"current_steps": 1283, "total_steps": 5676, "loss": 0.7874733209609985, "lr": 1.835653785119076e-05, "epoch": 0.45215859030837, "percentage": 22.6, "elapsed_time": "1:53:35", "remaining_time": "6:28:57"} +{"current_steps": 1284, "total_steps": 5676, "loss": 0.8984566926956177, "lr": 1.8353336246917996e-05, "epoch": 0.452511013215859, "percentage": 22.62, "elapsed_time": "1:53:40", "remaining_time": "6:28:50"} +{"current_steps": 1285, "total_steps": 5676, "loss": 0.7730413675308228, "lr": 1.8350131806947537e-05, "epoch": 0.452863436123348, "percentage": 22.64, "elapsed_time": "1:53:47", "remaining_time": "6:28:49"} +{"current_steps": 1286, "total_steps": 5676, "loss": 0.6064612865447998, "lr": 1.8346924532367195e-05, "epoch": 0.453215859030837, "percentage": 22.66, "elapsed_time": "1:53:51", "remaining_time": "6:28:41"} +{"current_steps": 1287, "total_steps": 5676, "loss": 0.6946402192115784, "lr": 1.8343714424265742e-05, "epoch": 0.453568281938326, "percentage": 22.67, "elapsed_time": "1:53:58", "remaining_time": "6:28:40"} +{"current_steps": 1288, "total_steps": 5676, "loss": 0.6131751537322998, "lr": 1.8340501483732908e-05, "epoch": 0.453920704845815, "percentage": 22.69, "elapsed_time": "1:54:02", "remaining_time": "6:28:32"} +{"current_steps": 1289, "total_steps": 5676, "loss": 0.7251182794570923, "lr": 1.833728571185938e-05, "epoch": 0.454273127753304, "percentage": 22.71, "elapsed_time": "1:54:07", "remaining_time": "6:28:24"} +{"current_steps": 1290, "total_steps": 5676, "loss": 
0.6548069715499878, "lr": 1.8334067109736826e-05, "epoch": 0.45462555066079297, "percentage": 22.73, "elapsed_time": "1:54:13", "remaining_time": "6:28:22"} +{"current_steps": 1291, "total_steps": 5676, "loss": 0.7416098117828369, "lr": 1.833084567845785e-05, "epoch": 0.45497797356828196, "percentage": 22.74, "elapsed_time": "1:54:20", "remaining_time": "6:28:20"} +{"current_steps": 1292, "total_steps": 5676, "loss": 0.7320964932441711, "lr": 1.8327621419116034e-05, "epoch": 0.45533039647577095, "percentage": 22.76, "elapsed_time": "1:54:25", "remaining_time": "6:28:14"} +{"current_steps": 1293, "total_steps": 5676, "loss": 0.5926196575164795, "lr": 1.8324394332805913e-05, "epoch": 0.4556828193832599, "percentage": 22.78, "elapsed_time": "1:54:30", "remaining_time": "6:28:08"} +{"current_steps": 1294, "total_steps": 5676, "loss": 0.5294085741043091, "lr": 1.8321164420622977e-05, "epoch": 0.4560352422907489, "percentage": 22.8, "elapsed_time": "1:54:36", "remaining_time": "6:28:07"} +{"current_steps": 1295, "total_steps": 5676, "loss": 0.6332723498344421, "lr": 1.8317931683663688e-05, "epoch": 0.45638766519823787, "percentage": 22.82, "elapsed_time": "1:54:42", "remaining_time": "6:28:03"} +{"current_steps": 1296, "total_steps": 5676, "loss": 0.8361148834228516, "lr": 1.8314696123025456e-05, "epoch": 0.45674008810572686, "percentage": 22.83, "elapsed_time": "1:54:47", "remaining_time": "6:27:57"} +{"current_steps": 1297, "total_steps": 5676, "loss": 0.8097354173660278, "lr": 1.8311457739806648e-05, "epoch": 0.45709251101321585, "percentage": 22.85, "elapsed_time": "1:54:52", "remaining_time": "6:27:49"} +{"current_steps": 1298, "total_steps": 5676, "loss": 0.8619102239608765, "lr": 1.8308216535106606e-05, "epoch": 0.45744493392070484, "percentage": 22.87, "elapsed_time": "1:54:57", "remaining_time": "6:27:43"} +{"current_steps": 1299, "total_steps": 5676, "loss": 0.8149014711380005, "lr": 1.8304972510025607e-05, "epoch": 0.45779735682819384, "percentage": 22.89, 
"elapsed_time": "1:55:01", "remaining_time": "6:27:33"} +{"current_steps": 1300, "total_steps": 5676, "loss": 0.6217210292816162, "lr": 1.8301725665664904e-05, "epoch": 0.4581497797356828, "percentage": 22.9, "elapsed_time": "1:55:06", "remaining_time": "6:27:29"} +{"current_steps": 1301, "total_steps": 5676, "loss": 0.7496612071990967, "lr": 1.8298476003126695e-05, "epoch": 0.4585022026431718, "percentage": 22.92, "elapsed_time": "1:55:17", "remaining_time": "6:27:41"} +{"current_steps": 1302, "total_steps": 5676, "loss": 0.743242084980011, "lr": 1.8295223523514144e-05, "epoch": 0.4588546255506608, "percentage": 22.94, "elapsed_time": "1:55:23", "remaining_time": "6:27:37"} +{"current_steps": 1303, "total_steps": 5676, "loss": 0.6425061821937561, "lr": 1.829196822793136e-05, "epoch": 0.4592070484581498, "percentage": 22.96, "elapsed_time": "1:55:28", "remaining_time": "6:27:32"} +{"current_steps": 1304, "total_steps": 5676, "loss": 0.8274835348129272, "lr": 1.828871011748342e-05, "epoch": 0.4595594713656388, "percentage": 22.97, "elapsed_time": "1:55:33", "remaining_time": "6:27:26"} +{"current_steps": 1305, "total_steps": 5676, "loss": 0.6403865814208984, "lr": 1.828544919327635e-05, "epoch": 0.45991189427312773, "percentage": 22.99, "elapsed_time": "1:55:38", "remaining_time": "6:27:21"} +{"current_steps": 1306, "total_steps": 5676, "loss": 0.6585257053375244, "lr": 1.828218545641713e-05, "epoch": 0.4602643171806167, "percentage": 23.01, "elapsed_time": "1:55:45", "remaining_time": "6:27:19"} +{"current_steps": 1307, "total_steps": 5676, "loss": 0.6467862129211426, "lr": 1.82789189080137e-05, "epoch": 0.4606167400881057, "percentage": 23.03, "elapsed_time": "1:55:50", "remaining_time": "6:27:14"} +{"current_steps": 1308, "total_steps": 5676, "loss": 0.8656524419784546, "lr": 1.827564954917495e-05, "epoch": 0.4609691629955947, "percentage": 23.04, "elapsed_time": "1:55:55", "remaining_time": "6:27:07"} +{"current_steps": 1309, "total_steps": 5676, "loss": 
0.7298469543457031, "lr": 1.8272377381010726e-05, "epoch": 0.4613215859030837, "percentage": 23.06, "elapsed_time": "1:56:01", "remaining_time": "6:27:05"} +{"current_steps": 1310, "total_steps": 5676, "loss": 0.7342871427536011, "lr": 1.8269102404631826e-05, "epoch": 0.4616740088105727, "percentage": 23.08, "elapsed_time": "1:56:05", "remaining_time": "6:26:55"} +{"current_steps": 1311, "total_steps": 5676, "loss": 0.7437269687652588, "lr": 1.8265824621150005e-05, "epoch": 0.4620264317180617, "percentage": 23.1, "elapsed_time": "1:56:10", "remaining_time": "6:26:50"} +{"current_steps": 1312, "total_steps": 5676, "loss": 0.6761496067047119, "lr": 1.8262544031677965e-05, "epoch": 0.4623788546255507, "percentage": 23.11, "elapsed_time": "1:56:16", "remaining_time": "6:26:45"} +{"current_steps": 1313, "total_steps": 5676, "loss": 0.6504565477371216, "lr": 1.825926063732937e-05, "epoch": 0.46273127753303966, "percentage": 23.13, "elapsed_time": "1:56:21", "remaining_time": "6:26:38"} +{"current_steps": 1314, "total_steps": 5676, "loss": 0.7058892250061035, "lr": 1.8255974439218826e-05, "epoch": 0.46308370044052866, "percentage": 23.15, "elapsed_time": "1:56:27", "remaining_time": "6:26:35"} +{"current_steps": 1315, "total_steps": 5676, "loss": 0.704500675201416, "lr": 1.8252685438461893e-05, "epoch": 0.46343612334801765, "percentage": 23.17, "elapsed_time": "1:56:32", "remaining_time": "6:26:30"} +{"current_steps": 1316, "total_steps": 5676, "loss": 0.7438445091247559, "lr": 1.824939363617509e-05, "epoch": 0.4637885462555066, "percentage": 23.19, "elapsed_time": "1:56:37", "remaining_time": "6:26:24"} +{"current_steps": 1317, "total_steps": 5676, "loss": 0.6610915660858154, "lr": 1.8246099033475872e-05, "epoch": 0.4641409691629956, "percentage": 23.2, "elapsed_time": "1:56:42", "remaining_time": "6:26:17"} +{"current_steps": 1318, "total_steps": 5676, "loss": 0.5868711471557617, "lr": 1.8242801631482666e-05, "epoch": 0.46449339207048457, "percentage": 23.22, 
"elapsed_time": "1:56:48", "remaining_time": "6:26:15"} +{"current_steps": 1319, "total_steps": 5676, "loss": 0.7403215765953064, "lr": 1.8239501431314828e-05, "epoch": 0.46484581497797356, "percentage": 23.24, "elapsed_time": "1:56:53", "remaining_time": "6:26:05"} +{"current_steps": 1320, "total_steps": 5676, "loss": 0.6836927533149719, "lr": 1.823619843409268e-05, "epoch": 0.46519823788546255, "percentage": 23.26, "elapsed_time": "1:56:57", "remaining_time": "6:25:57"} +{"current_steps": 1321, "total_steps": 5676, "loss": 0.744488537311554, "lr": 1.8232892640937482e-05, "epoch": 0.46555066079295154, "percentage": 23.27, "elapsed_time": "1:57:03", "remaining_time": "6:25:54"} +{"current_steps": 1322, "total_steps": 5676, "loss": 0.8203051090240479, "lr": 1.822958405297145e-05, "epoch": 0.46590308370044053, "percentage": 23.29, "elapsed_time": "1:57:08", "remaining_time": "6:25:47"} +{"current_steps": 1323, "total_steps": 5676, "loss": 0.6452913284301758, "lr": 1.8226272671317747e-05, "epoch": 0.4662555066079295, "percentage": 23.31, "elapsed_time": "1:57:13", "remaining_time": "6:25:43"} +{"current_steps": 1324, "total_steps": 5676, "loss": 0.7362639307975769, "lr": 1.8222958497100482e-05, "epoch": 0.4666079295154185, "percentage": 23.33, "elapsed_time": "1:57:18", "remaining_time": "6:25:35"} +{"current_steps": 1325, "total_steps": 5676, "loss": 0.8192600011825562, "lr": 1.8219641531444713e-05, "epoch": 0.4669603524229075, "percentage": 23.34, "elapsed_time": "1:57:23", "remaining_time": "6:25:29"} +{"current_steps": 1326, "total_steps": 5676, "loss": 0.8391410112380981, "lr": 1.8216321775476452e-05, "epoch": 0.4673127753303965, "percentage": 23.36, "elapsed_time": "1:57:29", "remaining_time": "6:25:26"} +{"current_steps": 1327, "total_steps": 5676, "loss": 0.8723593354225159, "lr": 1.8212999230322648e-05, "epoch": 0.46766519823788544, "percentage": 23.38, "elapsed_time": "1:57:35", "remaining_time": "6:25:22"} +{"current_steps": 1328, "total_steps": 5676, 
"loss": 0.6891233921051025, "lr": 1.8209673897111208e-05, "epoch": 0.46801762114537443, "percentage": 23.4, "elapsed_time": "1:57:41", "remaining_time": "6:25:19"} +{"current_steps": 1329, "total_steps": 5676, "loss": 0.6585180759429932, "lr": 1.820634577697097e-05, "epoch": 0.4683700440528634, "percentage": 23.41, "elapsed_time": "1:57:48", "remaining_time": "6:25:18"} +{"current_steps": 1330, "total_steps": 5676, "loss": 0.9556418657302856, "lr": 1.8203014871031732e-05, "epoch": 0.4687224669603524, "percentage": 23.43, "elapsed_time": "1:57:53", "remaining_time": "6:25:12"} +{"current_steps": 1331, "total_steps": 5676, "loss": 0.803380012512207, "lr": 1.8199681180424234e-05, "epoch": 0.4690748898678414, "percentage": 23.45, "elapsed_time": "1:57:57", "remaining_time": "6:25:04"} +{"current_steps": 1332, "total_steps": 5676, "loss": 0.7090115547180176, "lr": 1.819634470628016e-05, "epoch": 0.4694273127753304, "percentage": 23.47, "elapsed_time": "1:58:01", "remaining_time": "6:24:53"} +{"current_steps": 1333, "total_steps": 5676, "loss": 0.6314720511436462, "lr": 1.8193005449732134e-05, "epoch": 0.4697797356828194, "percentage": 23.48, "elapsed_time": "1:58:05", "remaining_time": "6:24:45"} +{"current_steps": 1334, "total_steps": 5676, "loss": 0.72248375415802, "lr": 1.8189663411913737e-05, "epoch": 0.4701321585903084, "percentage": 23.5, "elapsed_time": "1:58:09", "remaining_time": "6:24:36"} +{"current_steps": 1335, "total_steps": 5676, "loss": 0.6192474961280823, "lr": 1.818631859395948e-05, "epoch": 0.47048458149779737, "percentage": 23.52, "elapsed_time": "1:58:14", "remaining_time": "6:24:30"} +{"current_steps": 1336, "total_steps": 5676, "loss": 0.6354564428329468, "lr": 1.818297099700483e-05, "epoch": 0.47083700440528636, "percentage": 23.54, "elapsed_time": "1:58:19", "remaining_time": "6:24:23"} +{"current_steps": 1337, "total_steps": 5676, "loss": 0.7577195167541504, "lr": 1.817962062218619e-05, "epoch": 0.47118942731277536, "percentage": 23.56, 
"elapsed_time": "1:58:25", "remaining_time": "6:24:19"} +{"current_steps": 1338, "total_steps": 5676, "loss": 0.8064994812011719, "lr": 1.8176267470640908e-05, "epoch": 0.4715418502202643, "percentage": 23.57, "elapsed_time": "1:58:31", "remaining_time": "6:24:17"} +{"current_steps": 1339, "total_steps": 5676, "loss": 0.5994154214859009, "lr": 1.8172911543507276e-05, "epoch": 0.4718942731277533, "percentage": 23.59, "elapsed_time": "1:58:37", "remaining_time": "6:24:12"} +{"current_steps": 1340, "total_steps": 5676, "loss": 0.7483634948730469, "lr": 1.8169552841924524e-05, "epoch": 0.4722466960352423, "percentage": 23.61, "elapsed_time": "1:58:41", "remaining_time": "6:24:02"} +{"current_steps": 1341, "total_steps": 5676, "loss": 0.817699134349823, "lr": 1.8166191367032828e-05, "epoch": 0.47259911894273127, "percentage": 23.63, "elapsed_time": "1:58:45", "remaining_time": "6:23:55"} +{"current_steps": 1342, "total_steps": 5676, "loss": 0.7262923717498779, "lr": 1.8162827119973305e-05, "epoch": 0.47295154185022026, "percentage": 23.64, "elapsed_time": "1:58:51", "remaining_time": "6:23:50"} +{"current_steps": 1343, "total_steps": 5676, "loss": 0.6097851991653442, "lr": 1.8159460101888013e-05, "epoch": 0.47330396475770925, "percentage": 23.66, "elapsed_time": "1:58:54", "remaining_time": "6:23:39"} +{"current_steps": 1344, "total_steps": 5676, "loss": 0.7258971929550171, "lr": 1.8156090313919944e-05, "epoch": 0.47365638766519824, "percentage": 23.68, "elapsed_time": "1:59:00", "remaining_time": "6:23:36"} +{"current_steps": 1345, "total_steps": 5676, "loss": 0.6300361156463623, "lr": 1.8152717757213045e-05, "epoch": 0.47400881057268723, "percentage": 23.7, "elapsed_time": "1:59:07", "remaining_time": "6:23:35"} +{"current_steps": 1346, "total_steps": 5676, "loss": 0.7339942455291748, "lr": 1.8149342432912184e-05, "epoch": 0.4743612334801762, "percentage": 23.71, "elapsed_time": "1:59:12", "remaining_time": "6:23:28"} +{"current_steps": 1347, "total_steps": 5676, 
"loss": 0.7520095109939575, "lr": 1.8145964342163188e-05, "epoch": 0.4747136563876652, "percentage": 23.73, "elapsed_time": "1:59:17", "remaining_time": "6:23:22"} +{"current_steps": 1348, "total_steps": 5676, "loss": 0.7276853322982788, "lr": 1.814258348611281e-05, "epoch": 0.4750660792951542, "percentage": 23.75, "elapsed_time": "1:59:22", "remaining_time": "6:23:15"} +{"current_steps": 1349, "total_steps": 5676, "loss": 0.8004029989242554, "lr": 1.8139199865908742e-05, "epoch": 0.47541850220264315, "percentage": 23.77, "elapsed_time": "1:59:26", "remaining_time": "6:23:06"} +{"current_steps": 1350, "total_steps": 5676, "loss": 0.6932536363601685, "lr": 1.8135813482699623e-05, "epoch": 0.47577092511013214, "percentage": 23.78, "elapsed_time": "1:59:31", "remaining_time": "6:23:00"} +{"current_steps": 1351, "total_steps": 5676, "loss": 0.7697082161903381, "lr": 1.8132424337635026e-05, "epoch": 0.47612334801762113, "percentage": 23.8, "elapsed_time": "1:59:36", "remaining_time": "6:22:54"} +{"current_steps": 1352, "total_steps": 5676, "loss": 0.6472513675689697, "lr": 1.8129032431865453e-05, "epoch": 0.4764757709251101, "percentage": 23.82, "elapsed_time": "1:59:41", "remaining_time": "6:22:49"} +{"current_steps": 1353, "total_steps": 5676, "loss": 0.6483110785484314, "lr": 1.8125637766542353e-05, "epoch": 0.4768281938325991, "percentage": 23.84, "elapsed_time": "1:59:47", "remaining_time": "6:22:45"} +{"current_steps": 1354, "total_steps": 5676, "loss": 0.5495485067367554, "lr": 1.8122240342818113e-05, "epoch": 0.4771806167400881, "percentage": 23.85, "elapsed_time": "1:59:52", "remaining_time": "6:22:38"} +{"current_steps": 1355, "total_steps": 5676, "loss": 0.5235577821731567, "lr": 1.811884016184605e-05, "epoch": 0.4775330396475771, "percentage": 23.87, "elapsed_time": "1:59:59", "remaining_time": "6:22:38"} +{"current_steps": 1356, "total_steps": 5676, "loss": 0.7852121591567993, "lr": 1.811543722478042e-05, "epoch": 0.4778854625550661, "percentage": 23.89, 
"elapsed_time": "2:00:05", "remaining_time": "6:22:36"} +{"current_steps": 1357, "total_steps": 5676, "loss": 0.6704862713813782, "lr": 1.811203153277641e-05, "epoch": 0.4782378854625551, "percentage": 23.91, "elapsed_time": "2:00:11", "remaining_time": "6:22:32"} +{"current_steps": 1358, "total_steps": 5676, "loss": 0.5964453220367432, "lr": 1.8108623086990156e-05, "epoch": 0.47859030837004407, "percentage": 23.93, "elapsed_time": "2:00:15", "remaining_time": "6:22:24"} +{"current_steps": 1359, "total_steps": 5676, "loss": 0.6697995066642761, "lr": 1.8105211888578708e-05, "epoch": 0.47894273127753306, "percentage": 23.94, "elapsed_time": "2:00:21", "remaining_time": "6:22:18"} +{"current_steps": 1360, "total_steps": 5676, "loss": 0.6335821151733398, "lr": 1.810179793870007e-05, "epoch": 0.479295154185022, "percentage": 23.96, "elapsed_time": "2:00:24", "remaining_time": "6:22:08"} +{"current_steps": 1361, "total_steps": 5676, "loss": 0.7925145626068115, "lr": 1.8098381238513173e-05, "epoch": 0.479647577092511, "percentage": 23.98, "elapsed_time": "2:00:31", "remaining_time": "6:22:08"} +{"current_steps": 1362, "total_steps": 5676, "loss": 0.6567563414573669, "lr": 1.809496178917787e-05, "epoch": 0.48, "percentage": 24.0, "elapsed_time": "2:00:36", "remaining_time": "6:22:01"} +{"current_steps": 1363, "total_steps": 5676, "loss": 0.6318811178207397, "lr": 1.809153959185497e-05, "epoch": 0.480352422907489, "percentage": 24.01, "elapsed_time": "2:00:42", "remaining_time": "6:21:57"} +{"current_steps": 1364, "total_steps": 5676, "loss": 0.7309727668762207, "lr": 1.8088114647706195e-05, "epoch": 0.48070484581497797, "percentage": 24.03, "elapsed_time": "2:00:46", "remaining_time": "6:21:49"} +{"current_steps": 1365, "total_steps": 5676, "loss": 0.7109836339950562, "lr": 1.8084686957894207e-05, "epoch": 0.48105726872246696, "percentage": 24.05, "elapsed_time": "2:00:51", "remaining_time": "6:21:43"} +{"current_steps": 1366, "total_steps": 5676, "loss": 
0.7475707530975342, "lr": 1.8081256523582604e-05, "epoch": 0.48140969162995595, "percentage": 24.07, "elapsed_time": "2:00:57", "remaining_time": "6:21:39"} +{"current_steps": 1367, "total_steps": 5676, "loss": 0.7149914503097534, "lr": 1.8077823345935904e-05, "epoch": 0.48176211453744494, "percentage": 24.08, "elapsed_time": "2:01:03", "remaining_time": "6:21:34"} +{"current_steps": 1368, "total_steps": 5676, "loss": 0.7294478416442871, "lr": 1.8074387426119574e-05, "epoch": 0.48211453744493393, "percentage": 24.1, "elapsed_time": "2:01:09", "remaining_time": "6:21:31"} +{"current_steps": 1369, "total_steps": 5676, "loss": 0.7115635871887207, "lr": 1.8070948765299995e-05, "epoch": 0.4824669603524229, "percentage": 24.12, "elapsed_time": "2:01:13", "remaining_time": "6:21:22"} +{"current_steps": 1370, "total_steps": 5676, "loss": 0.7046270966529846, "lr": 1.806750736464449e-05, "epoch": 0.4828193832599119, "percentage": 24.14, "elapsed_time": "2:01:18", "remaining_time": "6:21:16"} +{"current_steps": 1371, "total_steps": 5676, "loss": 0.6206589937210083, "lr": 1.8064063225321305e-05, "epoch": 0.4831718061674009, "percentage": 24.15, "elapsed_time": "2:01:23", "remaining_time": "6:21:11"} +{"current_steps": 1372, "total_steps": 5676, "loss": 0.7135940194129944, "lr": 1.8060616348499612e-05, "epoch": 0.48352422907488984, "percentage": 24.17, "elapsed_time": "2:01:30", "remaining_time": "6:21:11"} +{"current_steps": 1373, "total_steps": 5676, "loss": 0.8360849618911743, "lr": 1.8057166735349533e-05, "epoch": 0.48387665198237884, "percentage": 24.19, "elapsed_time": "2:01:36", "remaining_time": "6:21:07"} +{"current_steps": 1374, "total_steps": 5676, "loss": 0.6842360496520996, "lr": 1.805371438704209e-05, "epoch": 0.4842290748898678, "percentage": 24.21, "elapsed_time": "2:01:41", "remaining_time": "6:21:02"} +{"current_steps": 1375, "total_steps": 5676, "loss": 0.7615031003952026, "lr": 1.8050259304749254e-05, "epoch": 0.4845814977973568, "percentage": 24.22, 
"elapsed_time": "2:01:47", "remaining_time": "6:20:58"} +{"current_steps": 1376, "total_steps": 5676, "loss": 0.8019323348999023, "lr": 1.804680148964392e-05, "epoch": 0.4849339207048458, "percentage": 24.24, "elapsed_time": "2:01:52", "remaining_time": "6:20:50"} +{"current_steps": 1377, "total_steps": 5676, "loss": 0.7882958054542542, "lr": 1.8043340942899906e-05, "epoch": 0.4852863436123348, "percentage": 24.26, "elapsed_time": "2:01:57", "remaining_time": "6:20:46"} +{"current_steps": 1378, "total_steps": 5676, "loss": 0.7504314184188843, "lr": 1.8039877665691955e-05, "epoch": 0.4856387665198238, "percentage": 24.28, "elapsed_time": "2:02:03", "remaining_time": "6:20:42"} +{"current_steps": 1379, "total_steps": 5676, "loss": 0.6634547710418701, "lr": 1.803641165919575e-05, "epoch": 0.4859911894273128, "percentage": 24.3, "elapsed_time": "2:02:07", "remaining_time": "6:20:32"} +{"current_steps": 1380, "total_steps": 5676, "loss": 0.7744965553283691, "lr": 1.803294292458789e-05, "epoch": 0.4863436123348018, "percentage": 24.31, "elapsed_time": "2:02:14", "remaining_time": "6:20:31"} +{"current_steps": 1381, "total_steps": 5676, "loss": 0.6322098970413208, "lr": 1.8029471463045904e-05, "epoch": 0.48669603524229077, "percentage": 24.33, "elapsed_time": "2:02:19", "remaining_time": "6:20:26"} +{"current_steps": 1382, "total_steps": 5676, "loss": 0.7675940990447998, "lr": 1.8025997275748237e-05, "epoch": 0.48704845814977976, "percentage": 24.35, "elapsed_time": "2:02:24", "remaining_time": "6:20:19"} +{"current_steps": 1383, "total_steps": 5676, "loss": 0.805001974105835, "lr": 1.8022520363874275e-05, "epoch": 0.4874008810572687, "percentage": 24.37, "elapsed_time": "2:02:29", "remaining_time": "6:20:13"} +{"current_steps": 1384, "total_steps": 5676, "loss": 0.7647902369499207, "lr": 1.8019040728604322e-05, "epoch": 0.4877533039647577, "percentage": 24.38, "elapsed_time": "2:02:34", "remaining_time": "6:20:07"} +{"current_steps": 1385, "total_steps": 5676, "loss": 
0.7267208099365234, "lr": 1.8015558371119604e-05, "epoch": 0.4881057268722467, "percentage": 24.4, "elapsed_time": "2:02:38", "remaining_time": "6:19:59"} +{"current_steps": 1386, "total_steps": 5676, "loss": 0.9259899854660034, "lr": 1.801207329260227e-05, "epoch": 0.4884581497797357, "percentage": 24.42, "elapsed_time": "2:02:44", "remaining_time": "6:19:53"} +{"current_steps": 1387, "total_steps": 5676, "loss": 0.7127895951271057, "lr": 1.8008585494235398e-05, "epoch": 0.48881057268722466, "percentage": 24.44, "elapsed_time": "2:02:49", "remaining_time": "6:19:47"} +{"current_steps": 1388, "total_steps": 5676, "loss": 0.5890867710113525, "lr": 1.8005094977202987e-05, "epoch": 0.48916299559471366, "percentage": 24.45, "elapsed_time": "2:02:53", "remaining_time": "6:19:40"} +{"current_steps": 1389, "total_steps": 5676, "loss": 0.9388052225112915, "lr": 1.800160174268996e-05, "epoch": 0.48951541850220265, "percentage": 24.47, "elapsed_time": "2:02:58", "remaining_time": "6:19:32"} +{"current_steps": 1390, "total_steps": 5676, "loss": 0.7282747626304626, "lr": 1.799810579188216e-05, "epoch": 0.48986784140969164, "percentage": 24.49, "elapsed_time": "2:03:04", "remaining_time": "6:19:29"} +{"current_steps": 1391, "total_steps": 5676, "loss": 0.743558943271637, "lr": 1.7994607125966354e-05, "epoch": 0.49022026431718063, "percentage": 24.51, "elapsed_time": "2:03:09", "remaining_time": "6:19:23"} +{"current_steps": 1392, "total_steps": 5676, "loss": 0.794719934463501, "lr": 1.7991105746130234e-05, "epoch": 0.4905726872246696, "percentage": 24.52, "elapsed_time": "2:03:14", "remaining_time": "6:19:18"} +{"current_steps": 1393, "total_steps": 5676, "loss": 0.7320685982704163, "lr": 1.7987601653562402e-05, "epoch": 0.4909251101321586, "percentage": 24.54, "elapsed_time": "2:03:19", "remaining_time": "6:19:11"} +{"current_steps": 1394, "total_steps": 5676, "loss": 0.7376105785369873, "lr": 1.798409484945239e-05, "epoch": 0.49127753303964755, "percentage": 24.56, 
"elapsed_time": "2:03:23", "remaining_time": "6:19:02"} +{"current_steps": 1395, "total_steps": 5676, "loss": 0.7474706172943115, "lr": 1.7980585334990652e-05, "epoch": 0.49162995594713654, "percentage": 24.58, "elapsed_time": "2:03:30", "remaining_time": "6:19:01"} +{"current_steps": 1396, "total_steps": 5676, "loss": 0.5799805521965027, "lr": 1.797707311136856e-05, "epoch": 0.49198237885462553, "percentage": 24.59, "elapsed_time": "2:03:36", "remaining_time": "6:18:57"} +{"current_steps": 1397, "total_steps": 5676, "loss": 0.7515959739685059, "lr": 1.79735581797784e-05, "epoch": 0.4923348017621145, "percentage": 24.61, "elapsed_time": "2:03:42", "remaining_time": "6:18:55"} +{"current_steps": 1398, "total_steps": 5676, "loss": 0.6035799980163574, "lr": 1.797004054141339e-05, "epoch": 0.4926872246696035, "percentage": 24.63, "elapsed_time": "2:03:46", "remaining_time": "6:18:47"} +{"current_steps": 1399, "total_steps": 5676, "loss": 0.7613668441772461, "lr": 1.796652019746765e-05, "epoch": 0.4930396475770925, "percentage": 24.65, "elapsed_time": "2:03:53", "remaining_time": "6:18:44"} +{"current_steps": 1400, "total_steps": 5676, "loss": 0.8780882954597473, "lr": 1.7962997149136226e-05, "epoch": 0.4933920704845815, "percentage": 24.67, "elapsed_time": "2:03:57", "remaining_time": "6:18:36"} +{"current_steps": 1401, "total_steps": 5676, "loss": 0.8661091327667236, "lr": 1.795947139761509e-05, "epoch": 0.4937444933920705, "percentage": 24.68, "elapsed_time": "2:04:07", "remaining_time": "6:18:45"} +{"current_steps": 1402, "total_steps": 5676, "loss": 0.6893571019172668, "lr": 1.7955942944101124e-05, "epoch": 0.4940969162995595, "percentage": 24.7, "elapsed_time": "2:04:12", "remaining_time": "6:18:40"} +{"current_steps": 1403, "total_steps": 5676, "loss": 0.787032961845398, "lr": 1.7952411789792125e-05, "epoch": 0.4944493392070485, "percentage": 24.72, "elapsed_time": "2:04:19", "remaining_time": "6:18:37"} +{"current_steps": 1404, "total_steps": 5676, "loss": 
0.5346347689628601, "lr": 1.7948877935886812e-05, "epoch": 0.49480176211453747, "percentage": 24.74, "elapsed_time": "2:04:24", "remaining_time": "6:18:32"} +{"current_steps": 1405, "total_steps": 5676, "loss": 0.8090060949325562, "lr": 1.7945341383584818e-05, "epoch": 0.4951541850220264, "percentage": 24.75, "elapsed_time": "2:04:29", "remaining_time": "6:18:26"} +{"current_steps": 1406, "total_steps": 5676, "loss": 0.6321496963500977, "lr": 1.7941802134086695e-05, "epoch": 0.4955066079295154, "percentage": 24.77, "elapsed_time": "2:04:35", "remaining_time": "6:18:23"} +{"current_steps": 1407, "total_steps": 5676, "loss": 0.6405632495880127, "lr": 1.7938260188593903e-05, "epoch": 0.4958590308370044, "percentage": 24.79, "elapsed_time": "2:04:39", "remaining_time": "6:18:13"} +{"current_steps": 1408, "total_steps": 5676, "loss": 0.7665356397628784, "lr": 1.7934715548308825e-05, "epoch": 0.4962114537444934, "percentage": 24.81, "elapsed_time": "2:04:44", "remaining_time": "6:18:07"} +{"current_steps": 1409, "total_steps": 5676, "loss": 0.7960416078567505, "lr": 1.7931168214434757e-05, "epoch": 0.49656387665198237, "percentage": 24.82, "elapsed_time": "2:04:48", "remaining_time": "6:17:59"} +{"current_steps": 1410, "total_steps": 5676, "loss": 0.8080639839172363, "lr": 1.7927618188175908e-05, "epoch": 0.49691629955947136, "percentage": 24.84, "elapsed_time": "2:04:53", "remaining_time": "6:17:52"} +{"current_steps": 1411, "total_steps": 5676, "loss": 0.6503266096115112, "lr": 1.79240654707374e-05, "epoch": 0.49726872246696036, "percentage": 24.86, "elapsed_time": "2:05:00", "remaining_time": "6:17:50"} +{"current_steps": 1412, "total_steps": 5676, "loss": 0.6063007116317749, "lr": 1.792051006332527e-05, "epoch": 0.49762114537444935, "percentage": 24.88, "elapsed_time": "2:05:05", "remaining_time": "6:17:44"} +{"current_steps": 1413, "total_steps": 5676, "loss": 0.6861660480499268, "lr": 1.791695196714647e-05, "epoch": 0.49797356828193834, "percentage": 24.89, 
"elapsed_time": "2:05:10", "remaining_time": "6:17:38"} +{"current_steps": 1414, "total_steps": 5676, "loss": 0.7064980268478394, "lr": 1.791339118340886e-05, "epoch": 0.49832599118942733, "percentage": 24.91, "elapsed_time": "2:05:16", "remaining_time": "6:17:35"} +{"current_steps": 1415, "total_steps": 5676, "loss": 0.6102496981620789, "lr": 1.7909827713321214e-05, "epoch": 0.4986784140969163, "percentage": 24.93, "elapsed_time": "2:05:21", "remaining_time": "6:17:28"} +{"current_steps": 1416, "total_steps": 5676, "loss": 0.7460618019104004, "lr": 1.790626155809323e-05, "epoch": 0.49903083700440526, "percentage": 24.95, "elapsed_time": "2:05:27", "remaining_time": "6:17:24"} +{"current_steps": 1417, "total_steps": 5676, "loss": 0.7124448418617249, "lr": 1.7902692718935496e-05, "epoch": 0.49938325991189425, "percentage": 24.96, "elapsed_time": "2:05:32", "remaining_time": "6:17:19"} +{"current_steps": 1418, "total_steps": 5676, "loss": 0.7194923162460327, "lr": 1.7899121197059525e-05, "epoch": 0.49973568281938324, "percentage": 24.98, "elapsed_time": "2:05:37", "remaining_time": "6:17:13"} +{"current_steps": 1419, "total_steps": 5676, "loss": 0.6633901596069336, "lr": 1.7895546993677736e-05, "epoch": 0.5000881057268722, "percentage": 25.0, "elapsed_time": "2:05:41", "remaining_time": "6:17:05"} +{"current_steps": 1420, "total_steps": 5676, "loss": 0.8554216623306274, "lr": 1.7891970110003463e-05, "epoch": 0.5004405286343613, "percentage": 25.02, "elapsed_time": "2:05:47", "remaining_time": "6:17:00"} +{"current_steps": 1421, "total_steps": 5676, "loss": 0.7259502410888672, "lr": 1.7888390547250944e-05, "epoch": 0.5007929515418502, "percentage": 25.04, "elapsed_time": "2:05:53", "remaining_time": "6:16:57"} +{"current_steps": 1422, "total_steps": 5676, "loss": 0.7330816984176636, "lr": 1.788480830663533e-05, "epoch": 0.5011453744493392, "percentage": 25.05, "elapsed_time": "2:05:58", "remaining_time": "6:16:51"} +{"current_steps": 1423, "total_steps": 5676, "loss": 
0.7793391346931458, "lr": 1.7881223389372678e-05, "epoch": 0.5014977973568282, "percentage": 25.07, "elapsed_time": "2:06:04", "remaining_time": "6:16:48"} +{"current_steps": 1424, "total_steps": 5676, "loss": 0.7387483716011047, "lr": 1.787763579667996e-05, "epoch": 0.5018502202643171, "percentage": 25.09, "elapsed_time": "2:06:11", "remaining_time": "6:16:47"} +{"current_steps": 1425, "total_steps": 5676, "loss": 0.6665850877761841, "lr": 1.787404552977505e-05, "epoch": 0.5022026431718062, "percentage": 25.11, "elapsed_time": "2:06:17", "remaining_time": "6:16:45"} +{"current_steps": 1426, "total_steps": 5676, "loss": 0.7487791180610657, "lr": 1.7870452589876733e-05, "epoch": 0.5025550660792951, "percentage": 25.12, "elapsed_time": "2:06:22", "remaining_time": "6:16:39"} +{"current_steps": 1427, "total_steps": 5676, "loss": 0.6048247814178467, "lr": 1.78668569782047e-05, "epoch": 0.5029074889867842, "percentage": 25.14, "elapsed_time": "2:06:27", "remaining_time": "6:16:31"} +{"current_steps": 1428, "total_steps": 5676, "loss": 0.7196261882781982, "lr": 1.786325869597955e-05, "epoch": 0.5032599118942731, "percentage": 25.16, "elapsed_time": "2:06:31", "remaining_time": "6:16:24"} +{"current_steps": 1429, "total_steps": 5676, "loss": 0.6845135688781738, "lr": 1.785965774442278e-05, "epoch": 0.5036123348017622, "percentage": 25.18, "elapsed_time": "2:06:36", "remaining_time": "6:16:15"} +{"current_steps": 1430, "total_steps": 5676, "loss": 0.7314398288726807, "lr": 1.785605412475681e-05, "epoch": 0.5039647577092511, "percentage": 25.19, "elapsed_time": "2:06:40", "remaining_time": "6:16:08"} +{"current_steps": 1431, "total_steps": 5676, "loss": 0.7171268463134766, "lr": 1.7852447838204957e-05, "epoch": 0.5043171806167401, "percentage": 25.21, "elapsed_time": "2:06:46", "remaining_time": "6:16:04"} +{"current_steps": 1432, "total_steps": 5676, "loss": 0.8349916338920593, "lr": 1.784883888599144e-05, "epoch": 0.5046696035242291, "percentage": 25.23, "elapsed_time": 
"2:06:50", "remaining_time": "6:15:56"} +{"current_steps": 1433, "total_steps": 5676, "loss": 0.6375530958175659, "lr": 1.7845227269341387e-05, "epoch": 0.505022026431718, "percentage": 25.25, "elapsed_time": "2:06:56", "remaining_time": "6:15:53"} +{"current_steps": 1434, "total_steps": 5676, "loss": 0.8156824707984924, "lr": 1.7841612989480824e-05, "epoch": 0.505374449339207, "percentage": 25.26, "elapsed_time": "2:07:01", "remaining_time": "6:15:45"} +{"current_steps": 1435, "total_steps": 5676, "loss": 0.7186283469200134, "lr": 1.7837996047636696e-05, "epoch": 0.505726872246696, "percentage": 25.28, "elapsed_time": "2:07:05", "remaining_time": "6:15:35"} +{"current_steps": 1436, "total_steps": 5676, "loss": 0.6130756139755249, "lr": 1.7834376445036834e-05, "epoch": 0.506079295154185, "percentage": 25.3, "elapsed_time": "2:07:09", "remaining_time": "6:15:27"} +{"current_steps": 1437, "total_steps": 5676, "loss": 0.6948508024215698, "lr": 1.7830754182909985e-05, "epoch": 0.506431718061674, "percentage": 25.32, "elapsed_time": "2:07:14", "remaining_time": "6:15:22"} +{"current_steps": 1438, "total_steps": 5676, "loss": 0.7049688100814819, "lr": 1.7827129262485793e-05, "epoch": 0.506784140969163, "percentage": 25.33, "elapsed_time": "2:07:20", "remaining_time": "6:15:18"} +{"current_steps": 1439, "total_steps": 5676, "loss": 0.7491527795791626, "lr": 1.7823501684994805e-05, "epoch": 0.507136563876652, "percentage": 25.35, "elapsed_time": "2:07:25", "remaining_time": "6:15:12"} +{"current_steps": 1440, "total_steps": 5676, "loss": 0.8718780279159546, "lr": 1.781987145166847e-05, "epoch": 0.507488986784141, "percentage": 25.37, "elapsed_time": "2:07:30", "remaining_time": "6:15:05"} +{"current_steps": 1441, "total_steps": 5676, "loss": 0.5675592422485352, "lr": 1.7816238563739144e-05, "epoch": 0.5078414096916299, "percentage": 25.39, "elapsed_time": "2:07:35", "remaining_time": "6:14:59"} +{"current_steps": 1442, "total_steps": 5676, "loss": 0.7472085952758789, "lr": 
1.7812603022440076e-05, "epoch": 0.508193832599119, "percentage": 25.41, "elapsed_time": "2:07:39", "remaining_time": "6:14:49"} +{"current_steps": 1443, "total_steps": 5676, "loss": 0.7673810720443726, "lr": 1.7808964829005416e-05, "epoch": 0.5085462555066079, "percentage": 25.42, "elapsed_time": "2:07:45", "remaining_time": "6:14:46"} +{"current_steps": 1444, "total_steps": 5676, "loss": 0.8245630264282227, "lr": 1.7805323984670224e-05, "epoch": 0.5088986784140969, "percentage": 25.44, "elapsed_time": "2:07:49", "remaining_time": "6:14:37"} +{"current_steps": 1445, "total_steps": 5676, "loss": 0.8578429222106934, "lr": 1.780168049067045e-05, "epoch": 0.5092511013215859, "percentage": 25.46, "elapsed_time": "2:07:54", "remaining_time": "6:14:31"} +{"current_steps": 1446, "total_steps": 5676, "loss": 0.6631708145141602, "lr": 1.7798034348242944e-05, "epoch": 0.5096035242290748, "percentage": 25.48, "elapsed_time": "2:08:01", "remaining_time": "6:14:31"} +{"current_steps": 1447, "total_steps": 5676, "loss": 0.8106615543365479, "lr": 1.779438555862546e-05, "epoch": 0.5099559471365639, "percentage": 25.49, "elapsed_time": "2:08:06", "remaining_time": "6:14:25"} +{"current_steps": 1448, "total_steps": 5676, "loss": 0.7033256888389587, "lr": 1.7790734123056654e-05, "epoch": 0.5103083700440528, "percentage": 25.51, "elapsed_time": "2:08:11", "remaining_time": "6:14:19"} +{"current_steps": 1449, "total_steps": 5676, "loss": 0.7124278545379639, "lr": 1.7787080042776065e-05, "epoch": 0.5106607929515419, "percentage": 25.53, "elapsed_time": "2:08:18", "remaining_time": "6:14:16"} +{"current_steps": 1450, "total_steps": 5676, "loss": 0.7834827899932861, "lr": 1.7783423319024144e-05, "epoch": 0.5110132158590308, "percentage": 25.55, "elapsed_time": "2:08:23", "remaining_time": "6:14:10"} +{"current_steps": 1451, "total_steps": 5676, "loss": 0.6762892603874207, "lr": 1.777976395304224e-05, "epoch": 0.5113656387665199, "percentage": 25.56, "elapsed_time": "2:08:28", 
"remaining_time": "6:14:06"} +{"current_steps": 1452, "total_steps": 5676, "loss": 0.7317261695861816, "lr": 1.7776101946072586e-05, "epoch": 0.5117180616740088, "percentage": 25.58, "elapsed_time": "2:08:33", "remaining_time": "6:14:00"} +{"current_steps": 1453, "total_steps": 5676, "loss": 0.6278417110443115, "lr": 1.7772437299358324e-05, "epoch": 0.5120704845814978, "percentage": 25.6, "elapsed_time": "2:08:38", "remaining_time": "6:13:53"} +{"current_steps": 1454, "total_steps": 5676, "loss": 0.6638025045394897, "lr": 1.7768770014143485e-05, "epoch": 0.5124229074889868, "percentage": 25.62, "elapsed_time": "2:08:44", "remaining_time": "6:13:49"} +{"current_steps": 1455, "total_steps": 5676, "loss": 0.786564826965332, "lr": 1.7765100091673e-05, "epoch": 0.5127753303964758, "percentage": 25.63, "elapsed_time": "2:08:50", "remaining_time": "6:13:45"} +{"current_steps": 1456, "total_steps": 5676, "loss": 0.7483570575714111, "lr": 1.776142753319269e-05, "epoch": 0.5131277533039648, "percentage": 25.65, "elapsed_time": "2:08:56", "remaining_time": "6:13:41"} +{"current_steps": 1457, "total_steps": 5676, "loss": 0.7036221027374268, "lr": 1.7757752339949284e-05, "epoch": 0.5134801762114537, "percentage": 25.67, "elapsed_time": "2:09:00", "remaining_time": "6:13:33"} +{"current_steps": 1458, "total_steps": 5676, "loss": 0.6903718709945679, "lr": 1.7754074513190384e-05, "epoch": 0.5138325991189427, "percentage": 25.69, "elapsed_time": "2:09:05", "remaining_time": "6:13:28"} +{"current_steps": 1459, "total_steps": 5676, "loss": 0.7728221416473389, "lr": 1.77503940541645e-05, "epoch": 0.5141850220264317, "percentage": 25.7, "elapsed_time": "2:09:11", "remaining_time": "6:13:25"} +{"current_steps": 1460, "total_steps": 5676, "loss": 0.7127183675765991, "lr": 1.774671096412104e-05, "epoch": 0.5145374449339207, "percentage": 25.72, "elapsed_time": "2:09:17", "remaining_time": "6:13:19"} +{"current_steps": 1461, "total_steps": 5676, "loss": 0.7801295518875122, "lr": 
1.7743025244310293e-05, "epoch": 0.5148898678414097, "percentage": 25.74, "elapsed_time": "2:09:21", "remaining_time": "6:13:12"} +{"current_steps": 1462, "total_steps": 5676, "loss": 0.632892906665802, "lr": 1.773933689598345e-05, "epoch": 0.5152422907488987, "percentage": 25.76, "elapsed_time": "2:09:28", "remaining_time": "6:13:10"} +{"current_steps": 1463, "total_steps": 5676, "loss": 0.7347458600997925, "lr": 1.7735645920392587e-05, "epoch": 0.5155947136563876, "percentage": 25.78, "elapsed_time": "2:09:33", "remaining_time": "6:13:05"} +{"current_steps": 1464, "total_steps": 5676, "loss": 0.6705365777015686, "lr": 1.7731952318790673e-05, "epoch": 0.5159471365638767, "percentage": 25.79, "elapsed_time": "2:09:37", "remaining_time": "6:12:56"} +{"current_steps": 1465, "total_steps": 5676, "loss": 0.696006715297699, "lr": 1.7728256092431577e-05, "epoch": 0.5162995594713656, "percentage": 25.81, "elapsed_time": "2:09:42", "remaining_time": "6:12:49"} +{"current_steps": 1466, "total_steps": 5676, "loss": 0.5922254323959351, "lr": 1.7724557242570045e-05, "epoch": 0.5166519823788547, "percentage": 25.83, "elapsed_time": "2:09:48", "remaining_time": "6:12:46"} +{"current_steps": 1467, "total_steps": 5676, "loss": 0.6162985563278198, "lr": 1.7720855770461733e-05, "epoch": 0.5170044052863436, "percentage": 25.85, "elapsed_time": "2:09:54", "remaining_time": "6:12:44"} +{"current_steps": 1468, "total_steps": 5676, "loss": 0.7319275140762329, "lr": 1.7717151677363164e-05, "epoch": 0.5173568281938326, "percentage": 25.86, "elapsed_time": "2:09:59", "remaining_time": "6:12:37"} +{"current_steps": 1469, "total_steps": 5676, "loss": 0.7349969148635864, "lr": 1.771344496453177e-05, "epoch": 0.5177092511013216, "percentage": 25.88, "elapsed_time": "2:10:05", "remaining_time": "6:12:33"} +{"current_steps": 1470, "total_steps": 5676, "loss": 0.8153162598609924, "lr": 1.7709735633225863e-05, "epoch": 0.5180616740088105, "percentage": 25.9, "elapsed_time": "2:10:10", 
"remaining_time": "6:12:28"} +{"current_steps": 1471, "total_steps": 5676, "loss": 0.6582974195480347, "lr": 1.7706023684704642e-05, "epoch": 0.5184140969162996, "percentage": 25.92, "elapsed_time": "2:10:15", "remaining_time": "6:12:21"} +{"current_steps": 1472, "total_steps": 5676, "loss": 0.696917712688446, "lr": 1.77023091202282e-05, "epoch": 0.5187665198237885, "percentage": 25.93, "elapsed_time": "2:10:20", "remaining_time": "6:12:15"} +{"current_steps": 1473, "total_steps": 5676, "loss": 0.6281285285949707, "lr": 1.769859194105752e-05, "epoch": 0.5191189427312776, "percentage": 25.95, "elapsed_time": "2:10:26", "remaining_time": "6:12:12"} +{"current_steps": 1474, "total_steps": 5676, "loss": 0.7687089443206787, "lr": 1.7694872148454463e-05, "epoch": 0.5194713656387665, "percentage": 25.97, "elapsed_time": "2:10:32", "remaining_time": "6:12:07"} +{"current_steps": 1475, "total_steps": 5676, "loss": 0.6928491592407227, "lr": 1.7691149743681783e-05, "epoch": 0.5198237885462555, "percentage": 25.99, "elapsed_time": "2:10:37", "remaining_time": "6:12:02"} +{"current_steps": 1476, "total_steps": 5676, "loss": 0.63843834400177, "lr": 1.7687424728003126e-05, "epoch": 0.5201762114537445, "percentage": 26.0, "elapsed_time": "2:10:42", "remaining_time": "6:11:55"} +{"current_steps": 1477, "total_steps": 5676, "loss": 0.8987904787063599, "lr": 1.7683697102683012e-05, "epoch": 0.5205286343612335, "percentage": 26.02, "elapsed_time": "2:10:47", "remaining_time": "6:11:48"} +{"current_steps": 1478, "total_steps": 5676, "loss": 0.7027539014816284, "lr": 1.767996686898686e-05, "epoch": 0.5208810572687225, "percentage": 26.04, "elapsed_time": "2:10:52", "remaining_time": "6:11:44"} +{"current_steps": 1479, "total_steps": 5676, "loss": 0.8490183353424072, "lr": 1.7676234028180964e-05, "epoch": 0.5212334801762114, "percentage": 26.06, "elapsed_time": "2:10:56", "remaining_time": "6:11:34"} +{"current_steps": 1480, "total_steps": 5676, "loss": 0.5885729789733887, "lr": 
1.7672498581532508e-05, "epoch": 0.5215859030837005, "percentage": 26.07, "elapsed_time": "2:11:01", "remaining_time": "6:11:27"} +{"current_steps": 1481, "total_steps": 5676, "loss": 0.627627968788147, "lr": 1.766876053030956e-05, "epoch": 0.5219383259911894, "percentage": 26.09, "elapsed_time": "2:11:06", "remaining_time": "6:11:23"} +{"current_steps": 1482, "total_steps": 5676, "loss": 0.6441413164138794, "lr": 1.766501987578108e-05, "epoch": 0.5222907488986784, "percentage": 26.11, "elapsed_time": "2:11:12", "remaining_time": "6:11:19"} +{"current_steps": 1483, "total_steps": 5676, "loss": 0.6199722290039062, "lr": 1.7661276619216888e-05, "epoch": 0.5226431718061674, "percentage": 26.13, "elapsed_time": "2:11:17", "remaining_time": "6:11:11"} +{"current_steps": 1484, "total_steps": 5676, "loss": 0.6364887952804565, "lr": 1.7657530761887715e-05, "epoch": 0.5229955947136564, "percentage": 26.15, "elapsed_time": "2:11:22", "remaining_time": "6:11:06"} +{"current_steps": 1485, "total_steps": 5676, "loss": 0.7279890775680542, "lr": 1.7653782305065158e-05, "epoch": 0.5233480176211454, "percentage": 26.16, "elapsed_time": "2:11:28", "remaining_time": "6:11:03"} +{"current_steps": 1486, "total_steps": 5676, "loss": 0.6552719473838806, "lr": 1.7650031250021704e-05, "epoch": 0.5237004405286344, "percentage": 26.18, "elapsed_time": "2:11:34", "remaining_time": "6:10:58"} +{"current_steps": 1487, "total_steps": 5676, "loss": 0.6778907775878906, "lr": 1.7646277598030717e-05, "epoch": 0.5240528634361233, "percentage": 26.2, "elapsed_time": "2:11:38", "remaining_time": "6:10:51"} +{"current_steps": 1488, "total_steps": 5676, "loss": 0.6581870317459106, "lr": 1.7642521350366447e-05, "epoch": 0.5244052863436124, "percentage": 26.22, "elapsed_time": "2:11:45", "remaining_time": "6:10:49"} +{"current_steps": 1489, "total_steps": 5676, "loss": 0.8529388904571533, "lr": 1.7638762508304025e-05, "epoch": 0.5247577092511013, "percentage": 26.23, "elapsed_time": "2:11:49", 
"remaining_time": "6:10:40"} +{"current_steps": 1490, "total_steps": 5676, "loss": 0.6601512432098389, "lr": 1.7635001073119458e-05, "epoch": 0.5251101321585903, "percentage": 26.25, "elapsed_time": "2:11:54", "remaining_time": "6:10:34"} +{"current_steps": 1491, "total_steps": 5676, "loss": 0.6347510814666748, "lr": 1.7631237046089637e-05, "epoch": 0.5254625550660793, "percentage": 26.27, "elapsed_time": "2:11:59", "remaining_time": "6:10:28"} +{"current_steps": 1492, "total_steps": 5676, "loss": 0.700560986995697, "lr": 1.762747042849233e-05, "epoch": 0.5258149779735682, "percentage": 26.29, "elapsed_time": "2:12:05", "remaining_time": "6:10:24"} +{"current_steps": 1493, "total_steps": 5676, "loss": 0.5641219019889832, "lr": 1.762370122160619e-05, "epoch": 0.5261674008810573, "percentage": 26.3, "elapsed_time": "2:12:09", "remaining_time": "6:10:16"} +{"current_steps": 1494, "total_steps": 5676, "loss": 0.8017370700836182, "lr": 1.761992942671074e-05, "epoch": 0.5265198237885462, "percentage": 26.32, "elapsed_time": "2:12:14", "remaining_time": "6:10:09"} +{"current_steps": 1495, "total_steps": 5676, "loss": 0.6345353126525879, "lr": 1.7616155045086394e-05, "epoch": 0.5268722466960353, "percentage": 26.34, "elapsed_time": "2:12:19", "remaining_time": "6:10:05"} +{"current_steps": 1496, "total_steps": 5676, "loss": 0.7118426561355591, "lr": 1.7612378078014432e-05, "epoch": 0.5272246696035242, "percentage": 26.36, "elapsed_time": "2:12:24", "remaining_time": "6:09:57"} +{"current_steps": 1497, "total_steps": 5676, "loss": 0.6186550855636597, "lr": 1.7608598526777017e-05, "epoch": 0.5275770925110133, "percentage": 26.37, "elapsed_time": "2:12:28", "remaining_time": "6:09:49"} +{"current_steps": 1498, "total_steps": 5676, "loss": 0.8351110219955444, "lr": 1.7604816392657195e-05, "epoch": 0.5279295154185022, "percentage": 26.39, "elapsed_time": "2:12:32", "remaining_time": "6:09:41"} +{"current_steps": 1499, "total_steps": 5676, "loss": 0.638684868812561, "lr": 
1.7601031676938877e-05, "epoch": 0.5282819383259912, "percentage": 26.41, "elapsed_time": "2:12:38", "remaining_time": "6:09:36"} +{"current_steps": 1500, "total_steps": 5676, "loss": 0.5118356943130493, "lr": 1.7597244380906856e-05, "epoch": 0.5286343612334802, "percentage": 26.43, "elapsed_time": "2:12:43", "remaining_time": "6:09:30"} +{"current_steps": 1501, "total_steps": 5676, "loss": 0.637636125087738, "lr": 1.7593454505846807e-05, "epoch": 0.5289867841409691, "percentage": 26.44, "elapsed_time": "2:12:52", "remaining_time": "6:09:36"} +{"current_steps": 1502, "total_steps": 5676, "loss": 0.8412563800811768, "lr": 1.7589662053045264e-05, "epoch": 0.5293392070484582, "percentage": 26.46, "elapsed_time": "2:12:57", "remaining_time": "6:09:30"} +{"current_steps": 1503, "total_steps": 5676, "loss": 0.7940464019775391, "lr": 1.758586702378966e-05, "epoch": 0.5296916299559471, "percentage": 26.48, "elapsed_time": "2:13:01", "remaining_time": "6:09:21"} +{"current_steps": 1504, "total_steps": 5676, "loss": 0.7136783599853516, "lr": 1.7582069419368276e-05, "epoch": 0.5300440528634361, "percentage": 26.5, "elapsed_time": "2:13:07", "remaining_time": "6:09:16"} +{"current_steps": 1505, "total_steps": 5676, "loss": 0.7212727069854736, "lr": 1.757826924107029e-05, "epoch": 0.5303964757709251, "percentage": 26.52, "elapsed_time": "2:13:12", "remaining_time": "6:09:10"} +{"current_steps": 1506, "total_steps": 5676, "loss": 0.7026032209396362, "lr": 1.757446649018574e-05, "epoch": 0.5307488986784141, "percentage": 26.53, "elapsed_time": "2:13:17", "remaining_time": "6:09:03"} +{"current_steps": 1507, "total_steps": 5676, "loss": 0.541954755783081, "lr": 1.7570661168005544e-05, "epoch": 0.5311013215859031, "percentage": 26.55, "elapsed_time": "2:13:22", "remaining_time": "6:08:57"} +{"current_steps": 1508, "total_steps": 5676, "loss": 0.6927075386047363, "lr": 1.7566853275821488e-05, "epoch": 0.5314537444933921, "percentage": 26.57, "elapsed_time": "2:13:28", 
"remaining_time": "6:08:53"} +{"current_steps": 1509, "total_steps": 5676, "loss": 0.6556441783905029, "lr": 1.7563042814926237e-05, "epoch": 0.531806167400881, "percentage": 26.59, "elapsed_time": "2:13:31", "remaining_time": "6:08:44"} +{"current_steps": 1510, "total_steps": 5676, "loss": 0.6895149946212769, "lr": 1.7559229786613317e-05, "epoch": 0.5321585903083701, "percentage": 26.6, "elapsed_time": "2:13:37", "remaining_time": "6:08:40"} +{"current_steps": 1511, "total_steps": 5676, "loss": 0.6178139448165894, "lr": 1.755541419217714e-05, "epoch": 0.532511013215859, "percentage": 26.62, "elapsed_time": "2:13:41", "remaining_time": "6:08:31"} +{"current_steps": 1512, "total_steps": 5676, "loss": 0.7645368576049805, "lr": 1.7551596032912975e-05, "epoch": 0.532863436123348, "percentage": 26.64, "elapsed_time": "2:13:47", "remaining_time": "6:08:26"} +{"current_steps": 1513, "total_steps": 5676, "loss": 0.8247367143630981, "lr": 1.7547775310116973e-05, "epoch": 0.533215859030837, "percentage": 26.66, "elapsed_time": "2:13:53", "remaining_time": "6:08:23"} +{"current_steps": 1514, "total_steps": 5676, "loss": 0.535837709903717, "lr": 1.7543952025086147e-05, "epoch": 0.533568281938326, "percentage": 26.67, "elapsed_time": "2:13:58", "remaining_time": "6:08:17"} +{"current_steps": 1515, "total_steps": 5676, "loss": 0.51450514793396, "lr": 1.7540126179118387e-05, "epoch": 0.533920704845815, "percentage": 26.69, "elapsed_time": "2:14:02", "remaining_time": "6:08:10"} +{"current_steps": 1516, "total_steps": 5676, "loss": 0.7962276935577393, "lr": 1.7536297773512444e-05, "epoch": 0.5342731277533039, "percentage": 26.71, "elapsed_time": "2:14:09", "remaining_time": "6:08:07"} +{"current_steps": 1517, "total_steps": 5676, "loss": 0.7586444616317749, "lr": 1.753246680956795e-05, "epoch": 0.534625550660793, "percentage": 26.73, "elapsed_time": "2:14:14", "remaining_time": "6:08:03"} +{"current_steps": 1518, "total_steps": 5676, "loss": 0.7074990272521973, "lr": 
1.752863328858539e-05, "epoch": 0.5349779735682819, "percentage": 26.74, "elapsed_time": "2:14:19", "remaining_time": "6:07:55"} +{"current_steps": 1519, "total_steps": 5676, "loss": 0.7409921884536743, "lr": 1.7524797211866126e-05, "epoch": 0.535330396475771, "percentage": 26.76, "elapsed_time": "2:14:25", "remaining_time": "6:07:51"} +{"current_steps": 1520, "total_steps": 5676, "loss": 0.7889251708984375, "lr": 1.7520958580712394e-05, "epoch": 0.5356828193832599, "percentage": 26.78, "elapsed_time": "2:14:29", "remaining_time": "6:07:43"} +{"current_steps": 1521, "total_steps": 5676, "loss": 0.8244975209236145, "lr": 1.751711739642728e-05, "epoch": 0.536035242290749, "percentage": 26.8, "elapsed_time": "2:14:35", "remaining_time": "6:07:38"} +{"current_steps": 1522, "total_steps": 5676, "loss": 0.6573888659477234, "lr": 1.7513273660314753e-05, "epoch": 0.5363876651982379, "percentage": 26.81, "elapsed_time": "2:14:40", "remaining_time": "6:07:33"} +{"current_steps": 1523, "total_steps": 5676, "loss": 0.71816086769104, "lr": 1.7509427373679643e-05, "epoch": 0.5367400881057268, "percentage": 26.83, "elapsed_time": "2:14:45", "remaining_time": "6:07:28"} +{"current_steps": 1524, "total_steps": 5676, "loss": 0.7681000232696533, "lr": 1.750557853782764e-05, "epoch": 0.5370925110132159, "percentage": 26.85, "elapsed_time": "2:14:49", "remaining_time": "6:07:19"} +{"current_steps": 1525, "total_steps": 5676, "loss": 0.6777461767196655, "lr": 1.7501727154065304e-05, "epoch": 0.5374449339207048, "percentage": 26.87, "elapsed_time": "2:14:53", "remaining_time": "6:07:11"} +{"current_steps": 1526, "total_steps": 5676, "loss": 0.714499831199646, "lr": 1.7497873223700063e-05, "epoch": 0.5377973568281939, "percentage": 26.89, "elapsed_time": "2:14:58", "remaining_time": "6:07:02"} +{"current_steps": 1527, "total_steps": 5676, "loss": 0.6587036848068237, "lr": 1.7494016748040206e-05, "epoch": 0.5381497797356828, "percentage": 26.9, "elapsed_time": "2:15:02", "remaining_time": 
"6:06:55"} +{"current_steps": 1528, "total_steps": 5676, "loss": 0.7256105542182922, "lr": 1.7490157728394887e-05, "epoch": 0.5385022026431718, "percentage": 26.92, "elapsed_time": "2:15:07", "remaining_time": "6:06:49"} +{"current_steps": 1529, "total_steps": 5676, "loss": 0.6551185846328735, "lr": 1.7486296166074116e-05, "epoch": 0.5388546255506608, "percentage": 26.94, "elapsed_time": "2:15:11", "remaining_time": "6:06:40"} +{"current_steps": 1530, "total_steps": 5676, "loss": 0.710479736328125, "lr": 1.7482432062388782e-05, "epoch": 0.5392070484581498, "percentage": 26.96, "elapsed_time": "2:15:15", "remaining_time": "6:06:32"} +{"current_steps": 1531, "total_steps": 5676, "loss": 0.7882527709007263, "lr": 1.7478565418650625e-05, "epoch": 0.5395594713656388, "percentage": 26.97, "elapsed_time": "2:15:22", "remaining_time": "6:06:29"} +{"current_steps": 1532, "total_steps": 5676, "loss": 0.6163671612739563, "lr": 1.7474696236172247e-05, "epoch": 0.5399118942731278, "percentage": 26.99, "elapsed_time": "2:15:27", "remaining_time": "6:06:24"} +{"current_steps": 1533, "total_steps": 5676, "loss": 0.6923140287399292, "lr": 1.7470824516267125e-05, "epoch": 0.5402643171806167, "percentage": 27.01, "elapsed_time": "2:15:33", "remaining_time": "6:06:20"} +{"current_steps": 1534, "total_steps": 5676, "loss": 0.6473938226699829, "lr": 1.7466950260249573e-05, "epoch": 0.5406167400881057, "percentage": 27.03, "elapsed_time": "2:15:38", "remaining_time": "6:06:13"} +{"current_steps": 1535, "total_steps": 5676, "loss": 0.631247878074646, "lr": 1.7463073469434792e-05, "epoch": 0.5409691629955947, "percentage": 27.04, "elapsed_time": "2:15:42", "remaining_time": "6:06:06"} +{"current_steps": 1536, "total_steps": 5676, "loss": 0.8113377690315247, "lr": 1.745919414513883e-05, "epoch": 0.5413215859030837, "percentage": 27.06, "elapsed_time": "2:15:46", "remaining_time": "6:05:58"} +{"current_steps": 1537, "total_steps": 5676, "loss": 0.7950010299682617, "lr": 
1.7455312288678588e-05, "epoch": 0.5416740088105727, "percentage": 27.08, "elapsed_time": "2:15:52", "remaining_time": "6:05:52"} +{"current_steps": 1538, "total_steps": 5676, "loss": 0.7279125452041626, "lr": 1.7451427901371843e-05, "epoch": 0.5420264317180616, "percentage": 27.1, "elapsed_time": "2:15:57", "remaining_time": "6:05:48"} +{"current_steps": 1539, "total_steps": 5676, "loss": 0.6746084690093994, "lr": 1.7447540984537225e-05, "epoch": 0.5423788546255507, "percentage": 27.11, "elapsed_time": "2:16:03", "remaining_time": "6:05:43"} +{"current_steps": 1540, "total_steps": 5676, "loss": 0.5818569660186768, "lr": 1.744365153949422e-05, "epoch": 0.5427312775330396, "percentage": 27.13, "elapsed_time": "2:16:09", "remaining_time": "6:05:41"} +{"current_steps": 1541, "total_steps": 5676, "loss": 0.7408111095428467, "lr": 1.743975956756317e-05, "epoch": 0.5430837004405287, "percentage": 27.15, "elapsed_time": "2:16:15", "remaining_time": "6:05:36"} +{"current_steps": 1542, "total_steps": 5676, "loss": 0.6842402815818787, "lr": 1.7435865070065282e-05, "epoch": 0.5434361233480176, "percentage": 27.17, "elapsed_time": "2:16:21", "remaining_time": "6:05:33"} +{"current_steps": 1543, "total_steps": 5676, "loss": 0.6179615259170532, "lr": 1.7431968048322617e-05, "epoch": 0.5437885462555067, "percentage": 27.18, "elapsed_time": "2:16:25", "remaining_time": "6:05:26"} +{"current_steps": 1544, "total_steps": 5676, "loss": 0.5943140983581543, "lr": 1.7428068503658085e-05, "epoch": 0.5441409691629956, "percentage": 27.2, "elapsed_time": "2:16:32", "remaining_time": "6:05:23"} +{"current_steps": 1545, "total_steps": 5676, "loss": 0.7901419401168823, "lr": 1.742416643739547e-05, "epoch": 0.5444933920704845, "percentage": 27.22, "elapsed_time": "2:16:37", "remaining_time": "6:05:19"} +{"current_steps": 1546, "total_steps": 5676, "loss": 0.7630521655082703, "lr": 1.74202618508594e-05, "epoch": 0.5448458149779736, "percentage": 27.24, "elapsed_time": "2:16:43", 
"remaining_time": "6:05:14"} +{"current_steps": 1547, "total_steps": 5676, "loss": 0.7662566900253296, "lr": 1.7416354745375355e-05, "epoch": 0.5451982378854625, "percentage": 27.26, "elapsed_time": "2:16:47", "remaining_time": "6:05:07"} +{"current_steps": 1548, "total_steps": 5676, "loss": 0.5758601427078247, "lr": 1.7412445122269683e-05, "epoch": 0.5455506607929516, "percentage": 27.27, "elapsed_time": "2:16:54", "remaining_time": "6:05:05"} +{"current_steps": 1549, "total_steps": 5676, "loss": 0.753425121307373, "lr": 1.7408532982869573e-05, "epoch": 0.5459030837004405, "percentage": 27.29, "elapsed_time": "2:17:00", "remaining_time": "6:05:00"} +{"current_steps": 1550, "total_steps": 5676, "loss": 0.6954981088638306, "lr": 1.7404618328503082e-05, "epoch": 0.5462555066079295, "percentage": 27.31, "elapsed_time": "2:17:05", "remaining_time": "6:04:55"} +{"current_steps": 1551, "total_steps": 5676, "loss": 0.7608321905136108, "lr": 1.7400701160499107e-05, "epoch": 0.5466079295154185, "percentage": 27.33, "elapsed_time": "2:17:11", "remaining_time": "6:04:53"} +{"current_steps": 1552, "total_steps": 5676, "loss": 0.679731011390686, "lr": 1.7396781480187403e-05, "epoch": 0.5469603524229075, "percentage": 27.34, "elapsed_time": "2:17:16", "remaining_time": "6:04:45"} +{"current_steps": 1553, "total_steps": 5676, "loss": 0.7101309299468994, "lr": 1.7392859288898586e-05, "epoch": 0.5473127753303965, "percentage": 27.36, "elapsed_time": "2:17:21", "remaining_time": "6:04:39"} +{"current_steps": 1554, "total_steps": 5676, "loss": 0.7975757122039795, "lr": 1.7388934587964114e-05, "epoch": 0.5476651982378855, "percentage": 27.38, "elapsed_time": "2:17:25", "remaining_time": "6:04:31"} +{"current_steps": 1555, "total_steps": 5676, "loss": 0.9132372140884399, "lr": 1.73850073787163e-05, "epoch": 0.5480176211453744, "percentage": 27.4, "elapsed_time": "2:17:30", "remaining_time": "6:04:23"} +{"current_steps": 1556, "total_steps": 5676, "loss": 0.7375202178955078, "lr": 
1.7381077662488313e-05, "epoch": 0.5483700440528634, "percentage": 27.41, "elapsed_time": "2:17:34", "remaining_time": "6:04:16"} +{"current_steps": 1557, "total_steps": 5676, "loss": 0.7066434025764465, "lr": 1.7377145440614165e-05, "epoch": 0.5487224669603524, "percentage": 27.43, "elapsed_time": "2:17:40", "remaining_time": "6:04:12"} +{"current_steps": 1558, "total_steps": 5676, "loss": 0.8217945098876953, "lr": 1.737321071442873e-05, "epoch": 0.5490748898678414, "percentage": 27.45, "elapsed_time": "2:17:44", "remaining_time": "6:04:04"} +{"current_steps": 1559, "total_steps": 5676, "loss": 0.6946529150009155, "lr": 1.7369273485267716e-05, "epoch": 0.5494273127753304, "percentage": 27.47, "elapsed_time": "2:17:50", "remaining_time": "6:04:00"} +{"current_steps": 1560, "total_steps": 5676, "loss": 0.7671442627906799, "lr": 1.7365333754467694e-05, "epoch": 0.5497797356828193, "percentage": 27.48, "elapsed_time": "2:17:56", "remaining_time": "6:03:56"} +{"current_steps": 1561, "total_steps": 5676, "loss": 0.7044692039489746, "lr": 1.736139152336608e-05, "epoch": 0.5501321585903084, "percentage": 27.5, "elapsed_time": "2:18:02", "remaining_time": "6:03:53"} +{"current_steps": 1562, "total_steps": 5676, "loss": 0.6654937267303467, "lr": 1.735744679330114e-05, "epoch": 0.5504845814977973, "percentage": 27.52, "elapsed_time": "2:18:06", "remaining_time": "6:03:44"} +{"current_steps": 1563, "total_steps": 5676, "loss": 0.6683400869369507, "lr": 1.7353499565611986e-05, "epoch": 0.5508370044052864, "percentage": 27.54, "elapsed_time": "2:18:12", "remaining_time": "6:03:40"} +{"current_steps": 1564, "total_steps": 5676, "loss": 0.6360758543014526, "lr": 1.734954984163858e-05, "epoch": 0.5511894273127753, "percentage": 27.55, "elapsed_time": "2:18:16", "remaining_time": "6:03:33"} +{"current_steps": 1565, "total_steps": 5676, "loss": 0.6982365846633911, "lr": 1.7345597622721727e-05, "epoch": 0.5515418502202644, "percentage": 27.57, "elapsed_time": "2:18:22", 
"remaining_time": "6:03:29"} +{"current_steps": 1566, "total_steps": 5676, "loss": 0.8011882305145264, "lr": 1.7341642910203087e-05, "epoch": 0.5518942731277533, "percentage": 27.59, "elapsed_time": "2:18:27", "remaining_time": "6:03:23"} +{"current_steps": 1567, "total_steps": 5676, "loss": 0.8203347325325012, "lr": 1.7337685705425156e-05, "epoch": 0.5522466960352423, "percentage": 27.61, "elapsed_time": "2:18:31", "remaining_time": "6:03:14"} +{"current_steps": 1568, "total_steps": 5676, "loss": 0.653145432472229, "lr": 1.7333726009731288e-05, "epoch": 0.5525991189427313, "percentage": 27.63, "elapsed_time": "2:18:38", "remaining_time": "6:03:13"} +{"current_steps": 1569, "total_steps": 5676, "loss": 0.6527417302131653, "lr": 1.7329763824465676e-05, "epoch": 0.5529515418502202, "percentage": 27.64, "elapsed_time": "2:18:44", "remaining_time": "6:03:09"} +{"current_steps": 1570, "total_steps": 5676, "loss": 0.6965492963790894, "lr": 1.7325799150973353e-05, "epoch": 0.5533039647577093, "percentage": 27.66, "elapsed_time": "2:18:49", "remaining_time": "6:03:02"} +{"current_steps": 1571, "total_steps": 5676, "loss": 0.6779811382293701, "lr": 1.7321831990600206e-05, "epoch": 0.5536563876651982, "percentage": 27.68, "elapsed_time": "2:18:54", "remaining_time": "6:02:57"} +{"current_steps": 1572, "total_steps": 5676, "loss": 0.733130931854248, "lr": 1.731786234469296e-05, "epoch": 0.5540088105726872, "percentage": 27.7, "elapsed_time": "2:18:58", "remaining_time": "6:02:48"} +{"current_steps": 1573, "total_steps": 5676, "loss": 0.6482118964195251, "lr": 1.7313890214599195e-05, "epoch": 0.5543612334801762, "percentage": 27.71, "elapsed_time": "2:19:03", "remaining_time": "6:02:43"} +{"current_steps": 1574, "total_steps": 5676, "loss": 0.5167185068130493, "lr": 1.7309915601667312e-05, "epoch": 0.5547136563876652, "percentage": 27.73, "elapsed_time": "2:19:09", "remaining_time": "6:02:38"} +{"current_steps": 1575, "total_steps": 5676, "loss": 0.7172712087631226, "lr": 
1.730593850724658e-05, "epoch": 0.5550660792951542, "percentage": 27.75, "elapsed_time": "2:19:14", "remaining_time": "6:02:32"} +{"current_steps": 1576, "total_steps": 5676, "loss": 0.6786075830459595, "lr": 1.730195893268709e-05, "epoch": 0.5554185022026432, "percentage": 27.77, "elapsed_time": "2:19:20", "remaining_time": "6:02:29"} +{"current_steps": 1577, "total_steps": 5676, "loss": 0.6823022365570068, "lr": 1.7297976879339787e-05, "epoch": 0.5557709251101322, "percentage": 27.78, "elapsed_time": "2:19:27", "remaining_time": "6:02:28"} +{"current_steps": 1578, "total_steps": 5676, "loss": 0.6882521510124207, "lr": 1.7293992348556462e-05, "epoch": 0.5561233480176212, "percentage": 27.8, "elapsed_time": "2:19:31", "remaining_time": "6:02:21"} +{"current_steps": 1579, "total_steps": 5676, "loss": 0.6028990745544434, "lr": 1.7290005341689726e-05, "epoch": 0.5564757709251101, "percentage": 27.82, "elapsed_time": "2:19:36", "remaining_time": "6:02:13"} +{"current_steps": 1580, "total_steps": 5676, "loss": 0.7759981155395508, "lr": 1.728601586009305e-05, "epoch": 0.5568281938325991, "percentage": 27.84, "elapsed_time": "2:19:41", "remaining_time": "6:02:07"} +{"current_steps": 1581, "total_steps": 5676, "loss": 0.7067322134971619, "lr": 1.7282023905120743e-05, "epoch": 0.5571806167400881, "percentage": 27.85, "elapsed_time": "2:19:47", "remaining_time": "6:02:03"} +{"current_steps": 1582, "total_steps": 5676, "loss": 0.7972309589385986, "lr": 1.727802947812794e-05, "epoch": 0.5575330396475771, "percentage": 27.87, "elapsed_time": "2:19:52", "remaining_time": "6:01:59"} +{"current_steps": 1583, "total_steps": 5676, "loss": 0.780463457107544, "lr": 1.7274032580470634e-05, "epoch": 0.5578854625550661, "percentage": 27.89, "elapsed_time": "2:19:58", "remaining_time": "6:01:55"} +{"current_steps": 1584, "total_steps": 5676, "loss": 0.647217869758606, "lr": 1.7270033213505638e-05, "epoch": 0.558237885462555, "percentage": 27.91, "elapsed_time": "2:20:04", 
"remaining_time": "6:01:52"} +{"current_steps": 1585, "total_steps": 5676, "loss": 0.6253752112388611, "lr": 1.7266031378590624e-05, "epoch": 0.5585903083700441, "percentage": 27.92, "elapsed_time": "2:20:10", "remaining_time": "6:01:48"} +{"current_steps": 1586, "total_steps": 5676, "loss": 0.8427211046218872, "lr": 1.7262027077084083e-05, "epoch": 0.558942731277533, "percentage": 27.94, "elapsed_time": "2:20:14", "remaining_time": "6:01:40"} +{"current_steps": 1587, "total_steps": 5676, "loss": 0.6763455867767334, "lr": 1.7258020310345348e-05, "epoch": 0.5592951541850221, "percentage": 27.96, "elapsed_time": "2:20:20", "remaining_time": "6:01:35"} +{"current_steps": 1588, "total_steps": 5676, "loss": 0.7333850264549255, "lr": 1.72540110797346e-05, "epoch": 0.559647577092511, "percentage": 27.98, "elapsed_time": "2:20:24", "remaining_time": "6:01:28"} +{"current_steps": 1589, "total_steps": 5676, "loss": 0.8572328090667725, "lr": 1.7249999386612844e-05, "epoch": 0.56, "percentage": 28.0, "elapsed_time": "2:20:29", "remaining_time": "6:01:20"} +{"current_steps": 1590, "total_steps": 5676, "loss": 0.7960183620452881, "lr": 1.7245985232341923e-05, "epoch": 0.560352422907489, "percentage": 28.01, "elapsed_time": "2:20:34", "remaining_time": "6:01:16"} +{"current_steps": 1591, "total_steps": 5676, "loss": 0.6750795841217041, "lr": 1.7241968618284518e-05, "epoch": 0.5607048458149779, "percentage": 28.03, "elapsed_time": "2:20:41", "remaining_time": "6:01:13"} +{"current_steps": 1592, "total_steps": 5676, "loss": 0.7828525304794312, "lr": 1.7237949545804145e-05, "epoch": 0.561057268722467, "percentage": 28.05, "elapsed_time": "2:20:45", "remaining_time": "6:01:04"} +{"current_steps": 1593, "total_steps": 5676, "loss": 0.7414604425430298, "lr": 1.7233928016265158e-05, "epoch": 0.5614096916299559, "percentage": 28.07, "elapsed_time": "2:20:50", "remaining_time": "6:00:58"} +{"current_steps": 1594, "total_steps": 5676, "loss": 0.6853663921356201, "lr": 
1.7229904031032736e-05, "epoch": 0.561762114537445, "percentage": 28.08, "elapsed_time": "2:20:55", "remaining_time": "6:00:53"} +{"current_steps": 1595, "total_steps": 5676, "loss": 0.7923493385314941, "lr": 1.72258775914729e-05, "epoch": 0.5621145374449339, "percentage": 28.1, "elapsed_time": "2:21:00", "remaining_time": "6:00:47"} +{"current_steps": 1596, "total_steps": 5676, "loss": 0.6776527166366577, "lr": 1.7221848698952496e-05, "epoch": 0.5624669603524229, "percentage": 28.12, "elapsed_time": "2:21:05", "remaining_time": "6:00:41"} +{"current_steps": 1597, "total_steps": 5676, "loss": 0.6036615371704102, "lr": 1.721781735483921e-05, "epoch": 0.5628193832599119, "percentage": 28.14, "elapsed_time": "2:21:10", "remaining_time": "6:00:34"} +{"current_steps": 1598, "total_steps": 5676, "loss": 0.7175784111022949, "lr": 1.7213783560501564e-05, "epoch": 0.5631718061674009, "percentage": 28.15, "elapsed_time": "2:21:16", "remaining_time": "6:00:32"} +{"current_steps": 1599, "total_steps": 5676, "loss": 0.790808379650116, "lr": 1.7209747317308897e-05, "epoch": 0.5635242290748899, "percentage": 28.17, "elapsed_time": "2:21:21", "remaining_time": "6:00:25"} +{"current_steps": 1600, "total_steps": 5676, "loss": 0.6230301856994629, "lr": 1.7205708626631392e-05, "epoch": 0.5638766519823789, "percentage": 28.19, "elapsed_time": "2:21:28", "remaining_time": "6:00:23"} +{"current_steps": 1601, "total_steps": 5676, "loss": 0.712124228477478, "lr": 1.720166748984006e-05, "epoch": 0.5642290748898678, "percentage": 28.21, "elapsed_time": "2:21:38", "remaining_time": "6:00:31"} +{"current_steps": 1602, "total_steps": 5676, "loss": 0.543883740901947, "lr": 1.719762390830674e-05, "epoch": 0.5645814977973568, "percentage": 28.22, "elapsed_time": "2:21:44", "remaining_time": "6:00:28"} +{"current_steps": 1603, "total_steps": 5676, "loss": 0.7619644999504089, "lr": 1.71935778834041e-05, "epoch": 0.5649339207048458, "percentage": 28.24, "elapsed_time": "2:21:49", "remaining_time": 
"6:00:21"} +{"current_steps": 1604, "total_steps": 5676, "loss": 0.6447019577026367, "lr": 1.718952941650564e-05, "epoch": 0.5652863436123348, "percentage": 28.26, "elapsed_time": "2:21:54", "remaining_time": "6:00:16"} +{"current_steps": 1605, "total_steps": 5676, "loss": 0.7254266738891602, "lr": 1.718547850898569e-05, "epoch": 0.5656387665198238, "percentage": 28.28, "elapsed_time": "2:22:00", "remaining_time": "6:00:12"} +{"current_steps": 1606, "total_steps": 5676, "loss": 0.632878839969635, "lr": 1.7181425162219406e-05, "epoch": 0.5659911894273127, "percentage": 28.29, "elapsed_time": "2:22:05", "remaining_time": "6:00:05"} +{"current_steps": 1607, "total_steps": 5676, "loss": 0.7711806893348694, "lr": 1.7177369377582776e-05, "epoch": 0.5663436123348018, "percentage": 28.31, "elapsed_time": "2:22:10", "remaining_time": "6:00:00"} +{"current_steps": 1608, "total_steps": 5676, "loss": 0.6639282703399658, "lr": 1.7173311156452607e-05, "epoch": 0.5666960352422907, "percentage": 28.33, "elapsed_time": "2:22:17", "remaining_time": "5:59:57"} +{"current_steps": 1609, "total_steps": 5676, "loss": 0.6918407082557678, "lr": 1.7169250500206544e-05, "epoch": 0.5670484581497798, "percentage": 28.35, "elapsed_time": "2:22:21", "remaining_time": "5:59:50"} +{"current_steps": 1610, "total_steps": 5676, "loss": 0.6602861881256104, "lr": 1.716518741022305e-05, "epoch": 0.5674008810572687, "percentage": 28.37, "elapsed_time": "2:22:27", "remaining_time": "5:59:46"} +{"current_steps": 1611, "total_steps": 5676, "loss": 0.5853942632675171, "lr": 1.7161121887881424e-05, "epoch": 0.5677533039647578, "percentage": 28.38, "elapsed_time": "2:22:33", "remaining_time": "5:59:43"} +{"current_steps": 1612, "total_steps": 5676, "loss": 0.6793895959854126, "lr": 1.7157053934561775e-05, "epoch": 0.5681057268722467, "percentage": 28.4, "elapsed_time": "2:22:37", "remaining_time": "5:59:34"} +{"current_steps": 1613, "total_steps": 5676, "loss": 0.7882634401321411, "lr": 1.7152983551645054e-05, 
"epoch": 0.5684581497797356, "percentage": 28.42, "elapsed_time": "2:22:43", "remaining_time": "5:59:29"} +{"current_steps": 1614, "total_steps": 5676, "loss": 0.6530553698539734, "lr": 1.7148910740513023e-05, "epoch": 0.5688105726872247, "percentage": 28.44, "elapsed_time": "2:22:47", "remaining_time": "5:59:21"} +{"current_steps": 1615, "total_steps": 5676, "loss": 0.6405597925186157, "lr": 1.714483550254828e-05, "epoch": 0.5691629955947136, "percentage": 28.45, "elapsed_time": "2:22:53", "remaining_time": "5:59:18"} +{"current_steps": 1616, "total_steps": 5676, "loss": 0.7356796860694885, "lr": 1.714075783913424e-05, "epoch": 0.5695154185022027, "percentage": 28.47, "elapsed_time": "2:22:58", "remaining_time": "5:59:12"} +{"current_steps": 1617, "total_steps": 5676, "loss": 0.7393465042114258, "lr": 1.7136677751655142e-05, "epoch": 0.5698678414096916, "percentage": 28.49, "elapsed_time": "2:23:03", "remaining_time": "5:59:05"} +{"current_steps": 1618, "total_steps": 5676, "loss": 0.7205296158790588, "lr": 1.7132595241496045e-05, "epoch": 0.5702202643171806, "percentage": 28.51, "elapsed_time": "2:23:07", "remaining_time": "5:58:58"} +{"current_steps": 1619, "total_steps": 5676, "loss": 0.7359808683395386, "lr": 1.7128510310042842e-05, "epoch": 0.5705726872246696, "percentage": 28.52, "elapsed_time": "2:23:13", "remaining_time": "5:58:55"} +{"current_steps": 1620, "total_steps": 5676, "loss": 0.7097065448760986, "lr": 1.712442295868224e-05, "epoch": 0.5709251101321586, "percentage": 28.54, "elapsed_time": "2:23:19", "remaining_time": "5:58:50"} +{"current_steps": 1621, "total_steps": 5676, "loss": 0.66839599609375, "lr": 1.7120333188801756e-05, "epoch": 0.5712775330396476, "percentage": 28.56, "elapsed_time": "2:23:24", "remaining_time": "5:58:45"} +{"current_steps": 1622, "total_steps": 5676, "loss": 0.8373857736587524, "lr": 1.7116241001789753e-05, "epoch": 0.5716299559471366, "percentage": 28.58, "elapsed_time": "2:23:28", "remaining_time": "5:58:36"} 
+{"current_steps": 1623, "total_steps": 5676, "loss": 0.6405144333839417, "lr": 1.7112146399035393e-05, "epoch": 0.5719823788546256, "percentage": 28.59, "elapsed_time": "2:23:34", "remaining_time": "5:58:32"} +{"current_steps": 1624, "total_steps": 5676, "loss": 0.622218906879425, "lr": 1.710804938192867e-05, "epoch": 0.5723348017621145, "percentage": 28.61, "elapsed_time": "2:23:39", "remaining_time": "5:58:26"} +{"current_steps": 1625, "total_steps": 5676, "loss": 0.6728596687316895, "lr": 1.710394995186039e-05, "epoch": 0.5726872246696035, "percentage": 28.63, "elapsed_time": "2:23:45", "remaining_time": "5:58:22"} +{"current_steps": 1626, "total_steps": 5676, "loss": 0.7749369144439697, "lr": 1.7099848110222188e-05, "epoch": 0.5730396475770925, "percentage": 28.65, "elapsed_time": "2:23:49", "remaining_time": "5:58:15"} +{"current_steps": 1627, "total_steps": 5676, "loss": 0.7230759859085083, "lr": 1.7095743858406506e-05, "epoch": 0.5733920704845815, "percentage": 28.66, "elapsed_time": "2:23:54", "remaining_time": "5:58:08"} +{"current_steps": 1628, "total_steps": 5676, "loss": 0.8243547677993774, "lr": 1.7091637197806614e-05, "epoch": 0.5737444933920705, "percentage": 28.68, "elapsed_time": "2:24:00", "remaining_time": "5:58:05"} +{"current_steps": 1629, "total_steps": 5676, "loss": 0.5860315561294556, "lr": 1.708752812981659e-05, "epoch": 0.5740969162995595, "percentage": 28.7, "elapsed_time": "2:24:05", "remaining_time": "5:57:58"} +{"current_steps": 1630, "total_steps": 5676, "loss": 0.6623368859291077, "lr": 1.708341665583134e-05, "epoch": 0.5744493392070484, "percentage": 28.72, "elapsed_time": "2:24:11", "remaining_time": "5:57:53"} +{"current_steps": 1631, "total_steps": 5676, "loss": 0.6467370986938477, "lr": 1.7079302777246577e-05, "epoch": 0.5748017621145375, "percentage": 28.74, "elapsed_time": "2:24:16", "remaining_time": "5:57:47"} +{"current_steps": 1632, "total_steps": 5676, "loss": 0.6443271636962891, "lr": 1.707518649545884e-05, "epoch": 
0.5751541850220264, "percentage": 28.75, "elapsed_time": "2:24:21", "remaining_time": "5:57:42"} +{"current_steps": 1633, "total_steps": 5676, "loss": 0.6995208263397217, "lr": 1.7071067811865477e-05, "epoch": 0.5755066079295155, "percentage": 28.77, "elapsed_time": "2:24:26", "remaining_time": "5:57:37"} +{"current_steps": 1634, "total_steps": 5676, "loss": 0.698627233505249, "lr": 1.706694672786465e-05, "epoch": 0.5758590308370044, "percentage": 28.79, "elapsed_time": "2:24:31", "remaining_time": "5:57:29"} +{"current_steps": 1635, "total_steps": 5676, "loss": 0.713565468788147, "lr": 1.706282324485534e-05, "epoch": 0.5762114537444933, "percentage": 28.81, "elapsed_time": "2:24:37", "remaining_time": "5:57:27"} +{"current_steps": 1636, "total_steps": 5676, "loss": 0.7978894710540771, "lr": 1.7058697364237342e-05, "epoch": 0.5765638766519824, "percentage": 28.82, "elapsed_time": "2:24:43", "remaining_time": "5:57:22"} +{"current_steps": 1637, "total_steps": 5676, "loss": 0.7361177206039429, "lr": 1.7054569087411262e-05, "epoch": 0.5769162995594713, "percentage": 28.84, "elapsed_time": "2:24:47", "remaining_time": "5:57:15"} +{"current_steps": 1638, "total_steps": 5676, "loss": 0.5904364585876465, "lr": 1.705043841577853e-05, "epoch": 0.5772687224669604, "percentage": 28.86, "elapsed_time": "2:24:53", "remaining_time": "5:57:11"} +{"current_steps": 1639, "total_steps": 5676, "loss": 0.7122133374214172, "lr": 1.7046305350741365e-05, "epoch": 0.5776211453744493, "percentage": 28.88, "elapsed_time": "2:24:59", "remaining_time": "5:57:08"} +{"current_steps": 1640, "total_steps": 5676, "loss": 0.6350806951522827, "lr": 1.7042169893702826e-05, "epoch": 0.5779735682819384, "percentage": 28.89, "elapsed_time": "2:25:05", "remaining_time": "5:57:04"} +{"current_steps": 1641, "total_steps": 5676, "loss": 0.6332669258117676, "lr": 1.7038032046066767e-05, "epoch": 0.5783259911894273, "percentage": 28.91, "elapsed_time": "2:25:11", "remaining_time": "5:56:59"} 
+{"current_steps": 1642, "total_steps": 5676, "loss": 0.6645903587341309, "lr": 1.7033891809237865e-05, "epoch": 0.5786784140969163, "percentage": 28.93, "elapsed_time": "2:25:16", "remaining_time": "5:56:54"} +{"current_steps": 1643, "total_steps": 5676, "loss": 0.8156411051750183, "lr": 1.7029749184621593e-05, "epoch": 0.5790308370044053, "percentage": 28.95, "elapsed_time": "2:25:20", "remaining_time": "5:56:45"} +{"current_steps": 1644, "total_steps": 5676, "loss": 0.6778720617294312, "lr": 1.7025604173624247e-05, "epoch": 0.5793832599118943, "percentage": 28.96, "elapsed_time": "2:25:28", "remaining_time": "5:56:47"} +{"current_steps": 1645, "total_steps": 5676, "loss": 0.6774875521659851, "lr": 1.702145677765293e-05, "epoch": 0.5797356828193833, "percentage": 28.98, "elapsed_time": "2:25:33", "remaining_time": "5:56:41"} +{"current_steps": 1646, "total_steps": 5676, "loss": 0.9239652156829834, "lr": 1.701730699811555e-05, "epoch": 0.5800881057268722, "percentage": 29.0, "elapsed_time": "2:25:38", "remaining_time": "5:56:34"} +{"current_steps": 1647, "total_steps": 5676, "loss": 0.6841437816619873, "lr": 1.701315483642083e-05, "epoch": 0.5804405286343612, "percentage": 29.02, "elapsed_time": "2:25:43", "remaining_time": "5:56:29"} +{"current_steps": 1648, "total_steps": 5676, "loss": 0.7540775537490845, "lr": 1.7009000293978308e-05, "epoch": 0.5807929515418502, "percentage": 29.03, "elapsed_time": "2:25:48", "remaining_time": "5:56:22"} +{"current_steps": 1649, "total_steps": 5676, "loss": 0.5534735321998596, "lr": 1.7004843372198306e-05, "epoch": 0.5811453744493392, "percentage": 29.05, "elapsed_time": "2:25:54", "remaining_time": "5:56:19"} +{"current_steps": 1650, "total_steps": 5676, "loss": 0.5398745536804199, "lr": 1.7000684072491984e-05, "epoch": 0.5814977973568282, "percentage": 29.07, "elapsed_time": "2:26:01", "remaining_time": "5:56:17"} +{"current_steps": 1651, "total_steps": 5676, "loss": 0.7249305248260498, "lr": 1.6996522396271285e-05, "epoch": 
0.5818502202643172, "percentage": 29.09, "elapsed_time": "2:26:05", "remaining_time": "5:56:09"} +{"current_steps": 1652, "total_steps": 5676, "loss": 0.819263219833374, "lr": 1.6992358344948976e-05, "epoch": 0.5822026431718061, "percentage": 29.11, "elapsed_time": "2:26:11", "remaining_time": "5:56:05"} +{"current_steps": 1653, "total_steps": 5676, "loss": 0.7421448826789856, "lr": 1.6988191919938618e-05, "epoch": 0.5825550660792952, "percentage": 29.12, "elapsed_time": "2:26:16", "remaining_time": "5:56:00"} +{"current_steps": 1654, "total_steps": 5676, "loss": 0.7665672302246094, "lr": 1.6984023122654584e-05, "epoch": 0.5829074889867841, "percentage": 29.14, "elapsed_time": "2:26:20", "remaining_time": "5:55:51"} +{"current_steps": 1655, "total_steps": 5676, "loss": 0.7226558327674866, "lr": 1.697985195451205e-05, "epoch": 0.5832599118942732, "percentage": 29.16, "elapsed_time": "2:26:27", "remaining_time": "5:55:49"} +{"current_steps": 1656, "total_steps": 5676, "loss": 0.6702080965042114, "lr": 1.6975678416926995e-05, "epoch": 0.5836123348017621, "percentage": 29.18, "elapsed_time": "2:26:32", "remaining_time": "5:55:43"} +{"current_steps": 1657, "total_steps": 5676, "loss": 0.5843878984451294, "lr": 1.697150251131621e-05, "epoch": 0.583964757709251, "percentage": 29.19, "elapsed_time": "2:26:37", "remaining_time": "5:55:38"} +{"current_steps": 1658, "total_steps": 5676, "loss": 0.707448422908783, "lr": 1.6967324239097287e-05, "epoch": 0.5843171806167401, "percentage": 29.21, "elapsed_time": "2:26:42", "remaining_time": "5:55:32"} +{"current_steps": 1659, "total_steps": 5676, "loss": 0.7619093060493469, "lr": 1.6963143601688615e-05, "epoch": 0.584669603524229, "percentage": 29.23, "elapsed_time": "2:26:48", "remaining_time": "5:55:29"} +{"current_steps": 1660, "total_steps": 5676, "loss": 0.6550310850143433, "lr": 1.695896060050939e-05, "epoch": 0.5850220264317181, "percentage": 29.25, "elapsed_time": "2:26:53", "remaining_time": "5:55:22"} +{"current_steps": 
1661, "total_steps": 5676, "loss": 0.7202504277229309, "lr": 1.6954775236979616e-05, "epoch": 0.585374449339207, "percentage": 29.26, "elapsed_time": "2:26:57", "remaining_time": "5:55:14"} +{"current_steps": 1662, "total_steps": 5676, "loss": 0.7941907644271851, "lr": 1.6950587512520085e-05, "epoch": 0.5857268722466961, "percentage": 29.28, "elapsed_time": "2:27:02", "remaining_time": "5:55:08"} +{"current_steps": 1663, "total_steps": 5676, "loss": 0.6349755525588989, "lr": 1.6946397428552406e-05, "epoch": 0.586079295154185, "percentage": 29.3, "elapsed_time": "2:27:08", "remaining_time": "5:55:05"} +{"current_steps": 1664, "total_steps": 5676, "loss": 0.6220123171806335, "lr": 1.6942204986498978e-05, "epoch": 0.586431718061674, "percentage": 29.32, "elapsed_time": "2:27:14", "remaining_time": "5:55:01"} +{"current_steps": 1665, "total_steps": 5676, "loss": 0.6617282629013062, "lr": 1.693801018778301e-05, "epoch": 0.586784140969163, "percentage": 29.33, "elapsed_time": "2:27:21", "remaining_time": "5:54:58"} +{"current_steps": 1666, "total_steps": 5676, "loss": 0.7424415349960327, "lr": 1.6933813033828496e-05, "epoch": 0.587136563876652, "percentage": 29.35, "elapsed_time": "2:27:26", "remaining_time": "5:54:53"} +{"current_steps": 1667, "total_steps": 5676, "loss": 0.7245291471481323, "lr": 1.6929613526060254e-05, "epoch": 0.587488986784141, "percentage": 29.37, "elapsed_time": "2:27:32", "remaining_time": "5:54:48"} +{"current_steps": 1668, "total_steps": 5676, "loss": 0.7037352323532104, "lr": 1.692541166590387e-05, "epoch": 0.5878414096916299, "percentage": 29.39, "elapsed_time": "2:27:36", "remaining_time": "5:54:40"} +{"current_steps": 1669, "total_steps": 5676, "loss": 0.7452583312988281, "lr": 1.6921207454785754e-05, "epoch": 0.588193832599119, "percentage": 29.4, "elapsed_time": "2:27:40", "remaining_time": "5:54:31"} +{"current_steps": 1670, "total_steps": 5676, "loss": 0.7773720026016235, "lr": 1.6917000894133106e-05, "epoch": 0.5885462555066079, 
"percentage": 29.42, "elapsed_time": "2:27:44", "remaining_time": "5:54:24"} +{"current_steps": 1671, "total_steps": 5676, "loss": 0.5820617079734802, "lr": 1.6912791985373916e-05, "epoch": 0.5888986784140969, "percentage": 29.44, "elapsed_time": "2:27:49", "remaining_time": "5:54:18"} +{"current_steps": 1672, "total_steps": 5676, "loss": 0.7513154745101929, "lr": 1.6908580729936983e-05, "epoch": 0.5892511013215859, "percentage": 29.46, "elapsed_time": "2:27:55", "remaining_time": "5:54:15"} +{"current_steps": 1673, "total_steps": 5676, "loss": 0.6741763949394226, "lr": 1.6904367129251898e-05, "epoch": 0.5896035242290749, "percentage": 29.47, "elapsed_time": "2:28:02", "remaining_time": "5:54:13"} +{"current_steps": 1674, "total_steps": 5676, "loss": 0.7290149331092834, "lr": 1.690015118474904e-05, "epoch": 0.5899559471365639, "percentage": 29.49, "elapsed_time": "2:28:08", "remaining_time": "5:54:08"} +{"current_steps": 1675, "total_steps": 5676, "loss": 0.651113748550415, "lr": 1.6895932897859596e-05, "epoch": 0.5903083700440529, "percentage": 29.51, "elapsed_time": "2:28:12", "remaining_time": "5:54:01"} +{"current_steps": 1676, "total_steps": 5676, "loss": 0.8062121272087097, "lr": 1.6891712270015546e-05, "epoch": 0.5906607929515418, "percentage": 29.53, "elapsed_time": "2:28:18", "remaining_time": "5:53:57"} +{"current_steps": 1677, "total_steps": 5676, "loss": 0.7168683409690857, "lr": 1.6887489302649657e-05, "epoch": 0.5910132158590309, "percentage": 29.55, "elapsed_time": "2:28:22", "remaining_time": "5:53:49"} +{"current_steps": 1678, "total_steps": 5676, "loss": 0.6751970052719116, "lr": 1.6883263997195497e-05, "epoch": 0.5913656387665198, "percentage": 29.56, "elapsed_time": "2:28:27", "remaining_time": "5:53:42"} +{"current_steps": 1679, "total_steps": 5676, "loss": 0.5176222324371338, "lr": 1.687903635508742e-05, "epoch": 0.5917180616740088, "percentage": 29.58, "elapsed_time": "2:28:33", "remaining_time": "5:53:38"} +{"current_steps": 1680, 
"total_steps": 5676, "loss": 0.605686366558075, "lr": 1.6874806377760587e-05, "epoch": 0.5920704845814978, "percentage": 29.6, "elapsed_time": "2:28:37", "remaining_time": "5:53:31"} +{"current_steps": 1681, "total_steps": 5676, "loss": 0.6927961111068726, "lr": 1.6870574066650945e-05, "epoch": 0.5924229074889867, "percentage": 29.62, "elapsed_time": "2:28:42", "remaining_time": "5:53:25"} +{"current_steps": 1682, "total_steps": 5676, "loss": 0.7434122562408447, "lr": 1.6866339423195223e-05, "epoch": 0.5927753303964758, "percentage": 29.63, "elapsed_time": "2:28:48", "remaining_time": "5:53:21"} +{"current_steps": 1683, "total_steps": 5676, "loss": 0.5646539926528931, "lr": 1.6862102448830956e-05, "epoch": 0.5931277533039647, "percentage": 29.65, "elapsed_time": "2:28:53", "remaining_time": "5:53:15"} +{"current_steps": 1684, "total_steps": 5676, "loss": 0.6666921377182007, "lr": 1.6857863144996464e-05, "epoch": 0.5934801762114538, "percentage": 29.67, "elapsed_time": "2:28:58", "remaining_time": "5:53:08"} +{"current_steps": 1685, "total_steps": 5676, "loss": 0.6630325317382812, "lr": 1.6853621513130857e-05, "epoch": 0.5938325991189427, "percentage": 29.69, "elapsed_time": "2:29:02", "remaining_time": "5:53:00"} +{"current_steps": 1686, "total_steps": 5676, "loss": 0.6519981622695923, "lr": 1.6849377554674042e-05, "epoch": 0.5941850220264318, "percentage": 29.7, "elapsed_time": "2:29:07", "remaining_time": "5:52:55"} +{"current_steps": 1687, "total_steps": 5676, "loss": 0.7958102822303772, "lr": 1.6845131271066705e-05, "epoch": 0.5945374449339207, "percentage": 29.72, "elapsed_time": "2:29:12", "remaining_time": "5:52:47"} +{"current_steps": 1688, "total_steps": 5676, "loss": 0.6136632561683655, "lr": 1.6840882663750333e-05, "epoch": 0.5948898678414097, "percentage": 29.74, "elapsed_time": "2:29:17", "remaining_time": "5:52:42"} +{"current_steps": 1689, "total_steps": 5676, "loss": 0.6177657842636108, "lr": 1.683663173416719e-05, "epoch": 0.5952422907488987, 
"percentage": 29.76, "elapsed_time": "2:29:22", "remaining_time": "5:52:36"} +{"current_steps": 1690, "total_steps": 5676, "loss": 0.7489751577377319, "lr": 1.683237848376034e-05, "epoch": 0.5955947136563877, "percentage": 29.77, "elapsed_time": "2:29:27", "remaining_time": "5:52:31"} +{"current_steps": 1691, "total_steps": 5676, "loss": 0.6749632954597473, "lr": 1.6828122913973625e-05, "epoch": 0.5959471365638767, "percentage": 29.79, "elapsed_time": "2:29:32", "remaining_time": "5:52:24"} +{"current_steps": 1692, "total_steps": 5676, "loss": 0.6340545415878296, "lr": 1.682386502625168e-05, "epoch": 0.5962995594713656, "percentage": 29.81, "elapsed_time": "2:29:37", "remaining_time": "5:52:17"} +{"current_steps": 1693, "total_steps": 5676, "loss": 0.6141117811203003, "lr": 1.6819604822039924e-05, "epoch": 0.5966519823788546, "percentage": 29.83, "elapsed_time": "2:29:43", "remaining_time": "5:52:15"} +{"current_steps": 1694, "total_steps": 5676, "loss": 0.7937319874763489, "lr": 1.681534230278457e-05, "epoch": 0.5970044052863436, "percentage": 29.84, "elapsed_time": "2:29:47", "remaining_time": "5:52:07"} +{"current_steps": 1695, "total_steps": 5676, "loss": 0.6196104288101196, "lr": 1.68110774699326e-05, "epoch": 0.5973568281938326, "percentage": 29.86, "elapsed_time": "2:29:53", "remaining_time": "5:52:02"} +{"current_steps": 1696, "total_steps": 5676, "loss": 0.6856463551521301, "lr": 1.68068103249318e-05, "epoch": 0.5977092511013216, "percentage": 29.88, "elapsed_time": "2:29:59", "remaining_time": "5:52:00"} +{"current_steps": 1697, "total_steps": 5676, "loss": 0.754359245300293, "lr": 1.680254086923073e-05, "epoch": 0.5980616740088106, "percentage": 29.9, "elapsed_time": "2:30:05", "remaining_time": "5:51:55"} +{"current_steps": 1698, "total_steps": 5676, "loss": 0.6663862466812134, "lr": 1.6798269104278738e-05, "epoch": 0.5984140969162995, "percentage": 29.92, "elapsed_time": "2:30:10", "remaining_time": "5:51:48"} +{"current_steps": 1699, "total_steps": 
5676, "loss": 0.7072615027427673, "lr": 1.6793995031525955e-05, "epoch": 0.5987665198237886, "percentage": 29.93, "elapsed_time": "2:30:14", "remaining_time": "5:51:41"} +{"current_steps": 1700, "total_steps": 5676, "loss": 0.5722007751464844, "lr": 1.678971865242329e-05, "epoch": 0.5991189427312775, "percentage": 29.95, "elapsed_time": "2:30:20", "remaining_time": "5:51:37"} +{"current_steps": 1701, "total_steps": 5676, "loss": 0.8254455327987671, "lr": 1.6785439968422456e-05, "epoch": 0.5994713656387666, "percentage": 29.97, "elapsed_time": "2:30:30", "remaining_time": "5:51:43"} +{"current_steps": 1702, "total_steps": 5676, "loss": 0.5726041793823242, "lr": 1.678115898097592e-05, "epoch": 0.5998237885462555, "percentage": 29.99, "elapsed_time": "2:30:36", "remaining_time": "5:51:39"} +{"current_steps": 1703, "total_steps": 5676, "loss": 0.6480926275253296, "lr": 1.6776875691536946e-05, "epoch": 0.6001762114537444, "percentage": 30.0, "elapsed_time": "2:30:40", "remaining_time": "5:51:31"} +{"current_steps": 1704, "total_steps": 5676, "loss": 0.6469742059707642, "lr": 1.677259010155958e-05, "epoch": 0.6005286343612335, "percentage": 30.02, "elapsed_time": "2:30:44", "remaining_time": "5:51:22"} +{"current_steps": 1705, "total_steps": 5676, "loss": 0.814565896987915, "lr": 1.6768302212498647e-05, "epoch": 0.6008810572687224, "percentage": 30.04, "elapsed_time": "2:30:50", "remaining_time": "5:51:18"} +{"current_steps": 1706, "total_steps": 5676, "loss": 0.7063060402870178, "lr": 1.6764012025809745e-05, "epoch": 0.6012334801762115, "percentage": 30.06, "elapsed_time": "2:30:55", "remaining_time": "5:51:12"} +{"current_steps": 1707, "total_steps": 5676, "loss": 0.6523685455322266, "lr": 1.6759719542949268e-05, "epoch": 0.6015859030837004, "percentage": 30.07, "elapsed_time": "2:31:01", "remaining_time": "5:51:09"} +{"current_steps": 1708, "total_steps": 5676, "loss": 0.7361165285110474, "lr": 1.6755424765374378e-05, "epoch": 0.6019383259911895, "percentage": 30.09, 
"elapsed_time": "2:31:06", "remaining_time": "5:51:04"} +{"current_steps": 1709, "total_steps": 5676, "loss": 0.45241934061050415, "lr": 1.6751127694543012e-05, "epoch": 0.6022907488986784, "percentage": 30.11, "elapsed_time": "2:31:11", "remaining_time": "5:50:58"} +{"current_steps": 1710, "total_steps": 5676, "loss": 0.6610431671142578, "lr": 1.6746828331913903e-05, "epoch": 0.6026431718061674, "percentage": 30.13, "elapsed_time": "2:31:16", "remaining_time": "5:50:52"} +{"current_steps": 1711, "total_steps": 5676, "loss": 0.7572601437568665, "lr": 1.674252667894654e-05, "epoch": 0.6029955947136564, "percentage": 30.14, "elapsed_time": "2:31:22", "remaining_time": "5:50:46"} +{"current_steps": 1712, "total_steps": 5676, "loss": 0.7021572589874268, "lr": 1.6738222737101205e-05, "epoch": 0.6033480176211454, "percentage": 30.16, "elapsed_time": "2:31:28", "remaining_time": "5:50:43"} +{"current_steps": 1713, "total_steps": 5676, "loss": 0.7742347121238708, "lr": 1.6733916507838952e-05, "epoch": 0.6037004405286344, "percentage": 30.18, "elapsed_time": "2:31:33", "remaining_time": "5:50:38"} +{"current_steps": 1714, "total_steps": 5676, "loss": 0.6453407406806946, "lr": 1.6729607992621613e-05, "epoch": 0.6040528634361233, "percentage": 30.2, "elapsed_time": "2:31:40", "remaining_time": "5:50:35"} +{"current_steps": 1715, "total_steps": 5676, "loss": 0.7004555463790894, "lr": 1.6725297192911793e-05, "epoch": 0.6044052863436123, "percentage": 30.21, "elapsed_time": "2:31:45", "remaining_time": "5:50:29"} +{"current_steps": 1716, "total_steps": 5676, "loss": 0.6979051232337952, "lr": 1.6720984110172875e-05, "epoch": 0.6047577092511013, "percentage": 30.23, "elapsed_time": "2:31:50", "remaining_time": "5:50:24"} +{"current_steps": 1717, "total_steps": 5676, "loss": 0.6387851238250732, "lr": 1.671666874586902e-05, "epoch": 0.6051101321585903, "percentage": 30.25, "elapsed_time": "2:31:56", "remaining_time": "5:50:20"} +{"current_steps": 1718, "total_steps": 5676, "loss": 
0.9083811044692993, "lr": 1.671235110146515e-05, "epoch": 0.6054625550660793, "percentage": 30.27, "elapsed_time": "2:32:01", "remaining_time": "5:50:13"} +{"current_steps": 1719, "total_steps": 5676, "loss": 0.747002363204956, "lr": 1.6708031178426984e-05, "epoch": 0.6058149779735683, "percentage": 30.29, "elapsed_time": "2:32:05", "remaining_time": "5:50:05"} +{"current_steps": 1720, "total_steps": 5676, "loss": 0.7553372383117676, "lr": 1.6703708978220986e-05, "epoch": 0.6061674008810573, "percentage": 30.3, "elapsed_time": "2:32:10", "remaining_time": "5:49:59"} +{"current_steps": 1721, "total_steps": 5676, "loss": 0.762795090675354, "lr": 1.669938450231442e-05, "epoch": 0.6065198237885463, "percentage": 30.32, "elapsed_time": "2:32:15", "remaining_time": "5:49:53"} +{"current_steps": 1722, "total_steps": 5676, "loss": 0.739936113357544, "lr": 1.669505775217531e-05, "epoch": 0.6068722466960352, "percentage": 30.34, "elapsed_time": "2:32:20", "remaining_time": "5:49:47"} +{"current_steps": 1723, "total_steps": 5676, "loss": 0.8439112305641174, "lr": 1.6690728729272456e-05, "epoch": 0.6072246696035243, "percentage": 30.36, "elapsed_time": "2:32:27", "remaining_time": "5:49:46"} +{"current_steps": 1724, "total_steps": 5676, "loss": 0.6144756078720093, "lr": 1.6686397435075416e-05, "epoch": 0.6075770925110132, "percentage": 30.37, "elapsed_time": "2:32:32", "remaining_time": "5:49:41"} +{"current_steps": 1725, "total_steps": 5676, "loss": 0.569161057472229, "lr": 1.6682063871054534e-05, "epoch": 0.6079295154185022, "percentage": 30.39, "elapsed_time": "2:32:38", "remaining_time": "5:49:36"} +{"current_steps": 1726, "total_steps": 5676, "loss": 0.7523979544639587, "lr": 1.6677728038680926e-05, "epoch": 0.6082819383259912, "percentage": 30.41, "elapsed_time": "2:32:44", "remaining_time": "5:49:32"} +{"current_steps": 1727, "total_steps": 5676, "loss": 0.6394520401954651, "lr": 1.6673389939426463e-05, "epoch": 0.6086343612334801, "percentage": 30.43, "elapsed_time": 
"2:32:48", "remaining_time": "5:49:25"} +{"current_steps": 1728, "total_steps": 5676, "loss": 0.5975633859634399, "lr": 1.66690495747638e-05, "epoch": 0.6089867841409692, "percentage": 30.44, "elapsed_time": "2:32:54", "remaining_time": "5:49:20"} +{"current_steps": 1729, "total_steps": 5676, "loss": 0.736790657043457, "lr": 1.666470694616636e-05, "epoch": 0.6093392070484581, "percentage": 30.46, "elapsed_time": "2:33:00", "remaining_time": "5:49:16"} +{"current_steps": 1730, "total_steps": 5676, "loss": 0.7693831920623779, "lr": 1.6660362055108316e-05, "epoch": 0.6096916299559472, "percentage": 30.48, "elapsed_time": "2:33:04", "remaining_time": "5:49:09"} +{"current_steps": 1731, "total_steps": 5676, "loss": 0.7322608232498169, "lr": 1.665601490306464e-05, "epoch": 0.6100440528634361, "percentage": 30.5, "elapsed_time": "2:33:08", "remaining_time": "5:49:01"} +{"current_steps": 1732, "total_steps": 5676, "loss": 0.6478679180145264, "lr": 1.6651665491511043e-05, "epoch": 0.6103964757709252, "percentage": 30.51, "elapsed_time": "2:33:14", "remaining_time": "5:48:56"} +{"current_steps": 1733, "total_steps": 5676, "loss": 0.7125877141952515, "lr": 1.6647313821924022e-05, "epoch": 0.6107488986784141, "percentage": 30.53, "elapsed_time": "2:33:19", "remaining_time": "5:48:51"} +{"current_steps": 1734, "total_steps": 5676, "loss": 0.8999321460723877, "lr": 1.664295989578083e-05, "epoch": 0.6111013215859031, "percentage": 30.55, "elapsed_time": "2:33:24", "remaining_time": "5:48:46"} +{"current_steps": 1735, "total_steps": 5676, "loss": 0.6908334493637085, "lr": 1.663860371455949e-05, "epoch": 0.6114537444933921, "percentage": 30.57, "elapsed_time": "2:33:29", "remaining_time": "5:48:39"} +{"current_steps": 1736, "total_steps": 5676, "loss": 0.6708767414093018, "lr": 1.663424527973879e-05, "epoch": 0.611806167400881, "percentage": 30.58, "elapsed_time": "2:33:34", "remaining_time": "5:48:33"} +{"current_steps": 1737, "total_steps": 5676, "loss": 0.6991565823554993, "lr": 
1.6629884592798283e-05, "epoch": 0.61215859030837, "percentage": 30.6, "elapsed_time": "2:33:38", "remaining_time": "5:48:25"} +{"current_steps": 1738, "total_steps": 5676, "loss": 0.6224193572998047, "lr": 1.6625521655218287e-05, "epoch": 0.612511013215859, "percentage": 30.62, "elapsed_time": "2:33:45", "remaining_time": "5:48:23"} +{"current_steps": 1739, "total_steps": 5676, "loss": 0.701459527015686, "lr": 1.662115646847988e-05, "epoch": 0.612863436123348, "percentage": 30.64, "elapsed_time": "2:33:49", "remaining_time": "5:48:14"} +{"current_steps": 1740, "total_steps": 5676, "loss": 0.784063458442688, "lr": 1.6616789034064914e-05, "epoch": 0.613215859030837, "percentage": 30.66, "elapsed_time": "2:33:53", "remaining_time": "5:48:06"} +{"current_steps": 1741, "total_steps": 5676, "loss": 0.7604146003723145, "lr": 1.661241935345599e-05, "epoch": 0.613568281938326, "percentage": 30.67, "elapsed_time": "2:34:00", "remaining_time": "5:48:04"} +{"current_steps": 1742, "total_steps": 5676, "loss": 0.6347941160202026, "lr": 1.6608047428136482e-05, "epoch": 0.613920704845815, "percentage": 30.69, "elapsed_time": "2:34:04", "remaining_time": "5:47:57"} +{"current_steps": 1743, "total_steps": 5676, "loss": 0.7559434175491333, "lr": 1.6603673259590524e-05, "epoch": 0.614273127753304, "percentage": 30.71, "elapsed_time": "2:34:10", "remaining_time": "5:47:52"} +{"current_steps": 1744, "total_steps": 5676, "loss": 0.742524266242981, "lr": 1.6599296849303007e-05, "epoch": 0.6146255506607929, "percentage": 30.73, "elapsed_time": "2:34:14", "remaining_time": "5:47:45"} +{"current_steps": 1745, "total_steps": 5676, "loss": 0.697594165802002, "lr": 1.6594918198759586e-05, "epoch": 0.614977973568282, "percentage": 30.74, "elapsed_time": "2:34:20", "remaining_time": "5:47:40"} +{"current_steps": 1746, "total_steps": 5676, "loss": 0.7876765131950378, "lr": 1.659053730944668e-05, "epoch": 0.6153303964757709, "percentage": 30.76, "elapsed_time": "2:34:26", "remaining_time": 
"5:47:36"} +{"current_steps": 1747, "total_steps": 5676, "loss": 0.7514386177062988, "lr": 1.658615418285146e-05, "epoch": 0.6156828193832599, "percentage": 30.78, "elapsed_time": "2:34:32", "remaining_time": "5:47:32"} +{"current_steps": 1748, "total_steps": 5676, "loss": 0.6220899820327759, "lr": 1.658176882046187e-05, "epoch": 0.6160352422907489, "percentage": 30.8, "elapsed_time": "2:34:37", "remaining_time": "5:47:26"} +{"current_steps": 1749, "total_steps": 5676, "loss": 0.7376539707183838, "lr": 1.6577381223766592e-05, "epoch": 0.6163876651982378, "percentage": 30.81, "elapsed_time": "2:34:42", "remaining_time": "5:47:22"} +{"current_steps": 1750, "total_steps": 5676, "loss": 0.8296281099319458, "lr": 1.6572991394255084e-05, "epoch": 0.6167400881057269, "percentage": 30.83, "elapsed_time": "2:34:47", "remaining_time": "5:47:15"} +{"current_steps": 1751, "total_steps": 5676, "loss": 0.7316757440567017, "lr": 1.656859933341756e-05, "epoch": 0.6170925110132158, "percentage": 30.85, "elapsed_time": "2:34:52", "remaining_time": "5:47:10"} +{"current_steps": 1752, "total_steps": 5676, "loss": 0.6933871507644653, "lr": 1.6564205042744986e-05, "epoch": 0.6174449339207049, "percentage": 30.87, "elapsed_time": "2:34:58", "remaining_time": "5:47:05"} +{"current_steps": 1753, "total_steps": 5676, "loss": 0.6835601329803467, "lr": 1.655980852372908e-05, "epoch": 0.6177973568281938, "percentage": 30.88, "elapsed_time": "2:35:03", "remaining_time": "5:46:59"} +{"current_steps": 1754, "total_steps": 5676, "loss": 0.6752027869224548, "lr": 1.655540977786233e-05, "epoch": 0.6181497797356829, "percentage": 30.9, "elapsed_time": "2:35:09", "remaining_time": "5:46:55"} +{"current_steps": 1755, "total_steps": 5676, "loss": 0.6092851758003235, "lr": 1.6551008806637976e-05, "epoch": 0.6185022026431718, "percentage": 30.92, "elapsed_time": "2:35:14", "remaining_time": "5:46:49"} +{"current_steps": 1756, "total_steps": 5676, "loss": 0.682563066482544, "lr": 1.6546605611550008e-05, 
"epoch": 0.6188546255506608, "percentage": 30.94, "elapsed_time": "2:35:19", "remaining_time": "5:46:44"} +{"current_steps": 1757, "total_steps": 5676, "loss": 0.8674311637878418, "lr": 1.654220019409317e-05, "epoch": 0.6192070484581498, "percentage": 30.95, "elapsed_time": "2:35:24", "remaining_time": "5:46:39"} +{"current_steps": 1758, "total_steps": 5676, "loss": 0.7209165096282959, "lr": 1.6537792555762966e-05, "epoch": 0.6195594713656387, "percentage": 30.97, "elapsed_time": "2:35:30", "remaining_time": "5:46:33"} +{"current_steps": 1759, "total_steps": 5676, "loss": 0.7795991897583008, "lr": 1.6533382698055655e-05, "epoch": 0.6199118942731278, "percentage": 30.99, "elapsed_time": "2:35:34", "remaining_time": "5:46:26"} +{"current_steps": 1760, "total_steps": 5676, "loss": 0.6749448776245117, "lr": 1.6528970622468245e-05, "epoch": 0.6202643171806167, "percentage": 31.01, "elapsed_time": "2:35:39", "remaining_time": "5:46:19"} +{"current_steps": 1761, "total_steps": 5676, "loss": 0.9127920866012573, "lr": 1.6524556330498494e-05, "epoch": 0.6206167400881057, "percentage": 31.03, "elapsed_time": "2:35:44", "remaining_time": "5:46:14"} +{"current_steps": 1762, "total_steps": 5676, "loss": 0.6224071979522705, "lr": 1.6520139823644922e-05, "epoch": 0.6209691629955947, "percentage": 31.04, "elapsed_time": "2:35:49", "remaining_time": "5:46:08"} +{"current_steps": 1763, "total_steps": 5676, "loss": 0.6955251693725586, "lr": 1.6515721103406798e-05, "epoch": 0.6213215859030837, "percentage": 31.06, "elapsed_time": "2:35:54", "remaining_time": "5:46:02"} +{"current_steps": 1764, "total_steps": 5676, "loss": 0.676613986492157, "lr": 1.6511300171284132e-05, "epoch": 0.6216740088105727, "percentage": 31.08, "elapsed_time": "2:36:00", "remaining_time": "5:45:59"} +{"current_steps": 1765, "total_steps": 5676, "loss": 0.7482033967971802, "lr": 1.65068770287777e-05, "epoch": 0.6220264317180617, "percentage": 31.1, "elapsed_time": "2:36:07", "remaining_time": "5:45:56"} 
+{"current_steps": 1766, "total_steps": 5676, "loss": 0.6019684076309204, "lr": 1.6502451677389015e-05, "epoch": 0.6223788546255506, "percentage": 31.11, "elapsed_time": "2:36:12", "remaining_time": "5:45:51"} +{"current_steps": 1767, "total_steps": 5676, "loss": 0.6796068549156189, "lr": 1.649802411862035e-05, "epoch": 0.6227312775330397, "percentage": 31.13, "elapsed_time": "2:36:17", "remaining_time": "5:45:46"} +{"current_steps": 1768, "total_steps": 5676, "loss": 0.6351302862167358, "lr": 1.6493594353974724e-05, "epoch": 0.6230837004405286, "percentage": 31.15, "elapsed_time": "2:36:23", "remaining_time": "5:45:42"} +{"current_steps": 1769, "total_steps": 5676, "loss": 0.6093732714653015, "lr": 1.6489162384955906e-05, "epoch": 0.6234361233480176, "percentage": 31.17, "elapsed_time": "2:36:29", "remaining_time": "5:45:38"} +{"current_steps": 1770, "total_steps": 5676, "loss": 0.8181271553039551, "lr": 1.6484728213068405e-05, "epoch": 0.6237885462555066, "percentage": 31.18, "elapsed_time": "2:36:35", "remaining_time": "5:45:32"} +{"current_steps": 1771, "total_steps": 5676, "loss": 0.7093993425369263, "lr": 1.6480291839817488e-05, "epoch": 0.6241409691629956, "percentage": 31.2, "elapsed_time": "2:36:40", "remaining_time": "5:45:27"} +{"current_steps": 1772, "total_steps": 5676, "loss": 0.6895081996917725, "lr": 1.6475853266709165e-05, "epoch": 0.6244933920704846, "percentage": 31.22, "elapsed_time": "2:36:45", "remaining_time": "5:45:21"} +{"current_steps": 1773, "total_steps": 5676, "loss": 0.6706013679504395, "lr": 1.6471412495250195e-05, "epoch": 0.6248458149779735, "percentage": 31.24, "elapsed_time": "2:36:49", "remaining_time": "5:45:13"} +{"current_steps": 1774, "total_steps": 5676, "loss": 0.6700015664100647, "lr": 1.6466969526948082e-05, "epoch": 0.6251982378854626, "percentage": 31.25, "elapsed_time": "2:36:54", "remaining_time": "5:45:08"} +{"current_steps": 1775, "total_steps": 5676, "loss": 0.6591087579727173, "lr": 1.6462524363311072e-05, 
"epoch": 0.6255506607929515, "percentage": 31.27, "elapsed_time": "2:36:59", "remaining_time": "5:45:01"} +{"current_steps": 1776, "total_steps": 5676, "loss": 0.7775006294250488, "lr": 1.6458077005848164e-05, "epoch": 0.6259030837004406, "percentage": 31.29, "elapsed_time": "2:37:04", "remaining_time": "5:44:56"} +{"current_steps": 1777, "total_steps": 5676, "loss": 0.8459682464599609, "lr": 1.6453627456069093e-05, "epoch": 0.6262555066079295, "percentage": 31.31, "elapsed_time": "2:37:09", "remaining_time": "5:44:49"} +{"current_steps": 1778, "total_steps": 5676, "loss": 0.6536898612976074, "lr": 1.6449175715484346e-05, "epoch": 0.6266079295154185, "percentage": 31.32, "elapsed_time": "2:37:14", "remaining_time": "5:44:44"} +{"current_steps": 1779, "total_steps": 5676, "loss": 0.7543610334396362, "lr": 1.6444721785605148e-05, "epoch": 0.6269603524229075, "percentage": 31.34, "elapsed_time": "2:37:20", "remaining_time": "5:44:40"} +{"current_steps": 1780, "total_steps": 5676, "loss": 0.7416362762451172, "lr": 1.6440265667943474e-05, "epoch": 0.6273127753303964, "percentage": 31.36, "elapsed_time": "2:37:25", "remaining_time": "5:44:33"} +{"current_steps": 1781, "total_steps": 5676, "loss": 0.5505499839782715, "lr": 1.6435807364012035e-05, "epoch": 0.6276651982378855, "percentage": 31.38, "elapsed_time": "2:37:30", "remaining_time": "5:44:27"} +{"current_steps": 1782, "total_steps": 5676, "loss": 0.792723536491394, "lr": 1.6431346875324284e-05, "epoch": 0.6280176211453744, "percentage": 31.4, "elapsed_time": "2:37:34", "remaining_time": "5:44:20"} +{"current_steps": 1783, "total_steps": 5676, "loss": 0.6313158273696899, "lr": 1.6426884203394416e-05, "epoch": 0.6283700440528635, "percentage": 31.41, "elapsed_time": "2:37:39", "remaining_time": "5:44:14"} +{"current_steps": 1784, "total_steps": 5676, "loss": 0.6168874502182007, "lr": 1.642241934973738e-05, "epoch": 0.6287224669603524, "percentage": 31.43, "elapsed_time": "2:37:44", "remaining_time": "5:44:08"} 
+{"current_steps": 1785, "total_steps": 5676, "loss": 0.6995766162872314, "lr": 1.6417952315868845e-05, "epoch": 0.6290748898678414, "percentage": 31.45, "elapsed_time": "2:37:50", "remaining_time": "5:44:03"} +{"current_steps": 1786, "total_steps": 5676, "loss": 0.8046826124191284, "lr": 1.641348310330523e-05, "epoch": 0.6294273127753304, "percentage": 31.47, "elapsed_time": "2:37:55", "remaining_time": "5:43:58"} +{"current_steps": 1787, "total_steps": 5676, "loss": 0.7227291464805603, "lr": 1.6409011713563697e-05, "epoch": 0.6297797356828194, "percentage": 31.48, "elapsed_time": "2:38:00", "remaining_time": "5:43:51"} +{"current_steps": 1788, "total_steps": 5676, "loss": 0.6463631391525269, "lr": 1.6404538148162145e-05, "epoch": 0.6301321585903084, "percentage": 31.5, "elapsed_time": "2:38:05", "remaining_time": "5:43:46"} +{"current_steps": 1789, "total_steps": 5676, "loss": 0.7473348379135132, "lr": 1.640006240861921e-05, "epoch": 0.6304845814977974, "percentage": 31.52, "elapsed_time": "2:38:11", "remaining_time": "5:43:43"} +{"current_steps": 1790, "total_steps": 5676, "loss": 0.7311505079269409, "lr": 1.6395584496454263e-05, "epoch": 0.6308370044052863, "percentage": 31.54, "elapsed_time": "2:38:16", "remaining_time": "5:43:36"} +{"current_steps": 1791, "total_steps": 5676, "loss": 0.7259535789489746, "lr": 1.639110441318742e-05, "epoch": 0.6311894273127753, "percentage": 31.55, "elapsed_time": "2:38:22", "remaining_time": "5:43:31"} +{"current_steps": 1792, "total_steps": 5676, "loss": 0.5777252912521362, "lr": 1.6386622160339522e-05, "epoch": 0.6315418502202643, "percentage": 31.57, "elapsed_time": "2:38:26", "remaining_time": "5:43:23"} +{"current_steps": 1793, "total_steps": 5676, "loss": 0.5510598421096802, "lr": 1.638213773943216e-05, "epoch": 0.6318942731277533, "percentage": 31.59, "elapsed_time": "2:38:31", "remaining_time": "5:43:19"} +{"current_steps": 1794, "total_steps": 5676, "loss": 0.6448229551315308, "lr": 1.637765115198766e-05, "epoch": 
0.6322466960352423, "percentage": 31.61, "elapsed_time": "2:38:37", "remaining_time": "5:43:14"} +{"current_steps": 1795, "total_steps": 5676, "loss": 0.7359289526939392, "lr": 1.6373162399529067e-05, "epoch": 0.6325991189427312, "percentage": 31.62, "elapsed_time": "2:38:43", "remaining_time": "5:43:10"} +{"current_steps": 1796, "total_steps": 5676, "loss": 0.616656482219696, "lr": 1.6368671483580185e-05, "epoch": 0.6329515418502203, "percentage": 31.64, "elapsed_time": "2:38:48", "remaining_time": "5:43:04"} +{"current_steps": 1797, "total_steps": 5676, "loss": 0.5966289043426514, "lr": 1.6364178405665534e-05, "epoch": 0.6333039647577092, "percentage": 31.66, "elapsed_time": "2:38:53", "remaining_time": "5:42:59"} +{"current_steps": 1798, "total_steps": 5676, "loss": 0.7475985288619995, "lr": 1.6359683167310375e-05, "epoch": 0.6336563876651983, "percentage": 31.68, "elapsed_time": "2:38:58", "remaining_time": "5:42:53"} +{"current_steps": 1799, "total_steps": 5676, "loss": 0.6030765771865845, "lr": 1.63551857700407e-05, "epoch": 0.6340088105726872, "percentage": 31.69, "elapsed_time": "2:39:03", "remaining_time": "5:42:48"} +{"current_steps": 1800, "total_steps": 5676, "loss": 0.6193016171455383, "lr": 1.6350686215383237e-05, "epoch": 0.6343612334801763, "percentage": 31.71, "elapsed_time": "2:39:09", "remaining_time": "5:42:42"} +{"current_steps": 1801, "total_steps": 5676, "loss": 0.6404513120651245, "lr": 1.6346184504865442e-05, "epoch": 0.6347136563876652, "percentage": 31.73, "elapsed_time": "2:39:19", "remaining_time": "5:42:47"} +{"current_steps": 1802, "total_steps": 5676, "loss": 0.8453506231307983, "lr": 1.6341680640015515e-05, "epoch": 0.6350660792951542, "percentage": 31.75, "elapsed_time": "2:39:23", "remaining_time": "5:42:39"} +{"current_steps": 1803, "total_steps": 5676, "loss": 0.6094445586204529, "lr": 1.6337174622362366e-05, "epoch": 0.6354185022026432, "percentage": 31.77, "elapsed_time": "2:39:27", "remaining_time": "5:42:32"} 
+{"current_steps": 1804, "total_steps": 5676, "loss": 0.7352159023284912, "lr": 1.6332666453435653e-05, "epoch": 0.6357709251101321, "percentage": 31.78, "elapsed_time": "2:39:32", "remaining_time": "5:42:26"} +{"current_steps": 1805, "total_steps": 5676, "loss": 0.7395339608192444, "lr": 1.632815613476576e-05, "epoch": 0.6361233480176212, "percentage": 31.8, "elapsed_time": "2:39:38", "remaining_time": "5:42:22"} +{"current_steps": 1806, "total_steps": 5676, "loss": 0.7246927618980408, "lr": 1.63236436678838e-05, "epoch": 0.6364757709251101, "percentage": 31.82, "elapsed_time": "2:39:45", "remaining_time": "5:42:19"} +{"current_steps": 1807, "total_steps": 5676, "loss": 0.6913329362869263, "lr": 1.6319129054321616e-05, "epoch": 0.6368281938325991, "percentage": 31.84, "elapsed_time": "2:39:51", "remaining_time": "5:42:15"} +{"current_steps": 1808, "total_steps": 5676, "loss": 0.6410515308380127, "lr": 1.6314612295611772e-05, "epoch": 0.6371806167400881, "percentage": 31.85, "elapsed_time": "2:39:56", "remaining_time": "5:42:09"} +{"current_steps": 1809, "total_steps": 5676, "loss": 0.690910816192627, "lr": 1.6310093393287574e-05, "epoch": 0.6375330396475771, "percentage": 31.87, "elapsed_time": "2:40:01", "remaining_time": "5:42:04"} +{"current_steps": 1810, "total_steps": 5676, "loss": 0.6520562171936035, "lr": 1.6305572348883044e-05, "epoch": 0.6378854625550661, "percentage": 31.89, "elapsed_time": "2:40:05", "remaining_time": "5:41:57"} +{"current_steps": 1811, "total_steps": 5676, "loss": 0.6966608166694641, "lr": 1.630104916393294e-05, "epoch": 0.6382378854625551, "percentage": 31.91, "elapsed_time": "2:40:10", "remaining_time": "5:41:51"} +{"current_steps": 1812, "total_steps": 5676, "loss": 0.826806902885437, "lr": 1.6296523839972743e-05, "epoch": 0.638590308370044, "percentage": 31.92, "elapsed_time": "2:40:15", "remaining_time": "5:41:43"} +{"current_steps": 1813, "total_steps": 5676, "loss": 0.6695773601531982, "lr": 1.6291996378538653e-05, "epoch": 
0.6389427312775331, "percentage": 31.94, "elapsed_time": "2:40:20", "remaining_time": "5:41:38"} +{"current_steps": 1814, "total_steps": 5676, "loss": 0.5725491046905518, "lr": 1.6287466781167607e-05, "epoch": 0.639295154185022, "percentage": 31.96, "elapsed_time": "2:40:24", "remaining_time": "5:41:31"} +{"current_steps": 1815, "total_steps": 5676, "loss": 0.5543544292449951, "lr": 1.628293504939727e-05, "epoch": 0.639647577092511, "percentage": 31.98, "elapsed_time": "2:40:30", "remaining_time": "5:41:26"} +{"current_steps": 1816, "total_steps": 5676, "loss": 0.6964641809463501, "lr": 1.6278401184766007e-05, "epoch": 0.64, "percentage": 31.99, "elapsed_time": "2:40:34", "remaining_time": "5:41:18"} +{"current_steps": 1817, "total_steps": 5676, "loss": 0.675407886505127, "lr": 1.6273865188812935e-05, "epoch": 0.640352422907489, "percentage": 32.01, "elapsed_time": "2:40:41", "remaining_time": "5:41:15"} +{"current_steps": 1818, "total_steps": 5676, "loss": 0.6304433345794678, "lr": 1.626932706307788e-05, "epoch": 0.640704845814978, "percentage": 32.03, "elapsed_time": "2:40:45", "remaining_time": "5:41:09"} +{"current_steps": 1819, "total_steps": 5676, "loss": 0.7432112693786621, "lr": 1.62647868091014e-05, "epoch": 0.6410572687224669, "percentage": 32.05, "elapsed_time": "2:40:50", "remaining_time": "5:41:02"} +{"current_steps": 1820, "total_steps": 5676, "loss": 0.730377197265625, "lr": 1.6260244428424763e-05, "epoch": 0.641409691629956, "percentage": 32.06, "elapsed_time": "2:40:55", "remaining_time": "5:40:56"} +{"current_steps": 1821, "total_steps": 5676, "loss": 0.694229006767273, "lr": 1.6255699922589968e-05, "epoch": 0.6417621145374449, "percentage": 32.08, "elapsed_time": "2:41:00", "remaining_time": "5:40:51"} +{"current_steps": 1822, "total_steps": 5676, "loss": 0.7284739017486572, "lr": 1.6251153293139735e-05, "epoch": 0.642114537444934, "percentage": 32.1, "elapsed_time": "2:41:05", "remaining_time": "5:40:44"} +{"current_steps": 1823, "total_steps": 
5676, "loss": 0.6028950214385986, "lr": 1.6246604541617507e-05, "epoch": 0.6424669603524229, "percentage": 32.12, "elapsed_time": "2:41:10", "remaining_time": "5:40:39"} +{"current_steps": 1824, "total_steps": 5676, "loss": 0.6776808500289917, "lr": 1.6242053669567432e-05, "epoch": 0.642819383259912, "percentage": 32.14, "elapsed_time": "2:41:15", "remaining_time": "5:40:34"} +{"current_steps": 1825, "total_steps": 5676, "loss": 0.7743366956710815, "lr": 1.6237500678534396e-05, "epoch": 0.6431718061674009, "percentage": 32.15, "elapsed_time": "2:41:21", "remaining_time": "5:40:28"} +{"current_steps": 1826, "total_steps": 5676, "loss": 0.6356723308563232, "lr": 1.6232945570064e-05, "epoch": 0.6435242290748898, "percentage": 32.17, "elapsed_time": "2:41:26", "remaining_time": "5:40:22"} +{"current_steps": 1827, "total_steps": 5676, "loss": 0.7356402277946472, "lr": 1.622838834570256e-05, "epoch": 0.6438766519823789, "percentage": 32.19, "elapsed_time": "2:41:32", "remaining_time": "5:40:19"} +{"current_steps": 1828, "total_steps": 5676, "loss": 0.7639342546463013, "lr": 1.622382900699711e-05, "epoch": 0.6442290748898678, "percentage": 32.21, "elapsed_time": "2:41:39", "remaining_time": "5:40:18"} +{"current_steps": 1829, "total_steps": 5676, "loss": 0.6969513297080994, "lr": 1.6219267555495407e-05, "epoch": 0.6445814977973569, "percentage": 32.22, "elapsed_time": "2:41:43", "remaining_time": "5:40:10"} +{"current_steps": 1830, "total_steps": 5676, "loss": 0.7532765865325928, "lr": 1.621470399274592e-05, "epoch": 0.6449339207048458, "percentage": 32.24, "elapsed_time": "2:41:50", "remaining_time": "5:40:07"} +{"current_steps": 1831, "total_steps": 5676, "loss": 0.5801100730895996, "lr": 1.6210138320297835e-05, "epoch": 0.6452863436123348, "percentage": 32.26, "elapsed_time": "2:41:56", "remaining_time": "5:40:03"} +{"current_steps": 1832, "total_steps": 5676, "loss": 0.8006102442741394, "lr": 1.6205570539701056e-05, "epoch": 0.6456387665198238, "percentage": 32.28, 
"elapsed_time": "2:42:01", "remaining_time": "5:39:57"} +{"current_steps": 1833, "total_steps": 5676, "loss": 0.6507089138031006, "lr": 1.6201000652506203e-05, "epoch": 0.6459911894273128, "percentage": 32.29, "elapsed_time": "2:42:06", "remaining_time": "5:39:53"} +{"current_steps": 1834, "total_steps": 5676, "loss": 0.7407999634742737, "lr": 1.619642866026461e-05, "epoch": 0.6463436123348018, "percentage": 32.31, "elapsed_time": "2:42:12", "remaining_time": "5:39:47"} +{"current_steps": 1835, "total_steps": 5676, "loss": 0.6964670419692993, "lr": 1.619185456452833e-05, "epoch": 0.6466960352422908, "percentage": 32.33, "elapsed_time": "2:42:16", "remaining_time": "5:39:39"} +{"current_steps": 1836, "total_steps": 5676, "loss": 0.7095489501953125, "lr": 1.6187278366850122e-05, "epoch": 0.6470484581497797, "percentage": 32.35, "elapsed_time": "2:42:20", "remaining_time": "5:39:32"} +{"current_steps": 1837, "total_steps": 5676, "loss": 0.6968166828155518, "lr": 1.6182700068783463e-05, "epoch": 0.6474008810572687, "percentage": 32.36, "elapsed_time": "2:42:27", "remaining_time": "5:39:30"} +{"current_steps": 1838, "total_steps": 5676, "loss": 0.7745821475982666, "lr": 1.617811967188254e-05, "epoch": 0.6477533039647577, "percentage": 32.38, "elapsed_time": "2:42:32", "remaining_time": "5:39:24"} +{"current_steps": 1839, "total_steps": 5676, "loss": 0.7071934938430786, "lr": 1.6173537177702266e-05, "epoch": 0.6481057268722467, "percentage": 32.4, "elapsed_time": "2:42:36", "remaining_time": "5:39:17"} +{"current_steps": 1840, "total_steps": 5676, "loss": 0.6481701135635376, "lr": 1.6168952587798242e-05, "epoch": 0.6484581497797357, "percentage": 32.42, "elapsed_time": "2:42:41", "remaining_time": "5:39:11"} +{"current_steps": 1841, "total_steps": 5676, "loss": 0.6349890232086182, "lr": 1.6164365903726805e-05, "epoch": 0.6488105726872246, "percentage": 32.43, "elapsed_time": "2:42:47", "remaining_time": "5:39:07"} +{"current_steps": 1842, "total_steps": 5676, "loss": 
0.6067368388175964, "lr": 1.6159777127044982e-05, "epoch": 0.6491629955947137, "percentage": 32.45, "elapsed_time": "2:42:52", "remaining_time": "5:39:01"} +{"current_steps": 1843, "total_steps": 5676, "loss": 0.7170778512954712, "lr": 1.6155186259310523e-05, "epoch": 0.6495154185022026, "percentage": 32.47, "elapsed_time": "2:42:58", "remaining_time": "5:38:56"} +{"current_steps": 1844, "total_steps": 5676, "loss": 0.5623376965522766, "lr": 1.6150593302081888e-05, "epoch": 0.6498678414096917, "percentage": 32.49, "elapsed_time": "2:43:02", "remaining_time": "5:38:48"} +{"current_steps": 1845, "total_steps": 5676, "loss": 0.7295233607292175, "lr": 1.6145998256918238e-05, "epoch": 0.6502202643171806, "percentage": 32.51, "elapsed_time": "2:43:07", "remaining_time": "5:38:43"} +{"current_steps": 1846, "total_steps": 5676, "loss": 0.6991151571273804, "lr": 1.6141401125379454e-05, "epoch": 0.6505726872246697, "percentage": 32.52, "elapsed_time": "2:43:12", "remaining_time": "5:38:37"} +{"current_steps": 1847, "total_steps": 5676, "loss": 0.7553545236587524, "lr": 1.6136801909026113e-05, "epoch": 0.6509251101321586, "percentage": 32.54, "elapsed_time": "2:43:18", "remaining_time": "5:38:33"} +{"current_steps": 1848, "total_steps": 5676, "loss": 0.8280071020126343, "lr": 1.613220060941951e-05, "epoch": 0.6512775330396475, "percentage": 32.56, "elapsed_time": "2:43:23", "remaining_time": "5:38:27"} +{"current_steps": 1849, "total_steps": 5676, "loss": 0.662299633026123, "lr": 1.6127597228121636e-05, "epoch": 0.6516299559471366, "percentage": 32.58, "elapsed_time": "2:43:27", "remaining_time": "5:38:19"} +{"current_steps": 1850, "total_steps": 5676, "loss": 0.6493197679519653, "lr": 1.6122991766695206e-05, "epoch": 0.6519823788546255, "percentage": 32.59, "elapsed_time": "2:43:32", "remaining_time": "5:38:12"} +{"current_steps": 1851, "total_steps": 5676, "loss": 0.5910629034042358, "lr": 1.6118384226703623e-05, "epoch": 0.6523348017621146, "percentage": 32.61, 
"elapsed_time": "2:43:36", "remaining_time": "5:38:05"} +{"current_steps": 1852, "total_steps": 5676, "loss": 0.7124426364898682, "lr": 1.611377460971101e-05, "epoch": 0.6526872246696035, "percentage": 32.63, "elapsed_time": "2:43:42", "remaining_time": "5:38:00"} +{"current_steps": 1853, "total_steps": 5676, "loss": 0.6081063747406006, "lr": 1.610916291728218e-05, "epoch": 0.6530396475770925, "percentage": 32.65, "elapsed_time": "2:43:46", "remaining_time": "5:37:53"} +{"current_steps": 1854, "total_steps": 5676, "loss": 0.7536673545837402, "lr": 1.6104549150982666e-05, "epoch": 0.6533920704845815, "percentage": 32.66, "elapsed_time": "2:43:51", "remaining_time": "5:37:46"} +{"current_steps": 1855, "total_steps": 5676, "loss": 0.6514976024627686, "lr": 1.6099933312378695e-05, "epoch": 0.6537444933920705, "percentage": 32.68, "elapsed_time": "2:43:55", "remaining_time": "5:37:39"} +{"current_steps": 1856, "total_steps": 5676, "loss": 0.6595193147659302, "lr": 1.6095315403037205e-05, "epoch": 0.6540969162995595, "percentage": 32.7, "elapsed_time": "2:43:59", "remaining_time": "5:37:32"} +{"current_steps": 1857, "total_steps": 5676, "loss": 0.666920006275177, "lr": 1.6090695424525826e-05, "epoch": 0.6544493392070485, "percentage": 32.72, "elapsed_time": "2:44:04", "remaining_time": "5:37:26"} +{"current_steps": 1858, "total_steps": 5676, "loss": 0.5984979271888733, "lr": 1.6086073378412902e-05, "epoch": 0.6548017621145374, "percentage": 32.73, "elapsed_time": "2:44:09", "remaining_time": "5:37:19"} +{"current_steps": 1859, "total_steps": 5676, "loss": 0.8021191358566284, "lr": 1.608144926626747e-05, "epoch": 0.6551541850220264, "percentage": 32.75, "elapsed_time": "2:44:13", "remaining_time": "5:37:12"} +{"current_steps": 1860, "total_steps": 5676, "loss": 0.7368075847625732, "lr": 1.6076823089659272e-05, "epoch": 0.6555066079295154, "percentage": 32.77, "elapsed_time": "2:44:19", "remaining_time": "5:37:07"} +{"current_steps": 1861, "total_steps": 5676, "loss": 
0.7923766374588013, "lr": 1.6072194850158755e-05, "epoch": 0.6558590308370044, "percentage": 32.79, "elapsed_time": "2:44:24", "remaining_time": "5:37:01"} +{"current_steps": 1862, "total_steps": 5676, "loss": 0.6907824873924255, "lr": 1.606756454933706e-05, "epoch": 0.6562114537444934, "percentage": 32.8, "elapsed_time": "2:44:30", "remaining_time": "5:36:57"} +{"current_steps": 1863, "total_steps": 5676, "loss": 0.7366634607315063, "lr": 1.606293218876603e-05, "epoch": 0.6565638766519823, "percentage": 32.82, "elapsed_time": "2:44:35", "remaining_time": "5:36:52"} +{"current_steps": 1864, "total_steps": 5676, "loss": 0.7166022658348083, "lr": 1.6058297770018208e-05, "epoch": 0.6569162995594714, "percentage": 32.84, "elapsed_time": "2:44:40", "remaining_time": "5:36:45"} +{"current_steps": 1865, "total_steps": 5676, "loss": 0.6969404220581055, "lr": 1.6053661294666833e-05, "epoch": 0.6572687224669603, "percentage": 32.86, "elapsed_time": "2:44:45", "remaining_time": "5:36:39"} +{"current_steps": 1866, "total_steps": 5676, "loss": 0.7182974815368652, "lr": 1.6049022764285846e-05, "epoch": 0.6576211453744494, "percentage": 32.88, "elapsed_time": "2:44:49", "remaining_time": "5:36:32"} +{"current_steps": 1867, "total_steps": 5676, "loss": 0.7469301819801331, "lr": 1.6044382180449886e-05, "epoch": 0.6579735682819383, "percentage": 32.89, "elapsed_time": "2:44:54", "remaining_time": "5:36:27"} +{"current_steps": 1868, "total_steps": 5676, "loss": 0.7097122073173523, "lr": 1.603973954473428e-05, "epoch": 0.6583259911894274, "percentage": 32.91, "elapsed_time": "2:45:01", "remaining_time": "5:36:24"} +{"current_steps": 1869, "total_steps": 5676, "loss": 0.6907291412353516, "lr": 1.6035094858715065e-05, "epoch": 0.6586784140969163, "percentage": 32.93, "elapsed_time": "2:45:06", "remaining_time": "5:36:19"} +{"current_steps": 1870, "total_steps": 5676, "loss": 0.6259130239486694, "lr": 1.6030448123968963e-05, "epoch": 0.6590308370044052, "percentage": 32.95, 
"elapsed_time": "2:45:11", "remaining_time": "5:36:12"} +{"current_steps": 1871, "total_steps": 5676, "loss": 0.6948051452636719, "lr": 1.6025799342073397e-05, "epoch": 0.6593832599118943, "percentage": 32.96, "elapsed_time": "2:45:16", "remaining_time": "5:36:06"} +{"current_steps": 1872, "total_steps": 5676, "loss": 0.7037572264671326, "lr": 1.602114851460648e-05, "epoch": 0.6597356828193832, "percentage": 32.98, "elapsed_time": "2:45:22", "remaining_time": "5:36:03"} +{"current_steps": 1873, "total_steps": 5676, "loss": 0.7728864550590515, "lr": 1.6016495643147036e-05, "epoch": 0.6600881057268723, "percentage": 33.0, "elapsed_time": "2:45:29", "remaining_time": "5:36:00"} +{"current_steps": 1874, "total_steps": 5676, "loss": 0.7782067060470581, "lr": 1.601184072927456e-05, "epoch": 0.6604405286343612, "percentage": 33.02, "elapsed_time": "2:45:34", "remaining_time": "5:35:55"} +{"current_steps": 1875, "total_steps": 5676, "loss": 0.6168591976165771, "lr": 1.6007183774569246e-05, "epoch": 0.6607929515418502, "percentage": 33.03, "elapsed_time": "2:45:40", "remaining_time": "5:35:50"} +{"current_steps": 1876, "total_steps": 5676, "loss": 0.702346920967102, "lr": 1.6002524780611995e-05, "epoch": 0.6611453744493392, "percentage": 33.05, "elapsed_time": "2:45:47", "remaining_time": "5:35:49"} +{"current_steps": 1877, "total_steps": 5676, "loss": 0.6084239482879639, "lr": 1.5997863748984384e-05, "epoch": 0.6614977973568282, "percentage": 33.07, "elapsed_time": "2:45:53", "remaining_time": "5:35:45"} +{"current_steps": 1878, "total_steps": 5676, "loss": 0.8307315707206726, "lr": 1.5993200681268696e-05, "epoch": 0.6618502202643172, "percentage": 33.09, "elapsed_time": "2:45:59", "remaining_time": "5:35:40"} +{"current_steps": 1879, "total_steps": 5676, "loss": 0.6465811729431152, "lr": 1.5988535579047888e-05, "epoch": 0.6622026431718062, "percentage": 33.1, "elapsed_time": "2:46:04", "remaining_time": "5:35:35"} +{"current_steps": 1880, "total_steps": 5676, "loss": 
0.71415114402771, "lr": 1.598386844390562e-05, "epoch": 0.6625550660792952, "percentage": 33.12, "elapsed_time": "2:46:09", "remaining_time": "5:35:29"} +{"current_steps": 1881, "total_steps": 5676, "loss": 0.7135012149810791, "lr": 1.5979199277426243e-05, "epoch": 0.6629074889867841, "percentage": 33.14, "elapsed_time": "2:46:14", "remaining_time": "5:35:24"} +{"current_steps": 1882, "total_steps": 5676, "loss": 0.840306282043457, "lr": 1.597452808119479e-05, "epoch": 0.6632599118942731, "percentage": 33.16, "elapsed_time": "2:46:18", "remaining_time": "5:35:16"} +{"current_steps": 1883, "total_steps": 5676, "loss": 0.622429609298706, "lr": 1.596985485679699e-05, "epoch": 0.6636123348017621, "percentage": 33.17, "elapsed_time": "2:46:23", "remaining_time": "5:35:09"} +{"current_steps": 1884, "total_steps": 5676, "loss": 0.6505612134933472, "lr": 1.5965179605819248e-05, "epoch": 0.6639647577092511, "percentage": 33.19, "elapsed_time": "2:46:27", "remaining_time": "5:35:03"} +{"current_steps": 1885, "total_steps": 5676, "loss": 0.7665247917175293, "lr": 1.5960502329848683e-05, "epoch": 0.66431718061674, "percentage": 33.21, "elapsed_time": "2:46:32", "remaining_time": "5:34:55"} +{"current_steps": 1886, "total_steps": 5676, "loss": 0.7780051231384277, "lr": 1.5955823030473068e-05, "epoch": 0.6646696035242291, "percentage": 33.23, "elapsed_time": "2:46:36", "remaining_time": "5:34:48"} +{"current_steps": 1887, "total_steps": 5676, "loss": 0.6311650276184082, "lr": 1.5951141709280886e-05, "epoch": 0.665022026431718, "percentage": 33.25, "elapsed_time": "2:46:41", "remaining_time": "5:34:42"} +{"current_steps": 1888, "total_steps": 5676, "loss": 0.7126712799072266, "lr": 1.5946458367861302e-05, "epoch": 0.6653744493392071, "percentage": 33.26, "elapsed_time": "2:46:47", "remaining_time": "5:34:38"} +{"current_steps": 1889, "total_steps": 5676, "loss": 0.6979397535324097, "lr": 1.5941773007804165e-05, "epoch": 0.665726872246696, "percentage": 33.28, "elapsed_time": 
"2:46:52", "remaining_time": "5:34:32"} +{"current_steps": 1890, "total_steps": 5676, "loss": 0.7065495252609253, "lr": 1.5937085630700003e-05, "epoch": 0.6660792951541851, "percentage": 33.3, "elapsed_time": "2:46:57", "remaining_time": "5:34:26"} +{"current_steps": 1891, "total_steps": 5676, "loss": 0.6157221794128418, "lr": 1.593239623814004e-05, "epoch": 0.666431718061674, "percentage": 33.32, "elapsed_time": "2:47:01", "remaining_time": "5:34:19"} +{"current_steps": 1892, "total_steps": 5676, "loss": 0.6835625171661377, "lr": 1.5927704831716177e-05, "epoch": 0.6667841409691629, "percentage": 33.33, "elapsed_time": "2:47:05", "remaining_time": "5:34:11"} +{"current_steps": 1893, "total_steps": 5676, "loss": 0.6416822671890259, "lr": 1.5923011413021e-05, "epoch": 0.667136563876652, "percentage": 33.35, "elapsed_time": "2:47:10", "remaining_time": "5:34:05"} +{"current_steps": 1894, "total_steps": 5676, "loss": 0.7307168245315552, "lr": 1.5918315983647782e-05, "epoch": 0.6674889867841409, "percentage": 33.37, "elapsed_time": "2:47:16", "remaining_time": "5:34:00"} +{"current_steps": 1895, "total_steps": 5676, "loss": 0.5464824438095093, "lr": 1.5913618545190468e-05, "epoch": 0.66784140969163, "percentage": 33.39, "elapsed_time": "2:47:22", "remaining_time": "5:33:56"} +{"current_steps": 1896, "total_steps": 5676, "loss": 0.6634502410888672, "lr": 1.5908919099243698e-05, "epoch": 0.6681938325991189, "percentage": 33.4, "elapsed_time": "2:47:26", "remaining_time": "5:33:49"} +{"current_steps": 1897, "total_steps": 5676, "loss": 0.719158411026001, "lr": 1.5904217647402788e-05, "epoch": 0.668546255506608, "percentage": 33.42, "elapsed_time": "2:47:34", "remaining_time": "5:33:48"} +{"current_steps": 1898, "total_steps": 5676, "loss": 0.7547527551651001, "lr": 1.5899514191263733e-05, "epoch": 0.6688986784140969, "percentage": 33.44, "elapsed_time": "2:47:39", "remaining_time": "5:33:44"} +{"current_steps": 1899, "total_steps": 5676, "loss": 0.7549886703491211, "lr": 
1.5894808732423207e-05, "epoch": 0.6692511013215859, "percentage": 33.46, "elapsed_time": "2:47:45", "remaining_time": "5:33:40"} +{"current_steps": 1900, "total_steps": 5676, "loss": 0.7107831239700317, "lr": 1.589010127247857e-05, "epoch": 0.6696035242290749, "percentage": 33.47, "elapsed_time": "2:47:52", "remaining_time": "5:33:36"} +{"current_steps": 1901, "total_steps": 5676, "loss": 0.855078935623169, "lr": 1.588539181302786e-05, "epoch": 0.6699559471365639, "percentage": 33.49, "elapsed_time": "2:48:00", "remaining_time": "5:33:37"} +{"current_steps": 1902, "total_steps": 5676, "loss": 0.8235266208648682, "lr": 1.5880680355669792e-05, "epoch": 0.6703083700440529, "percentage": 33.51, "elapsed_time": "2:48:05", "remaining_time": "5:33:31"} +{"current_steps": 1903, "total_steps": 5676, "loss": 0.6060166358947754, "lr": 1.587596690200375e-05, "epoch": 0.6706607929515418, "percentage": 33.53, "elapsed_time": "2:48:10", "remaining_time": "5:33:26"} +{"current_steps": 1904, "total_steps": 5676, "loss": 0.7325272560119629, "lr": 1.5871251453629817e-05, "epoch": 0.6710132158590308, "percentage": 33.54, "elapsed_time": "2:48:15", "remaining_time": "5:33:20"} +{"current_steps": 1905, "total_steps": 5676, "loss": 0.674901008605957, "lr": 1.586653401214873e-05, "epoch": 0.6713656387665198, "percentage": 33.56, "elapsed_time": "2:48:20", "remaining_time": "5:33:14"} +{"current_steps": 1906, "total_steps": 5676, "loss": 0.767164945602417, "lr": 1.5861814579161928e-05, "epoch": 0.6717180616740088, "percentage": 33.58, "elapsed_time": "2:48:25", "remaining_time": "5:33:09"} +{"current_steps": 1907, "total_steps": 5676, "loss": 0.5691556930541992, "lr": 1.5857093156271496e-05, "epoch": 0.6720704845814978, "percentage": 33.6, "elapsed_time": "2:48:31", "remaining_time": "5:33:04"} +{"current_steps": 1908, "total_steps": 5676, "loss": 0.6885931491851807, "lr": 1.585236974508022e-05, "epoch": 0.6724229074889868, "percentage": 33.62, "elapsed_time": "2:48:36", "remaining_time": 
"5:32:58"} +{"current_steps": 1909, "total_steps": 5676, "loss": 0.6227391958236694, "lr": 1.5847644347191545e-05, "epoch": 0.6727753303964757, "percentage": 33.63, "elapsed_time": "2:48:43", "remaining_time": "5:32:55"} +{"current_steps": 1910, "total_steps": 5676, "loss": 0.6084527969360352, "lr": 1.5842916964209602e-05, "epoch": 0.6731277533039648, "percentage": 33.65, "elapsed_time": "2:48:49", "remaining_time": "5:32:52"} +{"current_steps": 1911, "total_steps": 5676, "loss": 0.7001935243606567, "lr": 1.583818759773919e-05, "epoch": 0.6734801762114537, "percentage": 33.67, "elapsed_time": "2:48:55", "remaining_time": "5:32:48"} +{"current_steps": 1912, "total_steps": 5676, "loss": 0.8263465166091919, "lr": 1.5833456249385774e-05, "epoch": 0.6738325991189428, "percentage": 33.69, "elapsed_time": "2:48:59", "remaining_time": "5:32:41"} +{"current_steps": 1913, "total_steps": 5676, "loss": 0.662792444229126, "lr": 1.582872292075551e-05, "epoch": 0.6741850220264317, "percentage": 33.7, "elapsed_time": "2:49:03", "remaining_time": "5:32:33"} +{"current_steps": 1914, "total_steps": 5676, "loss": 0.7093051075935364, "lr": 1.582398761345521e-05, "epoch": 0.6745374449339208, "percentage": 33.72, "elapsed_time": "2:49:08", "remaining_time": "5:32:26"} +{"current_steps": 1915, "total_steps": 5676, "loss": 0.7264106273651123, "lr": 1.5819250329092364e-05, "epoch": 0.6748898678414097, "percentage": 33.74, "elapsed_time": "2:49:14", "remaining_time": "5:32:23"} +{"current_steps": 1916, "total_steps": 5676, "loss": 0.6561543345451355, "lr": 1.581451106927513e-05, "epoch": 0.6752422907488986, "percentage": 33.76, "elapsed_time": "2:49:19", "remaining_time": "5:32:18"} +{"current_steps": 1917, "total_steps": 5676, "loss": 0.6563262939453125, "lr": 1.580976983561235e-05, "epoch": 0.6755947136563877, "percentage": 33.77, "elapsed_time": "2:49:25", "remaining_time": "5:32:12"} +{"current_steps": 1918, "total_steps": 5676, "loss": 0.5224509239196777, "lr": 1.5805026629713512e-05, 
"epoch": 0.6759471365638766, "percentage": 33.79, "elapsed_time": "2:49:30", "remaining_time": "5:32:08"} +{"current_steps": 1919, "total_steps": 5676, "loss": 0.6565898656845093, "lr": 1.5800281453188793e-05, "epoch": 0.6762995594713657, "percentage": 33.81, "elapsed_time": "2:49:36", "remaining_time": "5:32:04"} +{"current_steps": 1920, "total_steps": 5676, "loss": 0.7954028844833374, "lr": 1.5795534307649032e-05, "epoch": 0.6766519823788546, "percentage": 33.83, "elapsed_time": "2:49:42", "remaining_time": "5:31:59"} +{"current_steps": 1921, "total_steps": 5676, "loss": 0.6624404788017273, "lr": 1.579078519470574e-05, "epoch": 0.6770044052863436, "percentage": 33.84, "elapsed_time": "2:49:47", "remaining_time": "5:31:53"} +{"current_steps": 1922, "total_steps": 5676, "loss": 0.840311586856842, "lr": 1.5786034115971083e-05, "epoch": 0.6773568281938326, "percentage": 33.86, "elapsed_time": "2:49:51", "remaining_time": "5:31:46"} +{"current_steps": 1923, "total_steps": 5676, "loss": 0.6967859864234924, "lr": 1.578128107305792e-05, "epoch": 0.6777092511013216, "percentage": 33.88, "elapsed_time": "2:49:56", "remaining_time": "5:31:40"} +{"current_steps": 1924, "total_steps": 5676, "loss": 0.5295379161834717, "lr": 1.5776526067579746e-05, "epoch": 0.6780616740088106, "percentage": 33.9, "elapsed_time": "2:50:02", "remaining_time": "5:31:36"} +{"current_steps": 1925, "total_steps": 5676, "loss": 0.6758475303649902, "lr": 1.5771769101150752e-05, "epoch": 0.6784140969162996, "percentage": 33.91, "elapsed_time": "2:50:08", "remaining_time": "5:31:32"} +{"current_steps": 1926, "total_steps": 5676, "loss": 0.6891785860061646, "lr": 1.576701017538577e-05, "epoch": 0.6787665198237885, "percentage": 33.93, "elapsed_time": "2:50:15", "remaining_time": "5:31:29"} +{"current_steps": 1927, "total_steps": 5676, "loss": 0.6507086157798767, "lr": 1.5762249291900304e-05, "epoch": 0.6791189427312775, "percentage": 33.95, "elapsed_time": "2:50:20", "remaining_time": "5:31:24"} 
+{"current_steps": 1928, "total_steps": 5676, "loss": 0.6220029592514038, "lr": 1.5757486452310537e-05, "epoch": 0.6794713656387665, "percentage": 33.97, "elapsed_time": "2:50:26", "remaining_time": "5:31:19"} +{"current_steps": 1929, "total_steps": 5676, "loss": 0.7742874622344971, "lr": 1.5752721658233294e-05, "epoch": 0.6798237885462555, "percentage": 33.99, "elapsed_time": "2:50:31", "remaining_time": "5:31:14"} +{"current_steps": 1930, "total_steps": 5676, "loss": 0.6895851492881775, "lr": 1.5747954911286085e-05, "epoch": 0.6801762114537445, "percentage": 34.0, "elapsed_time": "2:50:36", "remaining_time": "5:31:09"} +{"current_steps": 1931, "total_steps": 5676, "loss": 0.71466064453125, "lr": 1.5743186213087062e-05, "epoch": 0.6805286343612335, "percentage": 34.02, "elapsed_time": "2:50:41", "remaining_time": "5:31:02"} +{"current_steps": 1932, "total_steps": 5676, "loss": 0.6465627551078796, "lr": 1.5738415565255056e-05, "epoch": 0.6808810572687225, "percentage": 34.04, "elapsed_time": "2:50:45", "remaining_time": "5:30:55"} +{"current_steps": 1933, "total_steps": 5676, "loss": 0.7592962980270386, "lr": 1.5733642969409553e-05, "epoch": 0.6812334801762114, "percentage": 34.06, "elapsed_time": "2:50:52", "remaining_time": "5:30:52"} +{"current_steps": 1934, "total_steps": 5676, "loss": 0.7641816735267639, "lr": 1.57288684271707e-05, "epoch": 0.6815859030837005, "percentage": 34.07, "elapsed_time": "2:50:58", "remaining_time": "5:30:48"} +{"current_steps": 1935, "total_steps": 5676, "loss": 0.7015130519866943, "lr": 1.5724091940159306e-05, "epoch": 0.6819383259911894, "percentage": 34.09, "elapsed_time": "2:51:03", "remaining_time": "5:30:43"} +{"current_steps": 1936, "total_steps": 5676, "loss": 0.7851461172103882, "lr": 1.5719313509996833e-05, "epoch": 0.6822907488986785, "percentage": 34.11, "elapsed_time": "2:51:10", "remaining_time": "5:30:40"} +{"current_steps": 1937, "total_steps": 5676, "loss": 0.7924813628196716, "lr": 1.571453313830542e-05, "epoch": 
0.6826431718061674, "percentage": 34.13, "elapsed_time": "2:51:15", "remaining_time": "5:30:34"} +{"current_steps": 1938, "total_steps": 5676, "loss": 0.6082741022109985, "lr": 1.570975082670785e-05, "epoch": 0.6829955947136563, "percentage": 34.14, "elapsed_time": "2:51:21", "remaining_time": "5:30:30"} +{"current_steps": 1939, "total_steps": 5676, "loss": 0.7307756543159485, "lr": 1.5704966576827563e-05, "epoch": 0.6833480176211454, "percentage": 34.16, "elapsed_time": "2:51:26", "remaining_time": "5:30:24"} +{"current_steps": 1940, "total_steps": 5676, "loss": 0.6877273917198181, "lr": 1.570018039028867e-05, "epoch": 0.6837004405286343, "percentage": 34.18, "elapsed_time": "2:51:30", "remaining_time": "5:30:17"} +{"current_steps": 1941, "total_steps": 5676, "loss": 0.7702943086624146, "lr": 1.5695392268715934e-05, "epoch": 0.6840528634361234, "percentage": 34.2, "elapsed_time": "2:51:35", "remaining_time": "5:30:11"} +{"current_steps": 1942, "total_steps": 5676, "loss": 0.6576820611953735, "lr": 1.569060221373477e-05, "epoch": 0.6844052863436123, "percentage": 34.21, "elapsed_time": "2:51:41", "remaining_time": "5:30:07"} +{"current_steps": 1943, "total_steps": 5676, "loss": 0.6605322360992432, "lr": 1.568581022697125e-05, "epoch": 0.6847577092511014, "percentage": 34.23, "elapsed_time": "2:51:45", "remaining_time": "5:29:59"} +{"current_steps": 1944, "total_steps": 5676, "loss": 0.8065364360809326, "lr": 1.568101631005211e-05, "epoch": 0.6851101321585903, "percentage": 34.25, "elapsed_time": "2:51:50", "remaining_time": "5:29:54"} +{"current_steps": 1945, "total_steps": 5676, "loss": 0.8018748164176941, "lr": 1.5676220464604726e-05, "epoch": 0.6854625550660793, "percentage": 34.27, "elapsed_time": "2:51:56", "remaining_time": "5:29:49"} +{"current_steps": 1946, "total_steps": 5676, "loss": 0.6114683151245117, "lr": 1.567142269225715e-05, "epoch": 0.6858149779735683, "percentage": 34.28, "elapsed_time": "2:52:01", "remaining_time": "5:29:43"} +{"current_steps": 
1947, "total_steps": 5676, "loss": 0.8470789194107056, "lr": 1.566662299463807e-05, "epoch": 0.6861674008810573, "percentage": 34.3, "elapsed_time": "2:52:07", "remaining_time": "5:29:39"} +{"current_steps": 1948, "total_steps": 5676, "loss": 0.7133561372756958, "lr": 1.5661821373376837e-05, "epoch": 0.6865198237885463, "percentage": 34.32, "elapsed_time": "2:52:11", "remaining_time": "5:29:31"} +{"current_steps": 1949, "total_steps": 5676, "loss": 0.9101625084877014, "lr": 1.5657017830103448e-05, "epoch": 0.6868722466960352, "percentage": 34.34, "elapsed_time": "2:52:16", "remaining_time": "5:29:26"} +{"current_steps": 1950, "total_steps": 5676, "loss": 0.7395101189613342, "lr": 1.565221236644856e-05, "epoch": 0.6872246696035242, "percentage": 34.36, "elapsed_time": "2:52:20", "remaining_time": "5:29:18"} +{"current_steps": 1951, "total_steps": 5676, "loss": 0.7421061992645264, "lr": 1.5647404984043474e-05, "epoch": 0.6875770925110132, "percentage": 34.37, "elapsed_time": "2:52:26", "remaining_time": "5:29:13"} +{"current_steps": 1952, "total_steps": 5676, "loss": 0.8744432330131531, "lr": 1.5642595684520154e-05, "epoch": 0.6879295154185022, "percentage": 34.39, "elapsed_time": "2:52:31", "remaining_time": "5:29:07"} +{"current_steps": 1953, "total_steps": 5676, "loss": 0.8043868541717529, "lr": 1.56377844695112e-05, "epoch": 0.6882819383259912, "percentage": 34.41, "elapsed_time": "2:52:35", "remaining_time": "5:29:00"} +{"current_steps": 1954, "total_steps": 5676, "loss": 0.6231396198272705, "lr": 1.5632971340649873e-05, "epoch": 0.6886343612334802, "percentage": 34.43, "elapsed_time": "2:52:39", "remaining_time": "5:28:53"} +{"current_steps": 1955, "total_steps": 5676, "loss": 0.7791434526443481, "lr": 1.562815629957008e-05, "epoch": 0.6889867841409691, "percentage": 34.44, "elapsed_time": "2:52:46", "remaining_time": "5:28:50"} +{"current_steps": 1956, "total_steps": 5676, "loss": 0.5652475357055664, "lr": 1.5623339347906383e-05, "epoch": 0.6893392070484582, 
"percentage": 34.46, "elapsed_time": "2:52:52", "remaining_time": "5:28:46"} +{"current_steps": 1957, "total_steps": 5676, "loss": 0.611067533493042, "lr": 1.561852048729398e-05, "epoch": 0.6896916299559471, "percentage": 34.48, "elapsed_time": "2:52:56", "remaining_time": "5:28:38"} +{"current_steps": 1958, "total_steps": 5676, "loss": 0.7580389976501465, "lr": 1.5613699719368724e-05, "epoch": 0.6900440528634362, "percentage": 34.5, "elapsed_time": "2:53:01", "remaining_time": "5:28:33"} +{"current_steps": 1959, "total_steps": 5676, "loss": 0.6841205954551697, "lr": 1.560887704576712e-05, "epoch": 0.6903964757709251, "percentage": 34.51, "elapsed_time": "2:53:06", "remaining_time": "5:28:26"} +{"current_steps": 1960, "total_steps": 5676, "loss": 0.7600575089454651, "lr": 1.5604052468126315e-05, "epoch": 0.690748898678414, "percentage": 34.53, "elapsed_time": "2:53:10", "remaining_time": "5:28:19"} +{"current_steps": 1961, "total_steps": 5676, "loss": 0.7547114491462708, "lr": 1.55992259880841e-05, "epoch": 0.6911013215859031, "percentage": 34.55, "elapsed_time": "2:53:15", "remaining_time": "5:28:13"} +{"current_steps": 1962, "total_steps": 5676, "loss": 0.6917474865913391, "lr": 1.5594397607278912e-05, "epoch": 0.691453744493392, "percentage": 34.57, "elapsed_time": "2:53:20", "remaining_time": "5:28:07"} +{"current_steps": 1963, "total_steps": 5676, "loss": 0.6820487976074219, "lr": 1.5589567327349845e-05, "epoch": 0.6918061674008811, "percentage": 34.58, "elapsed_time": "2:53:25", "remaining_time": "5:28:02"} +{"current_steps": 1964, "total_steps": 5676, "loss": 0.6513597965240479, "lr": 1.5584735149936628e-05, "epoch": 0.69215859030837, "percentage": 34.6, "elapsed_time": "2:53:31", "remaining_time": "5:27:58"} +{"current_steps": 1965, "total_steps": 5676, "loss": 0.668257474899292, "lr": 1.5579901076679625e-05, "epoch": 0.6925110132158591, "percentage": 34.62, "elapsed_time": "2:53:36", "remaining_time": "5:27:51"} +{"current_steps": 1966, "total_steps": 
5676, "loss": 0.7600705623626709, "lr": 1.5575065109219864e-05, "epoch": 0.692863436123348, "percentage": 34.64, "elapsed_time": "2:53:41", "remaining_time": "5:27:47"} +{"current_steps": 1967, "total_steps": 5676, "loss": 0.8140011429786682, "lr": 1.5570227249198993e-05, "epoch": 0.693215859030837, "percentage": 34.65, "elapsed_time": "2:53:47", "remaining_time": "5:27:42"} +{"current_steps": 1968, "total_steps": 5676, "loss": 0.610436201095581, "lr": 1.556538749825933e-05, "epoch": 0.693568281938326, "percentage": 34.67, "elapsed_time": "2:53:54", "remaining_time": "5:27:39"} +{"current_steps": 1969, "total_steps": 5676, "loss": 0.7745693922042847, "lr": 1.556054585804381e-05, "epoch": 0.693920704845815, "percentage": 34.69, "elapsed_time": "2:53:59", "remaining_time": "5:27:33"} +{"current_steps": 1970, "total_steps": 5676, "loss": 0.5809592008590698, "lr": 1.5555702330196024e-05, "epoch": 0.694273127753304, "percentage": 34.71, "elapsed_time": "2:54:04", "remaining_time": "5:27:28"} +{"current_steps": 1971, "total_steps": 5676, "loss": 0.6354515552520752, "lr": 1.5550856916360195e-05, "epoch": 0.6946255506607929, "percentage": 34.73, "elapsed_time": "2:54:09", "remaining_time": "5:27:23"} +{"current_steps": 1972, "total_steps": 5676, "loss": 0.8076149225234985, "lr": 1.5546009618181194e-05, "epoch": 0.694977973568282, "percentage": 34.74, "elapsed_time": "2:54:14", "remaining_time": "5:27:16"} +{"current_steps": 1973, "total_steps": 5676, "loss": 0.7553249597549438, "lr": 1.5541160437304524e-05, "epoch": 0.6953303964757709, "percentage": 34.76, "elapsed_time": "2:54:18", "remaining_time": "5:27:09"} +{"current_steps": 1974, "total_steps": 5676, "loss": 0.6109169125556946, "lr": 1.5536309375376332e-05, "epoch": 0.6956828193832599, "percentage": 34.78, "elapsed_time": "2:54:24", "remaining_time": "5:27:05"} +{"current_steps": 1975, "total_steps": 5676, "loss": 0.8184436559677124, "lr": 1.5531456434043404e-05, "epoch": 0.6960352422907489, "percentage": 34.8, 
"elapsed_time": "2:54:29", "remaining_time": "5:26:59"} +{"current_steps": 1976, "total_steps": 5676, "loss": 0.6823909878730774, "lr": 1.5526601614953164e-05, "epoch": 0.6963876651982379, "percentage": 34.81, "elapsed_time": "2:54:34", "remaining_time": "5:26:53"} +{"current_steps": 1977, "total_steps": 5676, "loss": 0.6669045090675354, "lr": 1.5521744919753668e-05, "epoch": 0.6967400881057269, "percentage": 34.83, "elapsed_time": "2:54:40", "remaining_time": "5:26:48"} +{"current_steps": 1978, "total_steps": 5676, "loss": 0.8054187297821045, "lr": 1.5516886350093617e-05, "epoch": 0.6970925110132159, "percentage": 34.85, "elapsed_time": "2:54:44", "remaining_time": "5:26:41"} +{"current_steps": 1979, "total_steps": 5676, "loss": 0.7089184522628784, "lr": 1.551202590762234e-05, "epoch": 0.6974449339207048, "percentage": 34.87, "elapsed_time": "2:54:49", "remaining_time": "5:26:35"} +{"current_steps": 1980, "total_steps": 5676, "loss": 0.7908214330673218, "lr": 1.5507163593989804e-05, "epoch": 0.6977973568281939, "percentage": 34.88, "elapsed_time": "2:54:54", "remaining_time": "5:26:30"} +{"current_steps": 1981, "total_steps": 5676, "loss": 0.8859039545059204, "lr": 1.5502299410846626e-05, "epoch": 0.6981497797356828, "percentage": 34.9, "elapsed_time": "2:55:00", "remaining_time": "5:26:25"} +{"current_steps": 1982, "total_steps": 5676, "loss": 0.7156866788864136, "lr": 1.549743335984403e-05, "epoch": 0.6985022026431718, "percentage": 34.92, "elapsed_time": "2:55:05", "remaining_time": "5:26:19"} +{"current_steps": 1983, "total_steps": 5676, "loss": 0.6158934831619263, "lr": 1.5492565442633894e-05, "epoch": 0.6988546255506608, "percentage": 34.94, "elapsed_time": "2:55:11", "remaining_time": "5:26:16"} +{"current_steps": 1984, "total_steps": 5676, "loss": 0.6689192056655884, "lr": 1.548769566086873e-05, "epoch": 0.6992070484581497, "percentage": 34.95, "elapsed_time": "2:55:16", "remaining_time": "5:26:10"} +{"current_steps": 1985, "total_steps": 5676, "loss": 
0.6695841550827026, "lr": 1.548282401620167e-05, "epoch": 0.6995594713656388, "percentage": 34.97, "elapsed_time": "2:55:20", "remaining_time": "5:26:03"} +{"current_steps": 1986, "total_steps": 5676, "loss": 0.7196098566055298, "lr": 1.5477950510286488e-05, "epoch": 0.6999118942731277, "percentage": 34.99, "elapsed_time": "2:55:25", "remaining_time": "5:25:56"} +{"current_steps": 1987, "total_steps": 5676, "loss": 0.7811123132705688, "lr": 1.5473075144777586e-05, "epoch": 0.7002643171806168, "percentage": 35.01, "elapsed_time": "2:55:31", "remaining_time": "5:25:52"} +{"current_steps": 1988, "total_steps": 5676, "loss": 0.6341326236724854, "lr": 1.5468197921330006e-05, "epoch": 0.7006167400881057, "percentage": 35.02, "elapsed_time": "2:55:35", "remaining_time": "5:25:44"} +{"current_steps": 1989, "total_steps": 5676, "loss": 0.6344352960586548, "lr": 1.5463318841599408e-05, "epoch": 0.7009691629955948, "percentage": 35.04, "elapsed_time": "2:55:40", "remaining_time": "5:25:39"} +{"current_steps": 1990, "total_steps": 5676, "loss": 0.6708072423934937, "lr": 1.5458437907242084e-05, "epoch": 0.7013215859030837, "percentage": 35.06, "elapsed_time": "2:55:44", "remaining_time": "5:25:31"} +{"current_steps": 1991, "total_steps": 5676, "loss": 0.7018578052520752, "lr": 1.5453555119914963e-05, "epoch": 0.7016740088105727, "percentage": 35.08, "elapsed_time": "2:55:49", "remaining_time": "5:25:25"} +{"current_steps": 1992, "total_steps": 5676, "loss": 0.6966190338134766, "lr": 1.5448670481275604e-05, "epoch": 0.7020264317180617, "percentage": 35.1, "elapsed_time": "2:55:56", "remaining_time": "5:25:22"} +{"current_steps": 1993, "total_steps": 5676, "loss": 0.6280171871185303, "lr": 1.5443783992982182e-05, "epoch": 0.7023788546255506, "percentage": 35.11, "elapsed_time": "2:56:02", "remaining_time": "5:25:18"} +{"current_steps": 1994, "total_steps": 5676, "loss": 0.6644559502601624, "lr": 1.5438895656693512e-05, "epoch": 0.7027312775330397, "percentage": 35.13, 
"elapsed_time": "2:56:07", "remaining_time": "5:25:13"} +{"current_steps": 1995, "total_steps": 5676, "loss": 0.776411771774292, "lr": 1.543400547406903e-05, "epoch": 0.7030837004405286, "percentage": 35.15, "elapsed_time": "2:56:14", "remaining_time": "5:25:10"} +{"current_steps": 1996, "total_steps": 5676, "loss": 0.6353679895401001, "lr": 1.5429113446768805e-05, "epoch": 0.7034361233480176, "percentage": 35.17, "elapsed_time": "2:56:19", "remaining_time": "5:25:05"} +{"current_steps": 1997, "total_steps": 5676, "loss": 0.686774492263794, "lr": 1.5424219576453526e-05, "epoch": 0.7037885462555066, "percentage": 35.18, "elapsed_time": "2:56:24", "remaining_time": "5:25:00"} +{"current_steps": 1998, "total_steps": 5676, "loss": 0.5296701192855835, "lr": 1.5419323864784508e-05, "epoch": 0.7041409691629956, "percentage": 35.2, "elapsed_time": "2:56:31", "remaining_time": "5:24:56"} +{"current_steps": 1999, "total_steps": 5676, "loss": 0.6246802806854248, "lr": 1.5414426313423692e-05, "epoch": 0.7044933920704846, "percentage": 35.22, "elapsed_time": "2:56:36", "remaining_time": "5:24:51"} +{"current_steps": 2000, "total_steps": 5676, "loss": 0.6633912920951843, "lr": 1.5409526924033646e-05, "epoch": 0.7048458149779736, "percentage": 35.24, "elapsed_time": "2:56:41", "remaining_time": "5:24:46"} +{"current_steps": 2001, "total_steps": 5676, "loss": 0.7324577569961548, "lr": 1.540462569827756e-05, "epoch": 0.7051982378854625, "percentage": 35.25, "elapsed_time": "2:56:50", "remaining_time": "5:24:47"} +{"current_steps": 2002, "total_steps": 5676, "loss": 0.7988085746765137, "lr": 1.539972263781925e-05, "epoch": 0.7055506607929516, "percentage": 35.27, "elapsed_time": "2:56:54", "remaining_time": "5:24:40"} +{"current_steps": 2003, "total_steps": 5676, "loss": 0.6761256456375122, "lr": 1.539481774432315e-05, "epoch": 0.7059030837004405, "percentage": 35.29, "elapsed_time": "2:57:00", "remaining_time": "5:24:35"} +{"current_steps": 2004, "total_steps": 5676, "loss": 
0.6647740006446838, "lr": 1.538991101945431e-05, "epoch": 0.7062555066079295, "percentage": 35.31, "elapsed_time": "2:57:05", "remaining_time": "5:24:30"} +{"current_steps": 2005, "total_steps": 5676, "loss": 0.7111536860466003, "lr": 1.538500246487843e-05, "epoch": 0.7066079295154185, "percentage": 35.32, "elapsed_time": "2:57:11", "remaining_time": "5:24:26"} +{"current_steps": 2006, "total_steps": 5676, "loss": 0.7395933270454407, "lr": 1.5380092082261797e-05, "epoch": 0.7069603524229074, "percentage": 35.34, "elapsed_time": "2:57:17", "remaining_time": "5:24:21"} +{"current_steps": 2007, "total_steps": 5676, "loss": 0.6158996820449829, "lr": 1.5375179873271335e-05, "epoch": 0.7073127753303965, "percentage": 35.36, "elapsed_time": "2:57:23", "remaining_time": "5:24:17"} +{"current_steps": 2008, "total_steps": 5676, "loss": 0.7259848117828369, "lr": 1.537026583957459e-05, "epoch": 0.7076651982378854, "percentage": 35.38, "elapsed_time": "2:57:28", "remaining_time": "5:24:11"} +{"current_steps": 2009, "total_steps": 5676, "loss": 0.8370928764343262, "lr": 1.5365349982839723e-05, "epoch": 0.7080176211453745, "percentage": 35.39, "elapsed_time": "2:57:33", "remaining_time": "5:24:05"} +{"current_steps": 2010, "total_steps": 5676, "loss": 0.6041784882545471, "lr": 1.536043230473551e-05, "epoch": 0.7083700440528634, "percentage": 35.41, "elapsed_time": "2:57:38", "remaining_time": "5:24:00"} +{"current_steps": 2011, "total_steps": 5676, "loss": 0.688548743724823, "lr": 1.535551280693135e-05, "epoch": 0.7087224669603525, "percentage": 35.43, "elapsed_time": "2:57:44", "remaining_time": "5:23:55"} +{"current_steps": 2012, "total_steps": 5676, "loss": 0.573681652545929, "lr": 1.5350591491097265e-05, "epoch": 0.7090748898678414, "percentage": 35.45, "elapsed_time": "2:57:48", "remaining_time": "5:23:47"} +{"current_steps": 2013, "total_steps": 5676, "loss": 0.6919670104980469, "lr": 1.5345668358903886e-05, "epoch": 0.7094273127753304, "percentage": 35.47, "elapsed_time": 
"2:57:53", "remaining_time": "5:23:42"} +{"current_steps": 2014, "total_steps": 5676, "loss": 0.6693999767303467, "lr": 1.534074341202246e-05, "epoch": 0.7097797356828194, "percentage": 35.48, "elapsed_time": "2:57:58", "remaining_time": "5:23:37"} +{"current_steps": 2015, "total_steps": 5676, "loss": 0.7204093337059021, "lr": 1.533581665212486e-05, "epoch": 0.7101321585903083, "percentage": 35.5, "elapsed_time": "2:58:04", "remaining_time": "5:23:31"} +{"current_steps": 2016, "total_steps": 5676, "loss": 0.6196314096450806, "lr": 1.5330888080883555e-05, "epoch": 0.7104845814977974, "percentage": 35.52, "elapsed_time": "2:58:07", "remaining_time": "5:23:23"} +{"current_steps": 2017, "total_steps": 5676, "loss": 0.7292872071266174, "lr": 1.5325957699971657e-05, "epoch": 0.7108370044052863, "percentage": 35.54, "elapsed_time": "2:58:12", "remaining_time": "5:23:17"} +{"current_steps": 2018, "total_steps": 5676, "loss": 0.7514410018920898, "lr": 1.532102551106287e-05, "epoch": 0.7111894273127753, "percentage": 35.55, "elapsed_time": "2:58:18", "remaining_time": "5:23:12"} +{"current_steps": 2019, "total_steps": 5676, "loss": 0.7683345079421997, "lr": 1.531609151583152e-05, "epoch": 0.7115418502202643, "percentage": 35.57, "elapsed_time": "2:58:23", "remaining_time": "5:23:06"} +{"current_steps": 2020, "total_steps": 5676, "loss": 0.6994156837463379, "lr": 1.5311155715952536e-05, "epoch": 0.7118942731277533, "percentage": 35.59, "elapsed_time": "2:58:28", "remaining_time": "5:23:01"} +{"current_steps": 2021, "total_steps": 5676, "loss": 0.5530328750610352, "lr": 1.5306218113101482e-05, "epoch": 0.7122466960352423, "percentage": 35.61, "elapsed_time": "2:58:33", "remaining_time": "5:22:55"} +{"current_steps": 2022, "total_steps": 5676, "loss": 0.6126301884651184, "lr": 1.530127870895451e-05, "epoch": 0.7125991189427313, "percentage": 35.62, "elapsed_time": "2:58:39", "remaining_time": "5:22:51"} +{"current_steps": 2023, "total_steps": 5676, "loss": 0.7514982223510742, 
"lr": 1.5296337505188403e-05, "epoch": 0.7129515418502202, "percentage": 35.64, "elapsed_time": "2:58:43", "remaining_time": "5:22:43"} +{"current_steps": 2024, "total_steps": 5676, "loss": 0.7087191939353943, "lr": 1.529139450348054e-05, "epoch": 0.7133039647577093, "percentage": 35.66, "elapsed_time": "2:58:48", "remaining_time": "5:22:37"} +{"current_steps": 2025, "total_steps": 5676, "loss": 0.5713562965393066, "lr": 1.5286449705508914e-05, "epoch": 0.7136563876651982, "percentage": 35.68, "elapsed_time": "2:58:53", "remaining_time": "5:22:31"} +{"current_steps": 2026, "total_steps": 5676, "loss": 0.6796679496765137, "lr": 1.5281503112952136e-05, "epoch": 0.7140088105726872, "percentage": 35.69, "elapsed_time": "2:58:58", "remaining_time": "5:22:26"} +{"current_steps": 2027, "total_steps": 5676, "loss": 0.7902421355247498, "lr": 1.5276554727489415e-05, "epoch": 0.7143612334801762, "percentage": 35.71, "elapsed_time": "2:59:03", "remaining_time": "5:22:20"} +{"current_steps": 2028, "total_steps": 5676, "loss": 0.6645491123199463, "lr": 1.527160455080058e-05, "epoch": 0.7147136563876652, "percentage": 35.73, "elapsed_time": "2:59:08", "remaining_time": "5:22:14"} +{"current_steps": 2029, "total_steps": 5676, "loss": 0.6077255606651306, "lr": 1.5266652584566056e-05, "epoch": 0.7150660792951542, "percentage": 35.75, "elapsed_time": "2:59:14", "remaining_time": "5:22:11"} +{"current_steps": 2030, "total_steps": 5676, "loss": 0.6219078302383423, "lr": 1.5261698830466888e-05, "epoch": 0.7154185022026431, "percentage": 35.76, "elapsed_time": "2:59:20", "remaining_time": "5:22:06"} +{"current_steps": 2031, "total_steps": 5676, "loss": 0.5895035266876221, "lr": 1.5256743290184713e-05, "epoch": 0.7157709251101322, "percentage": 35.78, "elapsed_time": "2:59:27", "remaining_time": "5:22:03"} +{"current_steps": 2032, "total_steps": 5676, "loss": 0.6735520958900452, "lr": 1.5251785965401786e-05, "epoch": 0.7161233480176211, "percentage": 35.8, "elapsed_time": "2:59:31", 
"remaining_time": "5:21:56"} +{"current_steps": 2033, "total_steps": 5676, "loss": 0.6212488412857056, "lr": 1.524682685780097e-05, "epoch": 0.7164757709251102, "percentage": 35.82, "elapsed_time": "2:59:36", "remaining_time": "5:21:51"} +{"current_steps": 2034, "total_steps": 5676, "loss": 0.7181172966957092, "lr": 1.524186596906572e-05, "epoch": 0.7168281938325991, "percentage": 35.84, "elapsed_time": "2:59:42", "remaining_time": "5:21:46"} +{"current_steps": 2035, "total_steps": 5676, "loss": 0.7156587839126587, "lr": 1.5236903300880107e-05, "epoch": 0.7171806167400882, "percentage": 35.85, "elapsed_time": "2:59:48", "remaining_time": "5:21:42"} +{"current_steps": 2036, "total_steps": 5676, "loss": 0.6989034414291382, "lr": 1.52319388549288e-05, "epoch": 0.7175330396475771, "percentage": 35.87, "elapsed_time": "2:59:53", "remaining_time": "5:21:36"} +{"current_steps": 2037, "total_steps": 5676, "loss": 0.7224982976913452, "lr": 1.5226972632897079e-05, "epoch": 0.7178854625550661, "percentage": 35.89, "elapsed_time": "2:59:57", "remaining_time": "5:21:29"} +{"current_steps": 2038, "total_steps": 5676, "loss": 0.6871547698974609, "lr": 1.522200463647082e-05, "epoch": 0.7182378854625551, "percentage": 35.91, "elapsed_time": "3:00:03", "remaining_time": "5:21:25"} +{"current_steps": 2039, "total_steps": 5676, "loss": 0.725049614906311, "lr": 1.5217034867336498e-05, "epoch": 0.718590308370044, "percentage": 35.92, "elapsed_time": "3:00:07", "remaining_time": "5:21:18"} +{"current_steps": 2040, "total_steps": 5676, "loss": 0.7105863094329834, "lr": 1.5212063327181197e-05, "epoch": 0.718942731277533, "percentage": 35.94, "elapsed_time": "3:00:13", "remaining_time": "5:21:12"} +{"current_steps": 2041, "total_steps": 5676, "loss": 0.5823827981948853, "lr": 1.5207090017692605e-05, "epoch": 0.719295154185022, "percentage": 35.96, "elapsed_time": "3:00:17", "remaining_time": "5:21:06"} +{"current_steps": 2042, "total_steps": 5676, "loss": 0.7087944746017456, "lr": 
1.5202114940559005e-05, "epoch": 0.719647577092511, "percentage": 35.98, "elapsed_time": "3:00:23", "remaining_time": "5:21:01"} +{"current_steps": 2043, "total_steps": 5676, "loss": 0.6678824424743652, "lr": 1.5197138097469275e-05, "epoch": 0.72, "percentage": 35.99, "elapsed_time": "3:00:29", "remaining_time": "5:20:57"} +{"current_steps": 2044, "total_steps": 5676, "loss": 0.7318846583366394, "lr": 1.5192159490112904e-05, "epoch": 0.720352422907489, "percentage": 36.01, "elapsed_time": "3:00:34", "remaining_time": "5:20:52"} +{"current_steps": 2045, "total_steps": 5676, "loss": 0.7245825529098511, "lr": 1.5187179120179969e-05, "epoch": 0.720704845814978, "percentage": 36.03, "elapsed_time": "3:00:39", "remaining_time": "5:20:46"} +{"current_steps": 2046, "total_steps": 5676, "loss": 0.7691583633422852, "lr": 1.5182196989361155e-05, "epoch": 0.721057268722467, "percentage": 36.05, "elapsed_time": "3:00:44", "remaining_time": "5:20:41"} +{"current_steps": 2047, "total_steps": 5676, "loss": 0.7961187362670898, "lr": 1.517721309934774e-05, "epoch": 0.7214096916299559, "percentage": 36.06, "elapsed_time": "3:00:50", "remaining_time": "5:20:35"} +{"current_steps": 2048, "total_steps": 5676, "loss": 0.7163759469985962, "lr": 1.51722274518316e-05, "epoch": 0.721762114537445, "percentage": 36.08, "elapsed_time": "3:00:55", "remaining_time": "5:20:30"} +{"current_steps": 2049, "total_steps": 5676, "loss": 0.6807754039764404, "lr": 1.51672400485052e-05, "epoch": 0.7221145374449339, "percentage": 36.1, "elapsed_time": "3:01:00", "remaining_time": "5:20:24"} +{"current_steps": 2050, "total_steps": 5676, "loss": 0.7026433348655701, "lr": 1.516225089106162e-05, "epoch": 0.7224669603524229, "percentage": 36.12, "elapsed_time": "3:01:07", "remaining_time": "5:20:21"} +{"current_steps": 2051, "total_steps": 5676, "loss": 0.8230476379394531, "lr": 1.5157259981194514e-05, "epoch": 0.7228193832599119, "percentage": 36.13, "elapsed_time": "3:01:12", "remaining_time": "5:20:15"} 
+{"current_steps": 2052, "total_steps": 5676, "loss": 0.6466805934906006, "lr": 1.5152267320598149e-05, "epoch": 0.7231718061674008, "percentage": 36.15, "elapsed_time": "3:01:17", "remaining_time": "5:20:10"} +{"current_steps": 2053, "total_steps": 5676, "loss": 0.7203368544578552, "lr": 1.5147272910967368e-05, "epoch": 0.7235242290748899, "percentage": 36.17, "elapsed_time": "3:01:22", "remaining_time": "5:20:04"} +{"current_steps": 2054, "total_steps": 5676, "loss": 0.6455702781677246, "lr": 1.5142276753997627e-05, "epoch": 0.7238766519823788, "percentage": 36.19, "elapsed_time": "3:01:27", "remaining_time": "5:19:59"} +{"current_steps": 2055, "total_steps": 5676, "loss": 0.609260082244873, "lr": 1.5137278851384958e-05, "epoch": 0.7242290748898679, "percentage": 36.21, "elapsed_time": "3:01:31", "remaining_time": "5:19:51"} +{"current_steps": 2056, "total_steps": 5676, "loss": 0.8320673704147339, "lr": 1.5132279204826e-05, "epoch": 0.7245814977973568, "percentage": 36.22, "elapsed_time": "3:01:37", "remaining_time": "5:19:47"} +{"current_steps": 2057, "total_steps": 5676, "loss": 0.8497718572616577, "lr": 1.512727781601797e-05, "epoch": 0.7249339207048459, "percentage": 36.24, "elapsed_time": "3:01:44", "remaining_time": "5:19:44"} +{"current_steps": 2058, "total_steps": 5676, "loss": 0.6398370265960693, "lr": 1.5122274686658695e-05, "epoch": 0.7252863436123348, "percentage": 36.26, "elapsed_time": "3:01:49", "remaining_time": "5:19:39"} +{"current_steps": 2059, "total_steps": 5676, "loss": 0.7562476396560669, "lr": 1.511726981844657e-05, "epoch": 0.7256387665198238, "percentage": 36.28, "elapsed_time": "3:01:55", "remaining_time": "5:19:34"} +{"current_steps": 2060, "total_steps": 5676, "loss": 0.7948570251464844, "lr": 1.51122632130806e-05, "epoch": 0.7259911894273128, "percentage": 36.29, "elapsed_time": "3:02:00", "remaining_time": "5:19:29"} +{"current_steps": 2061, "total_steps": 5676, "loss": 0.7062652111053467, "lr": 1.5107254872260366e-05, "epoch": 
0.7263436123348017, "percentage": 36.31, "elapsed_time": "3:02:04", "remaining_time": "5:19:21"} +{"current_steps": 2062, "total_steps": 5676, "loss": 0.6290205717086792, "lr": 1.5102244797686049e-05, "epoch": 0.7266960352422908, "percentage": 36.33, "elapsed_time": "3:02:08", "remaining_time": "5:19:13"} +{"current_steps": 2063, "total_steps": 5676, "loss": 0.727097749710083, "lr": 1.5097232991058409e-05, "epoch": 0.7270484581497797, "percentage": 36.35, "elapsed_time": "3:02:14", "remaining_time": "5:19:10"} +{"current_steps": 2064, "total_steps": 5676, "loss": 0.783380389213562, "lr": 1.5092219454078803e-05, "epoch": 0.7274008810572687, "percentage": 36.36, "elapsed_time": "3:02:19", "remaining_time": "5:19:04"} +{"current_steps": 2065, "total_steps": 5676, "loss": 0.6190629601478577, "lr": 1.5087204188449165e-05, "epoch": 0.7277533039647577, "percentage": 36.38, "elapsed_time": "3:02:25", "remaining_time": "5:18:59"} +{"current_steps": 2066, "total_steps": 5676, "loss": 0.6749798059463501, "lr": 1.5082187195872026e-05, "epoch": 0.7281057268722467, "percentage": 36.4, "elapsed_time": "3:02:30", "remaining_time": "5:18:53"} +{"current_steps": 2067, "total_steps": 5676, "loss": 0.6581153273582458, "lr": 1.5077168478050494e-05, "epoch": 0.7284581497797357, "percentage": 36.42, "elapsed_time": "3:02:36", "remaining_time": "5:18:49"} +{"current_steps": 2068, "total_steps": 5676, "loss": 0.6886252760887146, "lr": 1.5072148036688279e-05, "epoch": 0.7288105726872247, "percentage": 36.43, "elapsed_time": "3:02:41", "remaining_time": "5:18:44"} +{"current_steps": 2069, "total_steps": 5676, "loss": 0.6893814206123352, "lr": 1.506712587348965e-05, "epoch": 0.7291629955947136, "percentage": 36.45, "elapsed_time": "3:02:46", "remaining_time": "5:18:38"} +{"current_steps": 2070, "total_steps": 5676, "loss": 0.8242654800415039, "lr": 1.5062101990159486e-05, "epoch": 0.7295154185022027, "percentage": 36.47, "elapsed_time": "3:02:51", "remaining_time": "5:18:32"} 
+{"current_steps": 2071, "total_steps": 5676, "loss": 0.6331228017807007, "lr": 1.5057076388403229e-05, "epoch": 0.7298678414096916, "percentage": 36.49, "elapsed_time": "3:02:57", "remaining_time": "5:18:29"} +{"current_steps": 2072, "total_steps": 5676, "loss": 0.6440649032592773, "lr": 1.5052049069926927e-05, "epoch": 0.7302202643171806, "percentage": 36.5, "elapsed_time": "3:03:02", "remaining_time": "5:18:22"} +{"current_steps": 2073, "total_steps": 5676, "loss": 0.7575498819351196, "lr": 1.5047020036437187e-05, "epoch": 0.7305726872246696, "percentage": 36.52, "elapsed_time": "3:03:06", "remaining_time": "5:18:15"} +{"current_steps": 2074, "total_steps": 5676, "loss": 0.7530438899993896, "lr": 1.5041989289641215e-05, "epoch": 0.7309251101321586, "percentage": 36.54, "elapsed_time": "3:03:11", "remaining_time": "5:18:08"} +{"current_steps": 2075, "total_steps": 5676, "loss": 0.6035616397857666, "lr": 1.5036956831246792e-05, "epoch": 0.7312775330396476, "percentage": 36.56, "elapsed_time": "3:03:16", "remaining_time": "5:18:02"} +{"current_steps": 2076, "total_steps": 5676, "loss": 0.8199492692947388, "lr": 1.5031922662962279e-05, "epoch": 0.7316299559471365, "percentage": 36.58, "elapsed_time": "3:03:22", "remaining_time": "5:17:59"} +{"current_steps": 2077, "total_steps": 5676, "loss": 0.7700716257095337, "lr": 1.5026886786496624e-05, "epoch": 0.7319823788546256, "percentage": 36.59, "elapsed_time": "3:03:27", "remaining_time": "5:17:53"} +{"current_steps": 2078, "total_steps": 5676, "loss": 0.6147816777229309, "lr": 1.5021849203559347e-05, "epoch": 0.7323348017621145, "percentage": 36.61, "elapsed_time": "3:03:33", "remaining_time": "5:17:49"} +{"current_steps": 2079, "total_steps": 5676, "loss": 0.6841654777526855, "lr": 1.5016809915860549e-05, "epoch": 0.7326872246696036, "percentage": 36.63, "elapsed_time": "3:03:37", "remaining_time": "5:17:42"} +{"current_steps": 2080, "total_steps": 5676, "loss": 0.7212510108947754, "lr": 1.5011768925110915e-05, 
"epoch": 0.7330396475770925, "percentage": 36.65, "elapsed_time": "3:03:41", "remaining_time": "5:17:35"} +{"current_steps": 2081, "total_steps": 5676, "loss": 0.6695969104766846, "lr": 1.5006726233021702e-05, "epoch": 0.7333920704845815, "percentage": 36.66, "elapsed_time": "3:03:48", "remaining_time": "5:17:32"} +{"current_steps": 2082, "total_steps": 5676, "loss": 0.5991939306259155, "lr": 1.500168184130475e-05, "epoch": 0.7337444933920705, "percentage": 36.68, "elapsed_time": "3:03:54", "remaining_time": "5:17:27"} +{"current_steps": 2083, "total_steps": 5676, "loss": 0.7127671241760254, "lr": 1.4996635751672467e-05, "epoch": 0.7340969162995594, "percentage": 36.7, "elapsed_time": "3:04:00", "remaining_time": "5:17:23"} +{"current_steps": 2084, "total_steps": 5676, "loss": 0.6874737739562988, "lr": 1.4991587965837853e-05, "epoch": 0.7344493392070485, "percentage": 36.72, "elapsed_time": "3:04:06", "remaining_time": "5:17:19"} +{"current_steps": 2085, "total_steps": 5676, "loss": 0.6695086359977722, "lr": 1.4986538485514466e-05, "epoch": 0.7348017621145374, "percentage": 36.73, "elapsed_time": "3:04:11", "remaining_time": "5:17:14"} +{"current_steps": 2086, "total_steps": 5676, "loss": 0.8366880416870117, "lr": 1.4981487312416452e-05, "epoch": 0.7351541850220265, "percentage": 36.75, "elapsed_time": "3:04:16", "remaining_time": "5:17:09"} +{"current_steps": 2087, "total_steps": 5676, "loss": 0.6448042988777161, "lr": 1.4976434448258519e-05, "epoch": 0.7355066079295154, "percentage": 36.77, "elapsed_time": "3:04:23", "remaining_time": "5:17:05"} +{"current_steps": 2088, "total_steps": 5676, "loss": 0.7015181183815002, "lr": 1.4971379894755969e-05, "epoch": 0.7358590308370044, "percentage": 36.79, "elapsed_time": "3:04:29", "remaining_time": "5:17:01"} +{"current_steps": 2089, "total_steps": 5676, "loss": 0.6842815279960632, "lr": 1.4966323653624657e-05, "epoch": 0.7362114537444934, "percentage": 36.8, "elapsed_time": "3:04:34", "remaining_time": "5:16:55"} 
+{"current_steps": 2090, "total_steps": 5676, "loss": 0.6866877675056458, "lr": 1.4961265726581025e-05, "epoch": 0.7365638766519824, "percentage": 36.82, "elapsed_time": "3:04:40", "remaining_time": "5:16:51"} +{"current_steps": 2091, "total_steps": 5676, "loss": 0.5486865043640137, "lr": 1.4956206115342076e-05, "epoch": 0.7369162995594714, "percentage": 36.84, "elapsed_time": "3:04:45", "remaining_time": "5:16:45"} +{"current_steps": 2092, "total_steps": 5676, "loss": 0.7241986989974976, "lr": 1.4951144821625396e-05, "epoch": 0.7372687224669604, "percentage": 36.86, "elapsed_time": "3:04:50", "remaining_time": "5:16:39"} +{"current_steps": 2093, "total_steps": 5676, "loss": 0.8400537967681885, "lr": 1.4946081847149134e-05, "epoch": 0.7376211453744493, "percentage": 36.87, "elapsed_time": "3:04:55", "remaining_time": "5:16:35"} +{"current_steps": 2094, "total_steps": 5676, "loss": 0.6050147414207458, "lr": 1.4941017193632013e-05, "epoch": 0.7379735682819383, "percentage": 36.89, "elapsed_time": "3:05:00", "remaining_time": "5:16:28"} +{"current_steps": 2095, "total_steps": 5676, "loss": 0.6744229197502136, "lr": 1.4935950862793322e-05, "epoch": 0.7383259911894273, "percentage": 36.91, "elapsed_time": "3:05:06", "remaining_time": "5:16:23"} +{"current_steps": 2096, "total_steps": 5676, "loss": 0.6902294158935547, "lr": 1.493088285635293e-05, "epoch": 0.7386784140969163, "percentage": 36.93, "elapsed_time": "3:05:10", "remaining_time": "5:16:17"} +{"current_steps": 2097, "total_steps": 5676, "loss": 0.6328809261322021, "lr": 1.492581317603126e-05, "epoch": 0.7390308370044053, "percentage": 36.95, "elapsed_time": "3:05:16", "remaining_time": "5:16:12"} +{"current_steps": 2098, "total_steps": 5676, "loss": 0.5740914344787598, "lr": 1.4920741823549316e-05, "epoch": 0.7393832599118942, "percentage": 36.96, "elapsed_time": "3:05:21", "remaining_time": "5:16:07"} +{"current_steps": 2099, "total_steps": 5676, "loss": 0.676064133644104, "lr": 1.491566880062866e-05, "epoch": 
0.7397356828193833, "percentage": 36.98, "elapsed_time": "3:05:27", "remaining_time": "5:16:02"} +{"current_steps": 2100, "total_steps": 5676, "loss": 0.655153751373291, "lr": 1.4910594108991427e-05, "epoch": 0.7400881057268722, "percentage": 37.0, "elapsed_time": "3:05:32", "remaining_time": "5:15:57"} +{"current_steps": 2101, "total_steps": 5676, "loss": 0.7406177520751953, "lr": 1.4905517750360321e-05, "epoch": 0.7404405286343613, "percentage": 37.02, "elapsed_time": "3:05:42", "remaining_time": "5:15:58"} +{"current_steps": 2102, "total_steps": 5676, "loss": 0.6568606495857239, "lr": 1.4900439726458602e-05, "epoch": 0.7407929515418502, "percentage": 37.03, "elapsed_time": "3:05:47", "remaining_time": "5:15:53"} +{"current_steps": 2103, "total_steps": 5676, "loss": 0.8073545098304749, "lr": 1.4895360039010101e-05, "epoch": 0.7411453744493393, "percentage": 37.05, "elapsed_time": "3:05:52", "remaining_time": "5:15:47"} +{"current_steps": 2104, "total_steps": 5676, "loss": 0.6350502967834473, "lr": 1.4890278689739219e-05, "epoch": 0.7414977973568282, "percentage": 37.07, "elapsed_time": "3:05:56", "remaining_time": "5:15:40"} +{"current_steps": 2105, "total_steps": 5676, "loss": 0.6419750452041626, "lr": 1.4885195680370915e-05, "epoch": 0.7418502202643171, "percentage": 37.09, "elapsed_time": "3:06:02", "remaining_time": "5:15:37"} +{"current_steps": 2106, "total_steps": 5676, "loss": 0.72661292552948, "lr": 1.4880111012630706e-05, "epoch": 0.7422026431718062, "percentage": 37.1, "elapsed_time": "3:06:07", "remaining_time": "5:15:31"} +{"current_steps": 2107, "total_steps": 5676, "loss": 0.6996778845787048, "lr": 1.4875024688244683e-05, "epoch": 0.7425550660792951, "percentage": 37.12, "elapsed_time": "3:06:13", "remaining_time": "5:15:26"} +{"current_steps": 2108, "total_steps": 5676, "loss": 0.8383389711380005, "lr": 1.4869936708939497e-05, "epoch": 0.7429074889867842, "percentage": 37.14, "elapsed_time": "3:06:20", "remaining_time": "5:15:23"} 
+{"current_steps": 2109, "total_steps": 5676, "loss": 0.6863676905632019, "lr": 1.4864847076442358e-05, "epoch": 0.7432599118942731, "percentage": 37.16, "elapsed_time": "3:06:26", "remaining_time": "5:15:19"} +{"current_steps": 2110, "total_steps": 5676, "loss": 0.8493780493736267, "lr": 1.4859755792481032e-05, "epoch": 0.7436123348017621, "percentage": 37.17, "elapsed_time": "3:06:30", "remaining_time": "5:15:12"} +{"current_steps": 2111, "total_steps": 5676, "loss": 0.6172446012496948, "lr": 1.4854662858783857e-05, "epoch": 0.7439647577092511, "percentage": 37.19, "elapsed_time": "3:06:35", "remaining_time": "5:15:07"} +{"current_steps": 2112, "total_steps": 5676, "loss": 0.8390353918075562, "lr": 1.4849568277079724e-05, "epoch": 0.7443171806167401, "percentage": 37.21, "elapsed_time": "3:06:39", "remaining_time": "5:14:59"} +{"current_steps": 2113, "total_steps": 5676, "loss": 0.7108968496322632, "lr": 1.4844472049098087e-05, "epoch": 0.7446696035242291, "percentage": 37.23, "elapsed_time": "3:06:45", "remaining_time": "5:14:54"} +{"current_steps": 2114, "total_steps": 5676, "loss": 0.7322912812232971, "lr": 1.4839374176568956e-05, "epoch": 0.7450220264317181, "percentage": 37.24, "elapsed_time": "3:06:49", "remaining_time": "5:14:47"} +{"current_steps": 2115, "total_steps": 5676, "loss": 0.6371238231658936, "lr": 1.4834274661222896e-05, "epoch": 0.745374449339207, "percentage": 37.26, "elapsed_time": "3:06:55", "remaining_time": "5:14:42"} +{"current_steps": 2116, "total_steps": 5676, "loss": 0.8346511125564575, "lr": 1.4829173504791035e-05, "epoch": 0.745726872246696, "percentage": 37.28, "elapsed_time": "3:06:59", "remaining_time": "5:14:36"} +{"current_steps": 2117, "total_steps": 5676, "loss": 0.5893645286560059, "lr": 1.4824070709005063e-05, "epoch": 0.746079295154185, "percentage": 37.3, "elapsed_time": "3:07:04", "remaining_time": "5:14:30"} +{"current_steps": 2118, "total_steps": 5676, "loss": 0.60541832447052, "lr": 1.4818966275597213e-05, "epoch": 
0.746431718061674, "percentage": 37.32, "elapsed_time": "3:07:11", "remaining_time": "5:14:26"} +{"current_steps": 2119, "total_steps": 5676, "loss": 0.5823955535888672, "lr": 1.4813860206300286e-05, "epoch": 0.746784140969163, "percentage": 37.33, "elapsed_time": "3:07:18", "remaining_time": "5:14:24"} +{"current_steps": 2120, "total_steps": 5676, "loss": 0.6751007437705994, "lr": 1.480875250284763e-05, "epoch": 0.747136563876652, "percentage": 37.35, "elapsed_time": "3:07:23", "remaining_time": "5:14:19"} +{"current_steps": 2121, "total_steps": 5676, "loss": 0.6878843307495117, "lr": 1.4803643166973155e-05, "epoch": 0.747488986784141, "percentage": 37.37, "elapsed_time": "3:07:28", "remaining_time": "5:14:13"} +{"current_steps": 2122, "total_steps": 5676, "loss": 0.6732173562049866, "lr": 1.4798532200411319e-05, "epoch": 0.7478414096916299, "percentage": 37.39, "elapsed_time": "3:07:33", "remaining_time": "5:14:08"} +{"current_steps": 2123, "total_steps": 5676, "loss": 0.6383658647537231, "lr": 1.479341960489714e-05, "epoch": 0.748193832599119, "percentage": 37.4, "elapsed_time": "3:07:38", "remaining_time": "5:14:02"} +{"current_steps": 2124, "total_steps": 5676, "loss": 0.7444638013839722, "lr": 1.4788305382166174e-05, "epoch": 0.7485462555066079, "percentage": 37.42, "elapsed_time": "3:07:43", "remaining_time": "5:13:56"} +{"current_steps": 2125, "total_steps": 5676, "loss": 0.5492427349090576, "lr": 1.4783189533954555e-05, "epoch": 0.748898678414097, "percentage": 37.44, "elapsed_time": "3:07:49", "remaining_time": "5:13:52"} +{"current_steps": 2126, "total_steps": 5676, "loss": 0.6193333864212036, "lr": 1.4778072061998944e-05, "epoch": 0.7492511013215859, "percentage": 37.46, "elapsed_time": "3:07:54", "remaining_time": "5:13:45"} +{"current_steps": 2127, "total_steps": 5676, "loss": 0.853213906288147, "lr": 1.4772952968036572e-05, "epoch": 0.7496035242290748, "percentage": 37.47, "elapsed_time": "3:07:58", "remaining_time": "5:13:39"} +{"current_steps": 
2128, "total_steps": 5676, "loss": 0.6128672361373901, "lr": 1.4767832253805203e-05, "epoch": 0.7499559471365639, "percentage": 37.49, "elapsed_time": "3:08:04", "remaining_time": "5:13:34"} +{"current_steps": 2129, "total_steps": 5676, "loss": 0.7298723459243774, "lr": 1.4762709921043166e-05, "epoch": 0.7503083700440528, "percentage": 37.51, "elapsed_time": "3:08:10", "remaining_time": "5:13:30"} +{"current_steps": 2130, "total_steps": 5676, "loss": 0.6578782796859741, "lr": 1.475758597148933e-05, "epoch": 0.7506607929515419, "percentage": 37.53, "elapsed_time": "3:08:16", "remaining_time": "5:13:26"} +{"current_steps": 2131, "total_steps": 5676, "loss": 0.6490681171417236, "lr": 1.4752460406883122e-05, "epoch": 0.7510132158590308, "percentage": 37.54, "elapsed_time": "3:08:20", "remaining_time": "5:13:18"} +{"current_steps": 2132, "total_steps": 5676, "loss": 0.657980740070343, "lr": 1.4747333228964502e-05, "epoch": 0.7513656387665198, "percentage": 37.56, "elapsed_time": "3:08:26", "remaining_time": "5:13:14"} +{"current_steps": 2133, "total_steps": 5676, "loss": 0.8431578874588013, "lr": 1.4742204439473999e-05, "epoch": 0.7517180616740088, "percentage": 37.58, "elapsed_time": "3:08:32", "remaining_time": "5:13:10"} +{"current_steps": 2134, "total_steps": 5676, "loss": 0.7217377424240112, "lr": 1.4737074040152667e-05, "epoch": 0.7520704845814978, "percentage": 37.6, "elapsed_time": "3:08:37", "remaining_time": "5:13:05"} +{"current_steps": 2135, "total_steps": 5676, "loss": 0.6299912333488464, "lr": 1.4731942032742127e-05, "epoch": 0.7524229074889868, "percentage": 37.61, "elapsed_time": "3:08:41", "remaining_time": "5:12:56"} +{"current_steps": 2136, "total_steps": 5676, "loss": 0.6325603723526001, "lr": 1.4726808418984527e-05, "epoch": 0.7527753303964758, "percentage": 37.63, "elapsed_time": "3:08:45", "remaining_time": "5:12:49"} +{"current_steps": 2137, "total_steps": 5676, "loss": 0.6785098314285278, "lr": 1.4721673200622572e-05, "epoch": 
0.7531277533039648, "percentage": 37.65, "elapsed_time": "3:08:50", "remaining_time": "5:12:43"} +{"current_steps": 2138, "total_steps": 5676, "loss": 0.7311918139457703, "lr": 1.471653637939951e-05, "epoch": 0.7534801762114537, "percentage": 37.67, "elapsed_time": "3:08:56", "remaining_time": "5:12:39"} +{"current_steps": 2139, "total_steps": 5676, "loss": 0.7117096781730652, "lr": 1.4711397957059132e-05, "epoch": 0.7538325991189427, "percentage": 37.68, "elapsed_time": "3:09:02", "remaining_time": "5:12:35"} +{"current_steps": 2140, "total_steps": 5676, "loss": 0.6709408760070801, "lr": 1.4706257935345772e-05, "epoch": 0.7541850220264317, "percentage": 37.7, "elapsed_time": "3:09:08", "remaining_time": "5:12:30"} +{"current_steps": 2141, "total_steps": 5676, "loss": 0.6478008031845093, "lr": 1.4701116316004307e-05, "epoch": 0.7545374449339207, "percentage": 37.72, "elapsed_time": "3:09:12", "remaining_time": "5:12:23"} +{"current_steps": 2142, "total_steps": 5676, "loss": 0.6414140462875366, "lr": 1.4695973100780154e-05, "epoch": 0.7548898678414097, "percentage": 37.74, "elapsed_time": "3:09:17", "remaining_time": "5:12:19"} +{"current_steps": 2143, "total_steps": 5676, "loss": 0.6947815418243408, "lr": 1.4690828291419283e-05, "epoch": 0.7552422907488987, "percentage": 37.76, "elapsed_time": "3:09:22", "remaining_time": "5:12:12"} +{"current_steps": 2144, "total_steps": 5676, "loss": 0.6614837646484375, "lr": 1.4685681889668187e-05, "epoch": 0.7555947136563876, "percentage": 37.77, "elapsed_time": "3:09:26", "remaining_time": "5:12:05"} +{"current_steps": 2145, "total_steps": 5676, "loss": 0.7803678512573242, "lr": 1.4680533897273913e-05, "epoch": 0.7559471365638767, "percentage": 37.79, "elapsed_time": "3:09:31", "remaining_time": "5:11:58"} +{"current_steps": 2146, "total_steps": 5676, "loss": 0.8411567211151123, "lr": 1.4675384315984045e-05, "epoch": 0.7562995594713656, "percentage": 37.81, "elapsed_time": "3:09:35", "remaining_time": "5:11:52"} 
+{"current_steps": 2147, "total_steps": 5676, "loss": 0.8379243016242981, "lr": 1.4670233147546708e-05, "epoch": 0.7566519823788547, "percentage": 37.83, "elapsed_time": "3:09:39", "remaining_time": "5:11:44"} +{"current_steps": 2148, "total_steps": 5676, "loss": 0.6419194936752319, "lr": 1.4665080393710558e-05, "epoch": 0.7570044052863436, "percentage": 37.84, "elapsed_time": "3:09:44", "remaining_time": "5:11:38"} +{"current_steps": 2149, "total_steps": 5676, "loss": 0.7791979908943176, "lr": 1.4659926056224798e-05, "epoch": 0.7573568281938327, "percentage": 37.86, "elapsed_time": "3:09:49", "remaining_time": "5:11:31"} +{"current_steps": 2150, "total_steps": 5676, "loss": 0.7237389087677002, "lr": 1.465477013683916e-05, "epoch": 0.7577092511013216, "percentage": 37.88, "elapsed_time": "3:09:54", "remaining_time": "5:11:26"} +{"current_steps": 2151, "total_steps": 5676, "loss": 0.6750755906105042, "lr": 1.464961263730393e-05, "epoch": 0.7580616740088105, "percentage": 37.9, "elapsed_time": "3:09:58", "remaining_time": "5:11:19"} +{"current_steps": 2152, "total_steps": 5676, "loss": 0.5412150621414185, "lr": 1.4644453559369904e-05, "epoch": 0.7584140969162996, "percentage": 37.91, "elapsed_time": "3:10:04", "remaining_time": "5:11:15"} +{"current_steps": 2153, "total_steps": 5676, "loss": 0.7009850740432739, "lr": 1.463929290478844e-05, "epoch": 0.7587665198237885, "percentage": 37.93, "elapsed_time": "3:10:11", "remaining_time": "5:11:12"} +{"current_steps": 2154, "total_steps": 5676, "loss": 0.8678998351097107, "lr": 1.4634130675311411e-05, "epoch": 0.7591189427312776, "percentage": 37.95, "elapsed_time": "3:10:16", "remaining_time": "5:11:07"} +{"current_steps": 2155, "total_steps": 5676, "loss": 0.7395705580711365, "lr": 1.4628966872691241e-05, "epoch": 0.7594713656387665, "percentage": 37.97, "elapsed_time": "3:10:21", "remaining_time": "5:11:01"} +{"current_steps": 2156, "total_steps": 5676, "loss": 0.5950812101364136, "lr": 1.4623801498680875e-05, "epoch": 
0.7598237885462555, "percentage": 37.98, "elapsed_time": "3:10:27", "remaining_time": "5:10:56"} +{"current_steps": 2157, "total_steps": 5676, "loss": 0.7133630514144897, "lr": 1.46186345550338e-05, "epoch": 0.7601762114537445, "percentage": 38.0, "elapsed_time": "3:10:31", "remaining_time": "5:10:49"} +{"current_steps": 2158, "total_steps": 5676, "loss": 0.7551965117454529, "lr": 1.4613466043504026e-05, "epoch": 0.7605286343612335, "percentage": 38.02, "elapsed_time": "3:10:37", "remaining_time": "5:10:45"} +{"current_steps": 2159, "total_steps": 5676, "loss": 0.6654022932052612, "lr": 1.4608295965846111e-05, "epoch": 0.7608810572687225, "percentage": 38.04, "elapsed_time": "3:10:43", "remaining_time": "5:10:41"} +{"current_steps": 2160, "total_steps": 5676, "loss": 0.8081967830657959, "lr": 1.460312432381513e-05, "epoch": 0.7612334801762115, "percentage": 38.05, "elapsed_time": "3:10:49", "remaining_time": "5:10:37"} +{"current_steps": 2161, "total_steps": 5676, "loss": 0.7478348016738892, "lr": 1.4597951119166696e-05, "epoch": 0.7615859030837004, "percentage": 38.07, "elapsed_time": "3:10:55", "remaining_time": "5:10:32"} +{"current_steps": 2162, "total_steps": 5676, "loss": 0.7866748571395874, "lr": 1.4592776353656948e-05, "epoch": 0.7619383259911894, "percentage": 38.09, "elapsed_time": "3:11:00", "remaining_time": "5:10:27"} +{"current_steps": 2163, "total_steps": 5676, "loss": 0.6675869226455688, "lr": 1.4587600029042563e-05, "epoch": 0.7622907488986784, "percentage": 38.11, "elapsed_time": "3:11:06", "remaining_time": "5:10:23"} +{"current_steps": 2164, "total_steps": 5676, "loss": 0.6881103515625, "lr": 1.4582422147080739e-05, "epoch": 0.7626431718061674, "percentage": 38.13, "elapsed_time": "3:11:12", "remaining_time": "5:10:18"} +{"current_steps": 2165, "total_steps": 5676, "loss": 0.7298593521118164, "lr": 1.457724270952921e-05, "epoch": 0.7629955947136564, "percentage": 38.14, "elapsed_time": "3:11:17", "remaining_time": "5:10:13"} +{"current_steps": 
2166, "total_steps": 5676, "loss": 0.7102776765823364, "lr": 1.4572061718146224e-05, "epoch": 0.7633480176211453, "percentage": 38.16, "elapsed_time": "3:11:22", "remaining_time": "5:10:06"} +{"current_steps": 2167, "total_steps": 5676, "loss": 0.7767213582992554, "lr": 1.4566879174690576e-05, "epoch": 0.7637004405286344, "percentage": 38.18, "elapsed_time": "3:11:27", "remaining_time": "5:10:02"} +{"current_steps": 2168, "total_steps": 5676, "loss": 0.7480257749557495, "lr": 1.4561695080921573e-05, "epoch": 0.7640528634361233, "percentage": 38.2, "elapsed_time": "3:11:34", "remaining_time": "5:09:59"} +{"current_steps": 2169, "total_steps": 5676, "loss": 0.7419564723968506, "lr": 1.4556509438599057e-05, "epoch": 0.7644052863436124, "percentage": 38.21, "elapsed_time": "3:11:41", "remaining_time": "5:09:57"} +{"current_steps": 2170, "total_steps": 5676, "loss": 0.6820264458656311, "lr": 1.4551322249483388e-05, "epoch": 0.7647577092511013, "percentage": 38.23, "elapsed_time": "3:11:45", "remaining_time": "5:09:49"} +{"current_steps": 2171, "total_steps": 5676, "loss": 0.5947732329368591, "lr": 1.4546133515335462e-05, "epoch": 0.7651101321585904, "percentage": 38.25, "elapsed_time": "3:11:50", "remaining_time": "5:09:43"} +{"current_steps": 2172, "total_steps": 5676, "loss": 0.6772021055221558, "lr": 1.4540943237916685e-05, "epoch": 0.7654625550660793, "percentage": 38.27, "elapsed_time": "3:11:54", "remaining_time": "5:09:36"} +{"current_steps": 2173, "total_steps": 5676, "loss": 0.7822210192680359, "lr": 1.4535751418989e-05, "epoch": 0.7658149779735682, "percentage": 38.28, "elapsed_time": "3:11:59", "remaining_time": "5:09:29"} +{"current_steps": 2174, "total_steps": 5676, "loss": 0.6208021640777588, "lr": 1.4530558060314866e-05, "epoch": 0.7661674008810573, "percentage": 38.3, "elapsed_time": "3:12:04", "remaining_time": "5:09:24"} +{"current_steps": 2175, "total_steps": 5676, "loss": 0.8017063140869141, "lr": 1.4525363163657264e-05, "epoch": 0.7665198237885462, 
"percentage": 38.32, "elapsed_time": "3:12:08", "remaining_time": "5:09:17"} +{"current_steps": 2176, "total_steps": 5676, "loss": 0.738383948802948, "lr": 1.4520166730779704e-05, "epoch": 0.7668722466960353, "percentage": 38.34, "elapsed_time": "3:12:15", "remaining_time": "5:09:13"} +{"current_steps": 2177, "total_steps": 5676, "loss": 0.7698314785957336, "lr": 1.4514968763446213e-05, "epoch": 0.7672246696035242, "percentage": 38.35, "elapsed_time": "3:12:19", "remaining_time": "5:09:07"} +{"current_steps": 2178, "total_steps": 5676, "loss": 0.789836049079895, "lr": 1.4509769263421337e-05, "epoch": 0.7675770925110132, "percentage": 38.37, "elapsed_time": "3:12:24", "remaining_time": "5:09:00"} +{"current_steps": 2179, "total_steps": 5676, "loss": 0.6437339782714844, "lr": 1.4504568232470145e-05, "epoch": 0.7679295154185022, "percentage": 38.39, "elapsed_time": "3:12:28", "remaining_time": "5:08:53"} +{"current_steps": 2180, "total_steps": 5676, "loss": 0.6684735417366028, "lr": 1.4499365672358226e-05, "epoch": 0.7682819383259912, "percentage": 38.41, "elapsed_time": "3:12:33", "remaining_time": "5:08:47"} +{"current_steps": 2181, "total_steps": 5676, "loss": 0.6577454805374146, "lr": 1.4494161584851687e-05, "epoch": 0.7686343612334802, "percentage": 38.42, "elapsed_time": "3:12:37", "remaining_time": "5:08:40"} +{"current_steps": 2182, "total_steps": 5676, "loss": 0.5975776314735413, "lr": 1.4488955971717154e-05, "epoch": 0.7689867841409692, "percentage": 38.44, "elapsed_time": "3:12:42", "remaining_time": "5:08:35"} +{"current_steps": 2183, "total_steps": 5676, "loss": 0.6385577917098999, "lr": 1.4483748834721767e-05, "epoch": 0.7693392070484582, "percentage": 38.46, "elapsed_time": "3:12:47", "remaining_time": "5:08:28"} +{"current_steps": 2184, "total_steps": 5676, "loss": 0.6295928955078125, "lr": 1.4478540175633193e-05, "epoch": 0.7696916299559471, "percentage": 38.48, "elapsed_time": "3:12:52", "remaining_time": "5:08:23"} +{"current_steps": 2185, 
"total_steps": 5676, "loss": 0.6848496198654175, "lr": 1.4473329996219605e-05, "epoch": 0.7700440528634361, "percentage": 38.5, "elapsed_time": "3:12:57", "remaining_time": "5:08:17"} +{"current_steps": 2186, "total_steps": 5676, "loss": 0.6476501226425171, "lr": 1.44681182982497e-05, "epoch": 0.7703964757709251, "percentage": 38.51, "elapsed_time": "3:13:01", "remaining_time": "5:08:10"} +{"current_steps": 2187, "total_steps": 5676, "loss": 0.750103235244751, "lr": 1.4462905083492683e-05, "epoch": 0.7707488986784141, "percentage": 38.53, "elapsed_time": "3:13:07", "remaining_time": "5:08:05"} +{"current_steps": 2188, "total_steps": 5676, "loss": 0.668454110622406, "lr": 1.4457690353718285e-05, "epoch": 0.771101321585903, "percentage": 38.55, "elapsed_time": "3:13:13", "remaining_time": "5:08:02"} +{"current_steps": 2189, "total_steps": 5676, "loss": 0.6671048402786255, "lr": 1.4452474110696738e-05, "epoch": 0.7714537444933921, "percentage": 38.57, "elapsed_time": "3:13:19", "remaining_time": "5:07:57"} +{"current_steps": 2190, "total_steps": 5676, "loss": 0.6261379718780518, "lr": 1.4447256356198797e-05, "epoch": 0.771806167400881, "percentage": 38.58, "elapsed_time": "3:13:25", "remaining_time": "5:07:53"} +{"current_steps": 2191, "total_steps": 5676, "loss": 0.6128308176994324, "lr": 1.4442037091995726e-05, "epoch": 0.7721585903083701, "percentage": 38.6, "elapsed_time": "3:13:29", "remaining_time": "5:07:46"} +{"current_steps": 2192, "total_steps": 5676, "loss": 0.7709108591079712, "lr": 1.4436816319859306e-05, "epoch": 0.772511013215859, "percentage": 38.62, "elapsed_time": "3:13:35", "remaining_time": "5:07:41"} +{"current_steps": 2193, "total_steps": 5676, "loss": 0.6242028474807739, "lr": 1.4431594041561822e-05, "epoch": 0.7728634361233481, "percentage": 38.64, "elapsed_time": "3:13:40", "remaining_time": "5:07:35"} +{"current_steps": 2194, "total_steps": 5676, "loss": 0.8030718564987183, "lr": 1.4426370258876079e-05, "epoch": 0.773215859030837, 
"percentage": 38.65, "elapsed_time": "3:13:44", "remaining_time": "5:07:28"} +{"current_steps": 2195, "total_steps": 5676, "loss": 0.7785710692405701, "lr": 1.4421144973575386e-05, "epoch": 0.7735682819383259, "percentage": 38.67, "elapsed_time": "3:13:49", "remaining_time": "5:07:22"} +{"current_steps": 2196, "total_steps": 5676, "loss": 0.6846014857292175, "lr": 1.4415918187433564e-05, "epoch": 0.773920704845815, "percentage": 38.69, "elapsed_time": "3:13:54", "remaining_time": "5:07:16"} +{"current_steps": 2197, "total_steps": 5676, "loss": 0.7883827686309814, "lr": 1.4410689902224947e-05, "epoch": 0.7742731277533039, "percentage": 38.71, "elapsed_time": "3:13:58", "remaining_time": "5:07:10"} +{"current_steps": 2198, "total_steps": 5676, "loss": 0.8285650610923767, "lr": 1.4405460119724377e-05, "epoch": 0.774625550660793, "percentage": 38.72, "elapsed_time": "3:14:03", "remaining_time": "5:07:04"} +{"current_steps": 2199, "total_steps": 5676, "loss": 0.6101093292236328, "lr": 1.4400228841707193e-05, "epoch": 0.7749779735682819, "percentage": 38.74, "elapsed_time": "3:14:09", "remaining_time": "5:07:00"} +{"current_steps": 2200, "total_steps": 5676, "loss": 0.6627891063690186, "lr": 1.4394996069949262e-05, "epoch": 0.775330396475771, "percentage": 38.76, "elapsed_time": "3:14:14", "remaining_time": "5:06:54"} +{"current_steps": 2201, "total_steps": 5676, "loss": 0.6755822896957397, "lr": 1.4389761806226943e-05, "epoch": 0.7756828193832599, "percentage": 38.78, "elapsed_time": "3:14:24", "remaining_time": "5:06:56"} +{"current_steps": 2202, "total_steps": 5676, "loss": 0.6718465089797974, "lr": 1.4384526052317106e-05, "epoch": 0.7760352422907489, "percentage": 38.79, "elapsed_time": "3:14:29", "remaining_time": "5:06:50"} +{"current_steps": 2203, "total_steps": 5676, "loss": 0.5857758522033691, "lr": 1.4379288809997121e-05, "epoch": 0.7763876651982379, "percentage": 38.81, "elapsed_time": "3:14:35", "remaining_time": "5:06:45"} +{"current_steps": 2204, 
"total_steps": 5676, "loss": 0.5861783027648926, "lr": 1.4374050081044876e-05, "epoch": 0.7767400881057269, "percentage": 38.83, "elapsed_time": "3:14:42", "remaining_time": "5:06:44"} +{"current_steps": 2205, "total_steps": 5676, "loss": 0.6862374544143677, "lr": 1.4368809867238754e-05, "epoch": 0.7770925110132159, "percentage": 38.85, "elapsed_time": "3:14:47", "remaining_time": "5:06:38"} +{"current_steps": 2206, "total_steps": 5676, "loss": 0.6787701845169067, "lr": 1.4363568170357646e-05, "epoch": 0.7774449339207048, "percentage": 38.87, "elapsed_time": "3:14:52", "remaining_time": "5:06:32"} +{"current_steps": 2207, "total_steps": 5676, "loss": 0.5671687126159668, "lr": 1.435832499218094e-05, "epoch": 0.7777973568281938, "percentage": 38.88, "elapsed_time": "3:14:57", "remaining_time": "5:06:26"} +{"current_steps": 2208, "total_steps": 5676, "loss": 0.8243429064750671, "lr": 1.435308033448854e-05, "epoch": 0.7781497797356828, "percentage": 38.9, "elapsed_time": "3:15:02", "remaining_time": "5:06:20"} +{"current_steps": 2209, "total_steps": 5676, "loss": 0.5880655646324158, "lr": 1.4347834199060835e-05, "epoch": 0.7785022026431718, "percentage": 38.92, "elapsed_time": "3:15:08", "remaining_time": "5:06:16"} +{"current_steps": 2210, "total_steps": 5676, "loss": 0.7085679769515991, "lr": 1.4342586587678734e-05, "epoch": 0.7788546255506608, "percentage": 38.94, "elapsed_time": "3:15:13", "remaining_time": "5:06:10"} +{"current_steps": 2211, "total_steps": 5676, "loss": 0.7011853456497192, "lr": 1.4337337502123627e-05, "epoch": 0.7792070484581498, "percentage": 38.95, "elapsed_time": "3:15:18", "remaining_time": "5:06:04"} +{"current_steps": 2212, "total_steps": 5676, "loss": 0.755327582359314, "lr": 1.4332086944177426e-05, "epoch": 0.7795594713656387, "percentage": 38.97, "elapsed_time": "3:15:23", "remaining_time": "5:05:59"} +{"current_steps": 2213, "total_steps": 5676, "loss": 0.7152736186981201, "lr": 1.4326834915622522e-05, "epoch": 0.7799118942731278, 
"percentage": 38.99, "elapsed_time": "3:15:29", "remaining_time": "5:05:55"} +{"current_steps": 2214, "total_steps": 5676, "loss": 0.6744083166122437, "lr": 1.4321581418241825e-05, "epoch": 0.7802643171806167, "percentage": 39.01, "elapsed_time": "3:15:35", "remaining_time": "5:05:51"} +{"current_steps": 2215, "total_steps": 5676, "loss": 0.6112288236618042, "lr": 1.4316326453818728e-05, "epoch": 0.7806167400881058, "percentage": 39.02, "elapsed_time": "3:15:40", "remaining_time": "5:05:45"} +{"current_steps": 2216, "total_steps": 5676, "loss": 0.5569246411323547, "lr": 1.4311070024137128e-05, "epoch": 0.7809691629955947, "percentage": 39.04, "elapsed_time": "3:15:46", "remaining_time": "5:05:41"} +{"current_steps": 2217, "total_steps": 5676, "loss": 0.6214494705200195, "lr": 1.4305812130981418e-05, "epoch": 0.7813215859030836, "percentage": 39.06, "elapsed_time": "3:15:51", "remaining_time": "5:05:35"} +{"current_steps": 2218, "total_steps": 5676, "loss": 0.5401003956794739, "lr": 1.4300552776136497e-05, "epoch": 0.7816740088105727, "percentage": 39.08, "elapsed_time": "3:15:57", "remaining_time": "5:05:30"} +{"current_steps": 2219, "total_steps": 5676, "loss": 0.5128720998764038, "lr": 1.4295291961387742e-05, "epoch": 0.7820264317180616, "percentage": 39.09, "elapsed_time": "3:16:02", "remaining_time": "5:05:25"} +{"current_steps": 2220, "total_steps": 5676, "loss": 0.5495916604995728, "lr": 1.4290029688521043e-05, "epoch": 0.7823788546255507, "percentage": 39.11, "elapsed_time": "3:16:07", "remaining_time": "5:05:19"} +{"current_steps": 2221, "total_steps": 5676, "loss": 0.628544807434082, "lr": 1.4284765959322772e-05, "epoch": 0.7827312775330396, "percentage": 39.13, "elapsed_time": "3:16:12", "remaining_time": "5:05:13"} +{"current_steps": 2222, "total_steps": 5676, "loss": 0.7171294689178467, "lr": 1.427950077557981e-05, "epoch": 0.7830837004405287, "percentage": 39.15, "elapsed_time": "3:16:18", "remaining_time": "5:05:08"} +{"current_steps": 2223, 
"total_steps": 5676, "loss": 0.7436389327049255, "lr": 1.4274234139079513e-05, "epoch": 0.7834361233480176, "percentage": 39.16, "elapsed_time": "3:16:23", "remaining_time": "5:05:03"} +{"current_steps": 2224, "total_steps": 5676, "loss": 0.7154244780540466, "lr": 1.426896605160975e-05, "epoch": 0.7837885462555066, "percentage": 39.18, "elapsed_time": "3:16:27", "remaining_time": "5:04:55"} +{"current_steps": 2225, "total_steps": 5676, "loss": 0.6433268189430237, "lr": 1.426369651495886e-05, "epoch": 0.7841409691629956, "percentage": 39.2, "elapsed_time": "3:16:32", "remaining_time": "5:04:49"} +{"current_steps": 2226, "total_steps": 5676, "loss": 0.6612321734428406, "lr": 1.4258425530915703e-05, "epoch": 0.7844933920704846, "percentage": 39.22, "elapsed_time": "3:16:37", "remaining_time": "5:04:45"} +{"current_steps": 2227, "total_steps": 5676, "loss": 0.6384811401367188, "lr": 1.42531531012696e-05, "epoch": 0.7848458149779736, "percentage": 39.24, "elapsed_time": "3:16:44", "remaining_time": "5:04:41"} +{"current_steps": 2228, "total_steps": 5676, "loss": 0.5592762231826782, "lr": 1.4247879227810384e-05, "epoch": 0.7851982378854625, "percentage": 39.25, "elapsed_time": "3:16:48", "remaining_time": "5:04:35"} +{"current_steps": 2229, "total_steps": 5676, "loss": 0.6904512643814087, "lr": 1.4242603912328367e-05, "epoch": 0.7855506607929515, "percentage": 39.27, "elapsed_time": "3:16:54", "remaining_time": "5:04:30"} +{"current_steps": 2230, "total_steps": 5676, "loss": 0.7165266871452332, "lr": 1.4237327156614358e-05, "epoch": 0.7859030837004405, "percentage": 39.29, "elapsed_time": "3:16:58", "remaining_time": "5:04:23"} +{"current_steps": 2231, "total_steps": 5676, "loss": 0.8567172288894653, "lr": 1.423204896245965e-05, "epoch": 0.7862555066079295, "percentage": 39.31, "elapsed_time": "3:17:04", "remaining_time": "5:04:18"} +{"current_steps": 2232, "total_steps": 5676, "loss": 0.6595934629440308, "lr": 1.4226769331656028e-05, "epoch": 0.7866079295154185, 
"percentage": 39.32, "elapsed_time": "3:17:09", "remaining_time": "5:04:13"} +{"current_steps": 2233, "total_steps": 5676, "loss": 0.750861644744873, "lr": 1.4221488265995755e-05, "epoch": 0.7869603524229075, "percentage": 39.34, "elapsed_time": "3:17:14", "remaining_time": "5:04:06"} +{"current_steps": 2234, "total_steps": 5676, "loss": 0.7146387696266174, "lr": 1.4216205767271597e-05, "epoch": 0.7873127753303965, "percentage": 39.36, "elapsed_time": "3:17:19", "remaining_time": "5:04:02"} +{"current_steps": 2235, "total_steps": 5676, "loss": 0.58647221326828, "lr": 1.4210921837276792e-05, "epoch": 0.7876651982378855, "percentage": 39.38, "elapsed_time": "3:17:25", "remaining_time": "5:03:57"} +{"current_steps": 2236, "total_steps": 5676, "loss": 0.6893318891525269, "lr": 1.4205636477805072e-05, "epoch": 0.7880176211453744, "percentage": 39.39, "elapsed_time": "3:17:31", "remaining_time": "5:03:53"} +{"current_steps": 2237, "total_steps": 5676, "loss": 0.5545464158058167, "lr": 1.4200349690650654e-05, "epoch": 0.7883700440528635, "percentage": 39.41, "elapsed_time": "3:17:38", "remaining_time": "5:03:50"} +{"current_steps": 2238, "total_steps": 5676, "loss": 0.6088600158691406, "lr": 1.4195061477608234e-05, "epoch": 0.7887224669603524, "percentage": 39.43, "elapsed_time": "3:17:43", "remaining_time": "5:03:44"} +{"current_steps": 2239, "total_steps": 5676, "loss": 0.6330769658088684, "lr": 1.4189771840472997e-05, "epoch": 0.7890748898678414, "percentage": 39.45, "elapsed_time": "3:17:48", "remaining_time": "5:03:39"} +{"current_steps": 2240, "total_steps": 5676, "loss": 0.678654670715332, "lr": 1.4184480781040613e-05, "epoch": 0.7894273127753304, "percentage": 39.46, "elapsed_time": "3:17:54", "remaining_time": "5:03:34"} +{"current_steps": 2241, "total_steps": 5676, "loss": 0.6259177923202515, "lr": 1.417918830110723e-05, "epoch": 0.7897797356828193, "percentage": 39.48, "elapsed_time": "3:17:58", "remaining_time": "5:03:27"} +{"current_steps": 2242, 
"total_steps": 5676, "loss": 0.634982168674469, "lr": 1.4173894402469477e-05, "epoch": 0.7901321585903084, "percentage": 39.5, "elapsed_time": "3:18:04", "remaining_time": "5:03:22"} +{"current_steps": 2243, "total_steps": 5676, "loss": 0.6610612869262695, "lr": 1.4168599086924473e-05, "epoch": 0.7904845814977973, "percentage": 39.52, "elapsed_time": "3:18:10", "remaining_time": "5:03:18"} +{"current_steps": 2244, "total_steps": 5676, "loss": 0.6952961683273315, "lr": 1.416330235626981e-05, "epoch": 0.7908370044052864, "percentage": 39.53, "elapsed_time": "3:18:16", "remaining_time": "5:03:13"} +{"current_steps": 2245, "total_steps": 5676, "loss": 0.5055881142616272, "lr": 1.4158004212303565e-05, "epoch": 0.7911894273127753, "percentage": 39.55, "elapsed_time": "3:18:19", "remaining_time": "5:03:06"} +{"current_steps": 2246, "total_steps": 5676, "loss": 0.7284455299377441, "lr": 1.4152704656824288e-05, "epoch": 0.7915418502202644, "percentage": 39.57, "elapsed_time": "3:18:25", "remaining_time": "5:03:01"} +{"current_steps": 2247, "total_steps": 5676, "loss": 0.6985108852386475, "lr": 1.414740369163102e-05, "epoch": 0.7918942731277533, "percentage": 39.59, "elapsed_time": "3:18:32", "remaining_time": "5:02:58"} +{"current_steps": 2248, "total_steps": 5676, "loss": 0.5967550277709961, "lr": 1.4142101318523271e-05, "epoch": 0.7922466960352423, "percentage": 39.61, "elapsed_time": "3:18:38", "remaining_time": "5:02:55"} +{"current_steps": 2249, "total_steps": 5676, "loss": 0.7696695327758789, "lr": 1.4136797539301033e-05, "epoch": 0.7925991189427313, "percentage": 39.62, "elapsed_time": "3:18:43", "remaining_time": "5:02:48"} +{"current_steps": 2250, "total_steps": 5676, "loss": 0.8131378293037415, "lr": 1.413149235576477e-05, "epoch": 0.7929515418502202, "percentage": 39.64, "elapsed_time": "3:18:49", "remaining_time": "5:02:44"} +{"current_steps": 2251, "total_steps": 5676, "loss": 0.8029932975769043, "lr": 1.4126185769715428e-05, "epoch": 0.7933039647577093, 
"percentage": 39.66, "elapsed_time": "3:18:53", "remaining_time": "5:02:37"} +{"current_steps": 2252, "total_steps": 5676, "loss": 0.7408573031425476, "lr": 1.412087778295443e-05, "epoch": 0.7936563876651982, "percentage": 39.68, "elapsed_time": "3:18:58", "remaining_time": "5:02:31"} +{"current_steps": 2253, "total_steps": 5676, "loss": 0.8624325394630432, "lr": 1.411556839728367e-05, "epoch": 0.7940088105726872, "percentage": 39.69, "elapsed_time": "3:19:02", "remaining_time": "5:02:24"} +{"current_steps": 2254, "total_steps": 5676, "loss": 0.7635384798049927, "lr": 1.411025761450552e-05, "epoch": 0.7943612334801762, "percentage": 39.71, "elapsed_time": "3:19:08", "remaining_time": "5:02:19"} +{"current_steps": 2255, "total_steps": 5676, "loss": 0.5612920522689819, "lr": 1.4104945436422832e-05, "epoch": 0.7947136563876652, "percentage": 39.73, "elapsed_time": "3:19:13", "remaining_time": "5:02:13"} +{"current_steps": 2256, "total_steps": 5676, "loss": 0.5792248845100403, "lr": 1.4099631864838912e-05, "epoch": 0.7950660792951542, "percentage": 39.75, "elapsed_time": "3:19:18", "remaining_time": "5:02:08"} +{"current_steps": 2257, "total_steps": 5676, "loss": 0.7405142188072205, "lr": 1.4094316901557563e-05, "epoch": 0.7954185022026432, "percentage": 39.76, "elapsed_time": "3:19:24", "remaining_time": "5:02:03"} +{"current_steps": 2258, "total_steps": 5676, "loss": 0.630780816078186, "lr": 1.4089000548383044e-05, "epoch": 0.7957709251101321, "percentage": 39.78, "elapsed_time": "3:19:28", "remaining_time": "5:01:57"} +{"current_steps": 2259, "total_steps": 5676, "loss": 0.6737201809883118, "lr": 1.4083682807120092e-05, "epoch": 0.7961233480176212, "percentage": 39.8, "elapsed_time": "3:19:33", "remaining_time": "5:01:50"} +{"current_steps": 2260, "total_steps": 5676, "loss": 0.6469985842704773, "lr": 1.4078363679573918e-05, "epoch": 0.7964757709251101, "percentage": 39.82, "elapsed_time": "3:19:37", "remaining_time": "5:01:44"} +{"current_steps": 2261, 
"total_steps": 5676, "loss": 0.6315224170684814, "lr": 1.4073043167550198e-05, "epoch": 0.7968281938325992, "percentage": 39.83, "elapsed_time": "3:19:43", "remaining_time": "5:01:39"} +{"current_steps": 2262, "total_steps": 5676, "loss": 0.6785402297973633, "lr": 1.4067721272855079e-05, "epoch": 0.7971806167400881, "percentage": 39.85, "elapsed_time": "3:19:50", "remaining_time": "5:01:37"} +{"current_steps": 2263, "total_steps": 5676, "loss": 0.7131394147872925, "lr": 1.406239799729518e-05, "epoch": 0.797533039647577, "percentage": 39.87, "elapsed_time": "3:19:55", "remaining_time": "5:01:31"} +{"current_steps": 2264, "total_steps": 5676, "loss": 0.6921142339706421, "lr": 1.405707334267759e-05, "epoch": 0.7978854625550661, "percentage": 39.89, "elapsed_time": "3:20:01", "remaining_time": "5:01:26"} +{"current_steps": 2265, "total_steps": 5676, "loss": 0.695213794708252, "lr": 1.4051747310809863e-05, "epoch": 0.798237885462555, "percentage": 39.9, "elapsed_time": "3:20:06", "remaining_time": "5:01:20"} +{"current_steps": 2266, "total_steps": 5676, "loss": 0.7081988453865051, "lr": 1.4046419903500013e-05, "epoch": 0.7985903083700441, "percentage": 39.92, "elapsed_time": "3:20:11", "remaining_time": "5:01:15"} +{"current_steps": 2267, "total_steps": 5676, "loss": 0.6404637098312378, "lr": 1.4041091122556539e-05, "epoch": 0.798942731277533, "percentage": 39.94, "elapsed_time": "3:20:16", "remaining_time": "5:01:10"} +{"current_steps": 2268, "total_steps": 5676, "loss": 0.6404134631156921, "lr": 1.403576096978839e-05, "epoch": 0.7992951541850221, "percentage": 39.96, "elapsed_time": "3:20:21", "remaining_time": "5:01:04"} +{"current_steps": 2269, "total_steps": 5676, "loss": 0.7963751554489136, "lr": 1.4030429447004992e-05, "epoch": 0.799647577092511, "percentage": 39.98, "elapsed_time": "3:20:27", "remaining_time": "5:00:59"} +{"current_steps": 2270, "total_steps": 5676, "loss": 0.6648174524307251, "lr": 1.4025096556016224e-05, "epoch": 0.8, "percentage": 39.99, 
"elapsed_time": "3:20:31", "remaining_time": "5:00:52"} +{"current_steps": 2271, "total_steps": 5676, "loss": 0.6661815047264099, "lr": 1.4019762298632445e-05, "epoch": 0.800352422907489, "percentage": 40.01, "elapsed_time": "3:20:36", "remaining_time": "5:00:46"} +{"current_steps": 2272, "total_steps": 5676, "loss": 0.6194477081298828, "lr": 1.4014426676664462e-05, "epoch": 0.800704845814978, "percentage": 40.03, "elapsed_time": "3:20:42", "remaining_time": "5:00:42"} +{"current_steps": 2273, "total_steps": 5676, "loss": 0.6869276762008667, "lr": 1.400908969192356e-05, "epoch": 0.801057268722467, "percentage": 40.05, "elapsed_time": "3:20:46", "remaining_time": "5:00:34"} +{"current_steps": 2274, "total_steps": 5676, "loss": 0.7352420091629028, "lr": 1.4003751346221472e-05, "epoch": 0.8014096916299559, "percentage": 40.06, "elapsed_time": "3:20:50", "remaining_time": "5:00:28"} +{"current_steps": 2275, "total_steps": 5676, "loss": 0.8212440609931946, "lr": 1.3998411641370405e-05, "epoch": 0.801762114537445, "percentage": 40.08, "elapsed_time": "3:20:55", "remaining_time": "5:00:22"} +{"current_steps": 2276, "total_steps": 5676, "loss": 0.6897045969963074, "lr": 1.3993070579183021e-05, "epoch": 0.8021145374449339, "percentage": 40.1, "elapsed_time": "3:21:00", "remaining_time": "5:00:15"} +{"current_steps": 2277, "total_steps": 5676, "loss": 0.8406906127929688, "lr": 1.3987728161472442e-05, "epoch": 0.8024669603524229, "percentage": 40.12, "elapsed_time": "3:21:06", "remaining_time": "5:00:12"} +{"current_steps": 2278, "total_steps": 5676, "loss": 0.6236976385116577, "lr": 1.3982384390052257e-05, "epoch": 0.8028193832599119, "percentage": 40.13, "elapsed_time": "3:21:11", "remaining_time": "5:00:07"} +{"current_steps": 2279, "total_steps": 5676, "loss": 0.8110965490341187, "lr": 1.3977039266736508e-05, "epoch": 0.8031718061674009, "percentage": 40.15, "elapsed_time": "3:21:15", "remaining_time": "4:59:59"} +{"current_steps": 2280, "total_steps": 5676, "loss": 
0.635534405708313, "lr": 1.3971692793339697e-05, "epoch": 0.8035242290748899, "percentage": 40.17, "elapsed_time": "3:21:20", "remaining_time": "4:59:53"} +{"current_steps": 2281, "total_steps": 5676, "loss": 0.7806028127670288, "lr": 1.3966344971676789e-05, "epoch": 0.8038766519823789, "percentage": 40.19, "elapsed_time": "3:21:25", "remaining_time": "4:59:47"} +{"current_steps": 2282, "total_steps": 5676, "loss": 0.6635935306549072, "lr": 1.3960995803563195e-05, "epoch": 0.8042290748898678, "percentage": 40.2, "elapsed_time": "3:21:29", "remaining_time": "4:59:40"} +{"current_steps": 2283, "total_steps": 5676, "loss": 0.6064634323120117, "lr": 1.39556452908148e-05, "epoch": 0.8045814977973569, "percentage": 40.22, "elapsed_time": "3:21:34", "remaining_time": "4:59:34"} +{"current_steps": 2284, "total_steps": 5676, "loss": 0.760187029838562, "lr": 1.3950293435247933e-05, "epoch": 0.8049339207048458, "percentage": 40.24, "elapsed_time": "3:21:39", "remaining_time": "4:59:29"} +{"current_steps": 2285, "total_steps": 5676, "loss": 0.7004644274711609, "lr": 1.3944940238679384e-05, "epoch": 0.8052863436123348, "percentage": 40.26, "elapsed_time": "3:21:45", "remaining_time": "4:59:25"} +{"current_steps": 2286, "total_steps": 5676, "loss": 0.7662780284881592, "lr": 1.393958570292639e-05, "epoch": 0.8056387665198238, "percentage": 40.27, "elapsed_time": "3:21:49", "remaining_time": "4:59:18"} +{"current_steps": 2287, "total_steps": 5676, "loss": 0.7939090132713318, "lr": 1.393422982980666e-05, "epoch": 0.8059911894273127, "percentage": 40.29, "elapsed_time": "3:21:54", "remaining_time": "4:59:12"} +{"current_steps": 2288, "total_steps": 5676, "loss": 0.7461861371994019, "lr": 1.3928872621138337e-05, "epoch": 0.8063436123348018, "percentage": 40.31, "elapsed_time": "3:22:00", "remaining_time": "4:59:07"} +{"current_steps": 2289, "total_steps": 5676, "loss": 0.5997019410133362, "lr": 1.3923514078740032e-05, "epoch": 0.8066960352422907, "percentage": 40.33, "elapsed_time": 
"3:22:06", "remaining_time": "4:59:03"} +{"current_steps": 2290, "total_steps": 5676, "loss": 0.5437384843826294, "lr": 1.3918154204430801e-05, "epoch": 0.8070484581497798, "percentage": 40.35, "elapsed_time": "3:22:11", "remaining_time": "4:58:58"} +{"current_steps": 2291, "total_steps": 5676, "loss": 0.7387127876281738, "lr": 1.3912793000030154e-05, "epoch": 0.8074008810572687, "percentage": 40.36, "elapsed_time": "3:22:17", "remaining_time": "4:58:53"} +{"current_steps": 2292, "total_steps": 5676, "loss": 0.483035147190094, "lr": 1.3907430467358054e-05, "epoch": 0.8077533039647578, "percentage": 40.38, "elapsed_time": "3:22:23", "remaining_time": "4:58:49"} +{"current_steps": 2293, "total_steps": 5676, "loss": 0.6208503842353821, "lr": 1.3902066608234919e-05, "epoch": 0.8081057268722467, "percentage": 40.4, "elapsed_time": "3:22:28", "remaining_time": "4:58:43"} +{"current_steps": 2294, "total_steps": 5676, "loss": 0.6691559553146362, "lr": 1.3896701424481603e-05, "epoch": 0.8084581497797357, "percentage": 40.42, "elapsed_time": "3:22:34", "remaining_time": "4:58:39"} +{"current_steps": 2295, "total_steps": 5676, "loss": 0.8960802555084229, "lr": 1.3891334917919422e-05, "epoch": 0.8088105726872247, "percentage": 40.43, "elapsed_time": "3:22:39", "remaining_time": "4:58:33"} +{"current_steps": 2296, "total_steps": 5676, "loss": 0.669715404510498, "lr": 1.388596709037014e-05, "epoch": 0.8091629955947136, "percentage": 40.45, "elapsed_time": "3:22:45", "remaining_time": "4:58:28"} +{"current_steps": 2297, "total_steps": 5676, "loss": 0.7356190085411072, "lr": 1.3880597943655972e-05, "epoch": 0.8095154185022027, "percentage": 40.47, "elapsed_time": "3:22:51", "remaining_time": "4:58:24"} +{"current_steps": 2298, "total_steps": 5676, "loss": 0.9158750176429749, "lr": 1.3875227479599565e-05, "epoch": 0.8098678414096916, "percentage": 40.49, "elapsed_time": "3:22:56", "remaining_time": "4:58:19"} +{"current_steps": 2299, "total_steps": 5676, "loss": 0.7395786643028259, 
"lr": 1.3869855700024031e-05, "epoch": 0.8102202643171806, "percentage": 40.5, "elapsed_time": "3:23:01", "remaining_time": "4:58:13"} +{"current_steps": 2300, "total_steps": 5676, "loss": 0.594106912612915, "lr": 1.3864482606752922e-05, "epoch": 0.8105726872246696, "percentage": 40.52, "elapsed_time": "3:23:07", "remaining_time": "4:58:09"} +{"current_steps": 2301, "total_steps": 5676, "loss": 0.7853089570999146, "lr": 1.3859108201610236e-05, "epoch": 0.8109251101321586, "percentage": 40.54, "elapsed_time": "3:23:16", "remaining_time": "4:58:09"} +{"current_steps": 2302, "total_steps": 5676, "loss": 0.8346991539001465, "lr": 1.3853732486420413e-05, "epoch": 0.8112775330396476, "percentage": 40.56, "elapsed_time": "3:23:21", "remaining_time": "4:58:02"} +{"current_steps": 2303, "total_steps": 5676, "loss": 0.5493819117546082, "lr": 1.3848355463008344e-05, "epoch": 0.8116299559471366, "percentage": 40.57, "elapsed_time": "3:23:26", "remaining_time": "4:57:57"} +{"current_steps": 2304, "total_steps": 5676, "loss": 0.7474828958511353, "lr": 1.3842977133199363e-05, "epoch": 0.8119823788546255, "percentage": 40.59, "elapsed_time": "3:23:31", "remaining_time": "4:57:52"} +{"current_steps": 2305, "total_steps": 5676, "loss": 0.6599621772766113, "lr": 1.3837597498819242e-05, "epoch": 0.8123348017621146, "percentage": 40.61, "elapsed_time": "3:23:37", "remaining_time": "4:57:47"} +{"current_steps": 2306, "total_steps": 5676, "loss": 0.6751214861869812, "lr": 1.38322165616942e-05, "epoch": 0.8126872246696035, "percentage": 40.63, "elapsed_time": "3:23:42", "remaining_time": "4:57:42"} +{"current_steps": 2307, "total_steps": 5676, "loss": 0.7450453042984009, "lr": 1.3826834323650899e-05, "epoch": 0.8130396475770925, "percentage": 40.64, "elapsed_time": "3:23:47", "remaining_time": "4:57:36"} +{"current_steps": 2308, "total_steps": 5676, "loss": 0.7015345692634583, "lr": 1.382145078651644e-05, "epoch": 0.8133920704845815, "percentage": 40.66, "elapsed_time": "3:23:51", 
"remaining_time": "4:57:29"} +{"current_steps": 2309, "total_steps": 5676, "loss": 0.7161329984664917, "lr": 1.3816065952118368e-05, "epoch": 0.8137444933920704, "percentage": 40.68, "elapsed_time": "3:23:57", "remaining_time": "4:57:24"} +{"current_steps": 2310, "total_steps": 5676, "loss": 0.771783709526062, "lr": 1.3810679822284665e-05, "epoch": 0.8140969162995595, "percentage": 40.7, "elapsed_time": "3:24:03", "remaining_time": "4:57:21"} +{"current_steps": 2311, "total_steps": 5676, "loss": 0.6710794568061829, "lr": 1.3805292398843755e-05, "epoch": 0.8144493392070484, "percentage": 40.72, "elapsed_time": "3:24:08", "remaining_time": "4:57:14"} +{"current_steps": 2312, "total_steps": 5676, "loss": 0.6614924669265747, "lr": 1.3799903683624503e-05, "epoch": 0.8148017621145375, "percentage": 40.73, "elapsed_time": "3:24:13", "remaining_time": "4:57:08"} +{"current_steps": 2313, "total_steps": 5676, "loss": 0.6432225704193115, "lr": 1.3794513678456203e-05, "epoch": 0.8151541850220264, "percentage": 40.75, "elapsed_time": "3:24:18", "remaining_time": "4:57:02"} +{"current_steps": 2314, "total_steps": 5676, "loss": 0.6228311061859131, "lr": 1.3789122385168604e-05, "epoch": 0.8155066079295155, "percentage": 40.77, "elapsed_time": "3:24:23", "remaining_time": "4:56:57"} +{"current_steps": 2315, "total_steps": 5676, "loss": 0.5597498416900635, "lr": 1.3783729805591875e-05, "epoch": 0.8158590308370044, "percentage": 40.79, "elapsed_time": "3:24:29", "remaining_time": "4:56:53"} +{"current_steps": 2316, "total_steps": 5676, "loss": 0.7651177048683167, "lr": 1.3778335941556629e-05, "epoch": 0.8162114537444934, "percentage": 40.8, "elapsed_time": "3:24:34", "remaining_time": "4:56:47"} +{"current_steps": 2317, "total_steps": 5676, "loss": 0.5482406616210938, "lr": 1.3772940794893916e-05, "epoch": 0.8165638766519824, "percentage": 40.82, "elapsed_time": "3:24:38", "remaining_time": "4:56:40"} +{"current_steps": 2318, "total_steps": 5676, "loss": 0.767236590385437, "lr": 
1.3767544367435229e-05, "epoch": 0.8169162995594713, "percentage": 40.84, "elapsed_time": "3:24:43", "remaining_time": "4:56:35"} +{"current_steps": 2319, "total_steps": 5676, "loss": 0.705253541469574, "lr": 1.3762146661012471e-05, "epoch": 0.8172687224669604, "percentage": 40.86, "elapsed_time": "3:24:49", "remaining_time": "4:56:29"} +{"current_steps": 2320, "total_steps": 5676, "loss": 0.7800463438034058, "lr": 1.3756747677458008e-05, "epoch": 0.8176211453744493, "percentage": 40.87, "elapsed_time": "3:24:54", "remaining_time": "4:56:25"} +{"current_steps": 2321, "total_steps": 5676, "loss": 0.7615088224411011, "lr": 1.3751347418604623e-05, "epoch": 0.8179735682819383, "percentage": 40.89, "elapsed_time": "3:24:59", "remaining_time": "4:56:19"} +{"current_steps": 2322, "total_steps": 5676, "loss": 0.8004297614097595, "lr": 1.3745945886285536e-05, "epoch": 0.8183259911894273, "percentage": 40.91, "elapsed_time": "3:25:03", "remaining_time": "4:56:12"} +{"current_steps": 2323, "total_steps": 5676, "loss": 0.6428912281990051, "lr": 1.3740543082334399e-05, "epoch": 0.8186784140969163, "percentage": 40.93, "elapsed_time": "3:25:09", "remaining_time": "4:56:06"} +{"current_steps": 2324, "total_steps": 5676, "loss": 0.6702802777290344, "lr": 1.3735139008585294e-05, "epoch": 0.8190308370044053, "percentage": 40.94, "elapsed_time": "3:25:14", "remaining_time": "4:56:01"} +{"current_steps": 2325, "total_steps": 5676, "loss": 0.6003440022468567, "lr": 1.3729733666872736e-05, "epoch": 0.8193832599118943, "percentage": 40.96, "elapsed_time": "3:25:19", "remaining_time": "4:55:56"} +{"current_steps": 2326, "total_steps": 5676, "loss": 0.8264240622520447, "lr": 1.3724327059031677e-05, "epoch": 0.8197356828193832, "percentage": 40.98, "elapsed_time": "3:25:23", "remaining_time": "4:55:49"} +{"current_steps": 2327, "total_steps": 5676, "loss": 0.6974462866783142, "lr": 1.3718919186897481e-05, "epoch": 0.8200881057268723, "percentage": 41.0, "elapsed_time": "3:25:30", 
"remaining_time": "4:55:45"} +{"current_steps": 2328, "total_steps": 5676, "loss": 0.8273947238922119, "lr": 1.3713510052305962e-05, "epoch": 0.8204405286343612, "percentage": 41.01, "elapsed_time": "3:25:34", "remaining_time": "4:55:39"} +{"current_steps": 2329, "total_steps": 5676, "loss": 0.6230529546737671, "lr": 1.3708099657093348e-05, "epoch": 0.8207929515418502, "percentage": 41.03, "elapsed_time": "3:25:39", "remaining_time": "4:55:32"} +{"current_steps": 2330, "total_steps": 5676, "loss": 0.6997084021568298, "lr": 1.37026880030963e-05, "epoch": 0.8211453744493392, "percentage": 41.05, "elapsed_time": "3:25:44", "remaining_time": "4:55:26"} +{"current_steps": 2331, "total_steps": 5676, "loss": 0.7212036848068237, "lr": 1.3697275092151908e-05, "epoch": 0.8214977973568282, "percentage": 41.07, "elapsed_time": "3:25:49", "remaining_time": "4:55:21"} +{"current_steps": 2332, "total_steps": 5676, "loss": 0.7758737206459045, "lr": 1.3691860926097685e-05, "epoch": 0.8218502202643172, "percentage": 41.09, "elapsed_time": "3:25:53", "remaining_time": "4:55:14"} +{"current_steps": 2333, "total_steps": 5676, "loss": 0.62369704246521, "lr": 1.368644550677157e-05, "epoch": 0.8222026431718061, "percentage": 41.1, "elapsed_time": "3:25:57", "remaining_time": "4:55:07"} +{"current_steps": 2334, "total_steps": 5676, "loss": 0.8051841855049133, "lr": 1.3681028836011935e-05, "epoch": 0.8225550660792952, "percentage": 41.12, "elapsed_time": "3:26:02", "remaining_time": "4:55:01"} +{"current_steps": 2335, "total_steps": 5676, "loss": 0.6087243556976318, "lr": 1.3675610915657568e-05, "epoch": 0.8229074889867841, "percentage": 41.14, "elapsed_time": "3:26:07", "remaining_time": "4:54:56"} +{"current_steps": 2336, "total_steps": 5676, "loss": 0.6949581503868103, "lr": 1.3670191747547685e-05, "epoch": 0.8232599118942732, "percentage": 41.16, "elapsed_time": "3:26:11", "remaining_time": "4:54:48"} +{"current_steps": 2337, "total_steps": 5676, "loss": 0.5621528029441833, "lr": 
1.3664771333521922e-05, "epoch": 0.8236123348017621, "percentage": 41.17, "elapsed_time": "3:26:16", "remaining_time": "4:54:43"} +{"current_steps": 2338, "total_steps": 5676, "loss": 0.8731498718261719, "lr": 1.3659349675420346e-05, "epoch": 0.8239647577092511, "percentage": 41.19, "elapsed_time": "3:26:22", "remaining_time": "4:54:39"} +{"current_steps": 2339, "total_steps": 5676, "loss": 0.6997240781784058, "lr": 1.3653926775083437e-05, "epoch": 0.8243171806167401, "percentage": 41.21, "elapsed_time": "3:26:27", "remaining_time": "4:54:32"} +{"current_steps": 2340, "total_steps": 5676, "loss": 0.8061426877975464, "lr": 1.3648502634352104e-05, "epoch": 0.824669603524229, "percentage": 41.23, "elapsed_time": "3:26:32", "remaining_time": "4:54:26"} +{"current_steps": 2341, "total_steps": 5676, "loss": 0.6186845302581787, "lr": 1.3643077255067667e-05, "epoch": 0.8250220264317181, "percentage": 41.24, "elapsed_time": "3:26:36", "remaining_time": "4:54:20"} +{"current_steps": 2342, "total_steps": 5676, "loss": 0.8098937273025513, "lr": 1.3637650639071884e-05, "epoch": 0.825374449339207, "percentage": 41.26, "elapsed_time": "3:26:42", "remaining_time": "4:54:16"} +{"current_steps": 2343, "total_steps": 5676, "loss": 0.5810271501541138, "lr": 1.3632222788206916e-05, "epoch": 0.825726872246696, "percentage": 41.28, "elapsed_time": "3:26:49", "remaining_time": "4:54:12"} +{"current_steps": 2344, "total_steps": 5676, "loss": 0.48309600353240967, "lr": 1.3626793704315348e-05, "epoch": 0.826079295154185, "percentage": 41.3, "elapsed_time": "3:26:54", "remaining_time": "4:54:06"} +{"current_steps": 2345, "total_steps": 5676, "loss": 0.7366980314254761, "lr": 1.3621363389240188e-05, "epoch": 0.826431718061674, "percentage": 41.31, "elapsed_time": "3:26:58", "remaining_time": "4:54:00"} +{"current_steps": 2346, "total_steps": 5676, "loss": 0.6572252511978149, "lr": 1.3615931844824859e-05, "epoch": 0.826784140969163, "percentage": 41.33, "elapsed_time": "3:27:04", 
"remaining_time": "4:53:55"} +{"current_steps": 2347, "total_steps": 5676, "loss": 0.6776653528213501, "lr": 1.3610499072913204e-05, "epoch": 0.827136563876652, "percentage": 41.35, "elapsed_time": "3:27:10", "remaining_time": "4:53:50"} +{"current_steps": 2348, "total_steps": 5676, "loss": 0.6536053419113159, "lr": 1.3605065075349473e-05, "epoch": 0.827488986784141, "percentage": 41.37, "elapsed_time": "3:27:14", "remaining_time": "4:53:44"} +{"current_steps": 2349, "total_steps": 5676, "loss": 0.7000117301940918, "lr": 1.3599629853978342e-05, "epoch": 0.82784140969163, "percentage": 41.38, "elapsed_time": "3:27:18", "remaining_time": "4:53:37"} +{"current_steps": 2350, "total_steps": 5676, "loss": 0.6480045318603516, "lr": 1.3594193410644902e-05, "epoch": 0.8281938325991189, "percentage": 41.4, "elapsed_time": "3:27:24", "remaining_time": "4:53:33"} +{"current_steps": 2351, "total_steps": 5676, "loss": 0.6428179740905762, "lr": 1.3588755747194656e-05, "epoch": 0.8285462555066079, "percentage": 41.42, "elapsed_time": "3:27:29", "remaining_time": "4:53:26"} +{"current_steps": 2352, "total_steps": 5676, "loss": 0.618633508682251, "lr": 1.3583316865473517e-05, "epoch": 0.8288986784140969, "percentage": 41.44, "elapsed_time": "3:27:34", "remaining_time": "4:53:22"} +{"current_steps": 2353, "total_steps": 5676, "loss": 0.7289671897888184, "lr": 1.357787676732782e-05, "epoch": 0.8292511013215859, "percentage": 41.46, "elapsed_time": "3:27:40", "remaining_time": "4:53:17"} +{"current_steps": 2354, "total_steps": 5676, "loss": 0.6969538927078247, "lr": 1.3572435454604307e-05, "epoch": 0.8296035242290749, "percentage": 41.47, "elapsed_time": "3:27:45", "remaining_time": "4:53:11"} +{"current_steps": 2355, "total_steps": 5676, "loss": 0.8490859270095825, "lr": 1.3566992929150137e-05, "epoch": 0.8299559471365638, "percentage": 41.49, "elapsed_time": "3:27:50", "remaining_time": "4:53:06"} +{"current_steps": 2356, "total_steps": 5676, "loss": 0.6883271336555481, "lr": 
1.3561549192812877e-05, "epoch": 0.8303083700440529, "percentage": 41.51, "elapsed_time": "3:27:56", "remaining_time": "4:53:01"} +{"current_steps": 2357, "total_steps": 5676, "loss": 0.68092280626297, "lr": 1.3556104247440504e-05, "epoch": 0.8306607929515418, "percentage": 41.53, "elapsed_time": "3:28:01", "remaining_time": "4:52:55"} +{"current_steps": 2358, "total_steps": 5676, "loss": 0.7077454924583435, "lr": 1.3550658094881413e-05, "epoch": 0.8310132158590309, "percentage": 41.54, "elapsed_time": "3:28:07", "remaining_time": "4:52:52"} +{"current_steps": 2359, "total_steps": 5676, "loss": 0.7364591360092163, "lr": 1.3545210736984393e-05, "epoch": 0.8313656387665198, "percentage": 41.56, "elapsed_time": "3:28:12", "remaining_time": "4:52:45"} +{"current_steps": 2360, "total_steps": 5676, "loss": 0.8047930002212524, "lr": 1.3539762175598666e-05, "epoch": 0.8317180616740089, "percentage": 41.58, "elapsed_time": "3:28:17", "remaining_time": "4:52:40"} +{"current_steps": 2361, "total_steps": 5676, "loss": 0.7717781066894531, "lr": 1.3534312412573836e-05, "epoch": 0.8320704845814978, "percentage": 41.6, "elapsed_time": "3:28:22", "remaining_time": "4:52:34"} +{"current_steps": 2362, "total_steps": 5676, "loss": 0.7228613495826721, "lr": 1.3528861449759938e-05, "epoch": 0.8324229074889867, "percentage": 41.61, "elapsed_time": "3:28:29", "remaining_time": "4:52:31"} +{"current_steps": 2363, "total_steps": 5676, "loss": 0.8025436401367188, "lr": 1.3523409289007399e-05, "epoch": 0.8327753303964758, "percentage": 41.63, "elapsed_time": "3:28:33", "remaining_time": "4:52:24"} +{"current_steps": 2364, "total_steps": 5676, "loss": 0.6653664112091064, "lr": 1.3517955932167057e-05, "epoch": 0.8331277533039647, "percentage": 41.65, "elapsed_time": "3:28:38", "remaining_time": "4:52:18"} +{"current_steps": 2365, "total_steps": 5676, "loss": 0.709527313709259, "lr": 1.3512501381090158e-05, "epoch": 0.8334801762114538, "percentage": 41.67, "elapsed_time": "3:28:44", 
"remaining_time": "4:52:14"} +{"current_steps": 2366, "total_steps": 5676, "loss": 0.7317520380020142, "lr": 1.3507045637628355e-05, "epoch": 0.8338325991189427, "percentage": 41.68, "elapsed_time": "3:28:49", "remaining_time": "4:52:08"} +{"current_steps": 2367, "total_steps": 5676, "loss": 0.734069287776947, "lr": 1.3501588703633703e-05, "epoch": 0.8341850220264317, "percentage": 41.7, "elapsed_time": "3:28:54", "remaining_time": "4:52:02"} +{"current_steps": 2368, "total_steps": 5676, "loss": 0.5950552225112915, "lr": 1.349613058095866e-05, "epoch": 0.8345374449339207, "percentage": 41.72, "elapsed_time": "3:28:59", "remaining_time": "4:51:57"} +{"current_steps": 2369, "total_steps": 5676, "loss": 0.5958857536315918, "lr": 1.3490671271456084e-05, "epoch": 0.8348898678414097, "percentage": 41.74, "elapsed_time": "3:29:04", "remaining_time": "4:51:51"} +{"current_steps": 2370, "total_steps": 5676, "loss": 0.7094449996948242, "lr": 1.348521077697925e-05, "epoch": 0.8352422907488987, "percentage": 41.75, "elapsed_time": "3:29:09", "remaining_time": "4:51:45"} +{"current_steps": 2371, "total_steps": 5676, "loss": 0.6260385513305664, "lr": 1.3479749099381818e-05, "epoch": 0.8355947136563877, "percentage": 41.77, "elapsed_time": "3:29:15", "remaining_time": "4:51:40"} +{"current_steps": 2372, "total_steps": 5676, "loss": 0.65608811378479, "lr": 1.3474286240517862e-05, "epoch": 0.8359471365638766, "percentage": 41.79, "elapsed_time": "3:29:20", "remaining_time": "4:51:35"} +{"current_steps": 2373, "total_steps": 5676, "loss": 0.6942586898803711, "lr": 1.346882220224185e-05, "epoch": 0.8362995594713656, "percentage": 41.81, "elapsed_time": "3:29:26", "remaining_time": "4:51:30"} +{"current_steps": 2374, "total_steps": 5676, "loss": 0.6831374168395996, "lr": 1.3463356986408653e-05, "epoch": 0.8366519823788546, "percentage": 41.83, "elapsed_time": "3:29:31", "remaining_time": "4:51:26"} +{"current_steps": 2375, "total_steps": 5676, "loss": 0.6363790035247803, "lr": 
1.3457890594873546e-05, "epoch": 0.8370044052863436, "percentage": 41.84, "elapsed_time": "3:29:37", "remaining_time": "4:51:21"} +{"current_steps": 2376, "total_steps": 5676, "loss": 0.698935866355896, "lr": 1.3452423029492194e-05, "epoch": 0.8373568281938326, "percentage": 41.86, "elapsed_time": "3:29:42", "remaining_time": "4:51:16"} +{"current_steps": 2377, "total_steps": 5676, "loss": 0.8569005727767944, "lr": 1.3446954292120667e-05, "epoch": 0.8377092511013216, "percentage": 41.88, "elapsed_time": "3:29:48", "remaining_time": "4:51:11"} +{"current_steps": 2378, "total_steps": 5676, "loss": 0.8461613655090332, "lr": 1.3441484384615428e-05, "epoch": 0.8380616740088106, "percentage": 41.9, "elapsed_time": "3:29:53", "remaining_time": "4:51:05"} +{"current_steps": 2379, "total_steps": 5676, "loss": 0.7481078505516052, "lr": 1.343601330883335e-05, "epoch": 0.8384140969162995, "percentage": 41.91, "elapsed_time": "3:29:57", "remaining_time": "4:50:58"} +{"current_steps": 2380, "total_steps": 5676, "loss": 0.5632544755935669, "lr": 1.343054106663168e-05, "epoch": 0.8387665198237886, "percentage": 41.93, "elapsed_time": "3:30:04", "remaining_time": "4:50:55"} +{"current_steps": 2381, "total_steps": 5676, "loss": 0.528980016708374, "lr": 1.3425067659868084e-05, "epoch": 0.8391189427312775, "percentage": 41.95, "elapsed_time": "3:30:10", "remaining_time": "4:50:50"} +{"current_steps": 2382, "total_steps": 5676, "loss": 0.5520849227905273, "lr": 1.341959309040061e-05, "epoch": 0.8394713656387666, "percentage": 41.97, "elapsed_time": "3:30:14", "remaining_time": "4:50:44"} +{"current_steps": 2383, "total_steps": 5676, "loss": 0.569744348526001, "lr": 1.34141173600877e-05, "epoch": 0.8398237885462555, "percentage": 41.98, "elapsed_time": "3:30:21", "remaining_time": "4:50:41"} +{"current_steps": 2384, "total_steps": 5676, "loss": 0.595065712928772, "lr": 1.3408640470788202e-05, "epoch": 0.8401762114537445, "percentage": 42.0, "elapsed_time": "3:30:28", "remaining_time": 
"4:50:38"} +{"current_steps": 2385, "total_steps": 5676, "loss": 0.6993277072906494, "lr": 1.3403162424361342e-05, "epoch": 0.8405286343612335, "percentage": 42.02, "elapsed_time": "3:30:33", "remaining_time": "4:50:32"} +{"current_steps": 2386, "total_steps": 5676, "loss": 0.6183342933654785, "lr": 1.3397683222666748e-05, "epoch": 0.8408810572687224, "percentage": 42.04, "elapsed_time": "3:30:39", "remaining_time": "4:50:27"} +{"current_steps": 2387, "total_steps": 5676, "loss": 0.7280797362327576, "lr": 1.339220286756444e-05, "epoch": 0.8412334801762115, "percentage": 42.05, "elapsed_time": "3:30:44", "remaining_time": "4:50:21"} +{"current_steps": 2388, "total_steps": 5676, "loss": 0.7377837896347046, "lr": 1.3386721360914829e-05, "epoch": 0.8415859030837004, "percentage": 42.07, "elapsed_time": "3:30:49", "remaining_time": "4:50:17"} +{"current_steps": 2389, "total_steps": 5676, "loss": 0.7202758193016052, "lr": 1.3381238704578718e-05, "epoch": 0.8419383259911895, "percentage": 42.09, "elapsed_time": "3:30:55", "remaining_time": "4:50:12"} +{"current_steps": 2390, "total_steps": 5676, "loss": 0.5899994969367981, "lr": 1.3375754900417291e-05, "epoch": 0.8422907488986784, "percentage": 42.11, "elapsed_time": "3:31:00", "remaining_time": "4:50:06"} +{"current_steps": 2391, "total_steps": 5676, "loss": 0.8128558993339539, "lr": 1.3370269950292133e-05, "epoch": 0.8426431718061674, "percentage": 42.12, "elapsed_time": "3:31:04", "remaining_time": "4:49:59"} +{"current_steps": 2392, "total_steps": 5676, "loss": 0.8222962617874146, "lr": 1.3364783856065213e-05, "epoch": 0.8429955947136564, "percentage": 42.14, "elapsed_time": "3:31:10", "remaining_time": "4:49:54"} +{"current_steps": 2393, "total_steps": 5676, "loss": 0.7898896932601929, "lr": 1.3359296619598894e-05, "epoch": 0.8433480176211454, "percentage": 42.16, "elapsed_time": "3:31:14", "remaining_time": "4:49:48"} +{"current_steps": 2394, "total_steps": 5676, "loss": 0.6596726179122925, "lr": 
1.3353808242755912e-05, "epoch": 0.8437004405286344, "percentage": 42.18, "elapsed_time": "3:31:19", "remaining_time": "4:49:42"} +{"current_steps": 2395, "total_steps": 5676, "loss": 0.8073080778121948, "lr": 1.3348318727399411e-05, "epoch": 0.8440528634361234, "percentage": 42.2, "elapsed_time": "3:31:23", "remaining_time": "4:49:35"} +{"current_steps": 2396, "total_steps": 5676, "loss": 0.6640043258666992, "lr": 1.3342828075392902e-05, "epoch": 0.8444052863436123, "percentage": 42.21, "elapsed_time": "3:31:28", "remaining_time": "4:49:29"} +{"current_steps": 2397, "total_steps": 5676, "loss": 0.6067632436752319, "lr": 1.3337336288600297e-05, "epoch": 0.8447577092511013, "percentage": 42.23, "elapsed_time": "3:31:33", "remaining_time": "4:49:24"} +{"current_steps": 2398, "total_steps": 5676, "loss": 0.6891398429870605, "lr": 1.3331843368885882e-05, "epoch": 0.8451101321585903, "percentage": 42.25, "elapsed_time": "3:31:38", "remaining_time": "4:49:18"} +{"current_steps": 2399, "total_steps": 5676, "loss": 0.6007423996925354, "lr": 1.3326349318114335e-05, "epoch": 0.8454625550660793, "percentage": 42.27, "elapsed_time": "3:31:44", "remaining_time": "4:49:14"} +{"current_steps": 2400, "total_steps": 5676, "loss": 0.7314017415046692, "lr": 1.3320854138150712e-05, "epoch": 0.8458149779735683, "percentage": 42.28, "elapsed_time": "3:31:50", "remaining_time": "4:49:09"} +{"current_steps": 2401, "total_steps": 5676, "loss": 0.7352335453033447, "lr": 1.3315357830860461e-05, "epoch": 0.8461674008810572, "percentage": 42.3, "elapsed_time": "3:32:00", "remaining_time": "4:49:11"} +{"current_steps": 2402, "total_steps": 5676, "loss": 0.6546785831451416, "lr": 1.3309860398109402e-05, "epoch": 0.8465198237885463, "percentage": 42.32, "elapsed_time": "3:32:07", "remaining_time": "4:49:08"} +{"current_steps": 2403, "total_steps": 5676, "loss": 0.590252697467804, "lr": 1.3304361841763746e-05, "epoch": 0.8468722466960352, "percentage": 42.34, "elapsed_time": "3:32:13", 
"remaining_time": "4:49:03"} +{"current_steps": 2404, "total_steps": 5676, "loss": 0.6864089369773865, "lr": 1.3298862163690078e-05, "epoch": 0.8472246696035243, "percentage": 42.35, "elapsed_time": "3:32:18", "remaining_time": "4:48:58"} +{"current_steps": 2405, "total_steps": 5676, "loss": 0.7818390130996704, "lr": 1.3293361365755373e-05, "epoch": 0.8475770925110132, "percentage": 42.37, "elapsed_time": "3:32:23", "remaining_time": "4:48:52"} +{"current_steps": 2406, "total_steps": 5676, "loss": 0.7461166381835938, "lr": 1.3287859449826977e-05, "epoch": 0.8479295154185023, "percentage": 42.39, "elapsed_time": "3:32:29", "remaining_time": "4:48:47"} +{"current_steps": 2407, "total_steps": 5676, "loss": 0.7519750595092773, "lr": 1.3282356417772618e-05, "epoch": 0.8482819383259912, "percentage": 42.41, "elapsed_time": "3:32:33", "remaining_time": "4:48:41"} +{"current_steps": 2408, "total_steps": 5676, "loss": 0.7041791081428528, "lr": 1.3276852271460406e-05, "epoch": 0.8486343612334801, "percentage": 42.42, "elapsed_time": "3:32:38", "remaining_time": "4:48:34"} +{"current_steps": 2409, "total_steps": 5676, "loss": 0.5649428367614746, "lr": 1.327134701275883e-05, "epoch": 0.8489867841409692, "percentage": 42.44, "elapsed_time": "3:32:42", "remaining_time": "4:48:28"} +{"current_steps": 2410, "total_steps": 5676, "loss": 0.6607545614242554, "lr": 1.3265840643536746e-05, "epoch": 0.8493392070484581, "percentage": 42.46, "elapsed_time": "3:32:47", "remaining_time": "4:48:22"} +{"current_steps": 2411, "total_steps": 5676, "loss": 0.7393547892570496, "lr": 1.3260333165663406e-05, "epoch": 0.8496916299559472, "percentage": 42.48, "elapsed_time": "3:32:52", "remaining_time": "4:48:16"} +{"current_steps": 2412, "total_steps": 5676, "loss": 0.6550742387771606, "lr": 1.325482458100842e-05, "epoch": 0.8500440528634361, "percentage": 42.49, "elapsed_time": "3:32:57", "remaining_time": "4:48:10"} +{"current_steps": 2413, "total_steps": 5676, "loss": 0.5104576349258423, "lr": 
1.324931489144178e-05, "epoch": 0.8503964757709251, "percentage": 42.51, "elapsed_time": "3:33:02", "remaining_time": "4:48:05"} +{"current_steps": 2414, "total_steps": 5676, "loss": 0.7679733037948608, "lr": 1.3243804098833859e-05, "epoch": 0.8507488986784141, "percentage": 42.53, "elapsed_time": "3:33:07", "remaining_time": "4:48:00"} +{"current_steps": 2415, "total_steps": 5676, "loss": 0.6516377925872803, "lr": 1.3238292205055397e-05, "epoch": 0.8511013215859031, "percentage": 42.55, "elapsed_time": "3:33:12", "remaining_time": "4:47:54"} +{"current_steps": 2416, "total_steps": 5676, "loss": 0.8509281277656555, "lr": 1.3232779211977509e-05, "epoch": 0.8514537444933921, "percentage": 42.57, "elapsed_time": "3:33:17", "remaining_time": "4:47:48"} +{"current_steps": 2417, "total_steps": 5676, "loss": 0.5643317103385925, "lr": 1.3227265121471691e-05, "epoch": 0.8518061674008811, "percentage": 42.58, "elapsed_time": "3:33:21", "remaining_time": "4:47:41"} +{"current_steps": 2418, "total_steps": 5676, "loss": 0.5294302105903625, "lr": 1.3221749935409798e-05, "epoch": 0.85215859030837, "percentage": 42.6, "elapsed_time": "3:33:27", "remaining_time": "4:47:36"} +{"current_steps": 2419, "total_steps": 5676, "loss": 0.6301594972610474, "lr": 1.3216233655664067e-05, "epoch": 0.852511013215859, "percentage": 42.62, "elapsed_time": "3:33:32", "remaining_time": "4:47:31"} +{"current_steps": 2420, "total_steps": 5676, "loss": 0.6930294036865234, "lr": 1.32107162841071e-05, "epoch": 0.852863436123348, "percentage": 42.64, "elapsed_time": "3:33:38", "remaining_time": "4:47:26"} +{"current_steps": 2421, "total_steps": 5676, "loss": 0.6266883611679077, "lr": 1.3205197822611876e-05, "epoch": 0.853215859030837, "percentage": 42.65, "elapsed_time": "3:33:43", "remaining_time": "4:47:20"} +{"current_steps": 2422, "total_steps": 5676, "loss": 0.7789868116378784, "lr": 1.3199678273051743e-05, "epoch": 0.853568281938326, "percentage": 42.67, "elapsed_time": "3:33:47", "remaining_time": 
"4:47:13"} +{"current_steps": 2423, "total_steps": 5676, "loss": 0.7148274779319763, "lr": 1.3194157637300416e-05, "epoch": 0.853920704845815, "percentage": 42.69, "elapsed_time": "3:33:53", "remaining_time": "4:47:09"} +{"current_steps": 2424, "total_steps": 5676, "loss": 0.550403356552124, "lr": 1.3188635917231972e-05, "epoch": 0.854273127753304, "percentage": 42.71, "elapsed_time": "3:33:58", "remaining_time": "4:47:03"} +{"current_steps": 2425, "total_steps": 5676, "loss": 0.6650338768959045, "lr": 1.3183113114720872e-05, "epoch": 0.8546255506607929, "percentage": 42.72, "elapsed_time": "3:34:02", "remaining_time": "4:46:57"} +{"current_steps": 2426, "total_steps": 5676, "loss": 0.7774436473846436, "lr": 1.317758923164193e-05, "epoch": 0.854977973568282, "percentage": 42.74, "elapsed_time": "3:34:07", "remaining_time": "4:46:51"} +{"current_steps": 2427, "total_steps": 5676, "loss": 0.6192025542259216, "lr": 1.3172064269870335e-05, "epoch": 0.8553303964757709, "percentage": 42.76, "elapsed_time": "3:34:13", "remaining_time": "4:46:46"} +{"current_steps": 2428, "total_steps": 5676, "loss": 0.6758309602737427, "lr": 1.3166538231281635e-05, "epoch": 0.85568281938326, "percentage": 42.78, "elapsed_time": "3:34:17", "remaining_time": "4:46:39"} +{"current_steps": 2429, "total_steps": 5676, "loss": 0.7311116456985474, "lr": 1.3161011117751756e-05, "epoch": 0.8560352422907489, "percentage": 42.79, "elapsed_time": "3:34:23", "remaining_time": "4:46:35"} +{"current_steps": 2430, "total_steps": 5676, "loss": 0.7525666952133179, "lr": 1.3155482931156977e-05, "epoch": 0.8563876651982378, "percentage": 42.81, "elapsed_time": "3:34:27", "remaining_time": "4:46:28"} +{"current_steps": 2431, "total_steps": 5676, "loss": 0.6903671026229858, "lr": 1.3149953673373945e-05, "epoch": 0.8567400881057269, "percentage": 42.83, "elapsed_time": "3:34:31", "remaining_time": "4:46:21"} +{"current_steps": 2432, "total_steps": 5676, "loss": 0.6036638021469116, "lr": 1.314442334627967e-05, 
"epoch": 0.8570925110132158, "percentage": 42.85, "elapsed_time": "3:34:38", "remaining_time": "4:46:18"} +{"current_steps": 2433, "total_steps": 5676, "loss": 0.6490209698677063, "lr": 1.3138891951751526e-05, "epoch": 0.8574449339207049, "percentage": 42.86, "elapsed_time": "3:34:42", "remaining_time": "4:46:11"} +{"current_steps": 2434, "total_steps": 5676, "loss": 0.692024290561676, "lr": 1.3133359491667252e-05, "epoch": 0.8577973568281938, "percentage": 42.88, "elapsed_time": "3:34:48", "remaining_time": "4:46:06"} +{"current_steps": 2435, "total_steps": 5676, "loss": 0.6977943181991577, "lr": 1.3127825967904944e-05, "epoch": 0.8581497797356828, "percentage": 42.9, "elapsed_time": "3:34:53", "remaining_time": "4:46:01"} +{"current_steps": 2436, "total_steps": 5676, "loss": 0.625649094581604, "lr": 1.312229138234306e-05, "epoch": 0.8585022026431718, "percentage": 42.92, "elapsed_time": "3:34:58", "remaining_time": "4:45:55"} +{"current_steps": 2437, "total_steps": 5676, "loss": 0.671939492225647, "lr": 1.3116755736860422e-05, "epoch": 0.8588546255506608, "percentage": 42.94, "elapsed_time": "3:35:03", "remaining_time": "4:45:49"} +{"current_steps": 2438, "total_steps": 5676, "loss": 0.700029194355011, "lr": 1.3111219033336211e-05, "epoch": 0.8592070484581498, "percentage": 42.95, "elapsed_time": "3:35:07", "remaining_time": "4:45:43"} +{"current_steps": 2439, "total_steps": 5676, "loss": 0.6339718699455261, "lr": 1.3105681273649959e-05, "epoch": 0.8595594713656388, "percentage": 42.97, "elapsed_time": "3:35:14", "remaining_time": "4:45:39"} +{"current_steps": 2440, "total_steps": 5676, "loss": 0.7105488777160645, "lr": 1.3100142459681569e-05, "epoch": 0.8599118942731278, "percentage": 42.99, "elapsed_time": "3:35:20", "remaining_time": "4:45:34"} +{"current_steps": 2441, "total_steps": 5676, "loss": 0.616797924041748, "lr": 1.3094602593311294e-05, "epoch": 0.8602643171806167, "percentage": 43.01, "elapsed_time": "3:35:26", "remaining_time": "4:45:31"} 
+{"current_steps": 2442, "total_steps": 5676, "loss": 0.7167524099349976, "lr": 1.3089061676419746e-05, "epoch": 0.8606167400881057, "percentage": 43.02, "elapsed_time": "3:35:30", "remaining_time": "4:45:24"} +{"current_steps": 2443, "total_steps": 5676, "loss": 0.5499090552330017, "lr": 1.3083519710887895e-05, "epoch": 0.8609691629955947, "percentage": 43.04, "elapsed_time": "3:35:35", "remaining_time": "4:45:18"} +{"current_steps": 2444, "total_steps": 5676, "loss": 0.5764151811599731, "lr": 1.3077976698597064e-05, "epoch": 0.8613215859030837, "percentage": 43.06, "elapsed_time": "3:35:40", "remaining_time": "4:45:12"} +{"current_steps": 2445, "total_steps": 5676, "loss": 0.7171419858932495, "lr": 1.3072432641428931e-05, "epoch": 0.8616740088105727, "percentage": 43.08, "elapsed_time": "3:35:46", "remaining_time": "4:45:07"} +{"current_steps": 2446, "total_steps": 5676, "loss": 0.7546026706695557, "lr": 1.3066887541265539e-05, "epoch": 0.8620264317180617, "percentage": 43.09, "elapsed_time": "3:35:51", "remaining_time": "4:45:02"} +{"current_steps": 2447, "total_steps": 5676, "loss": 0.5884296298027039, "lr": 1.306134139998927e-05, "epoch": 0.8623788546255506, "percentage": 43.11, "elapsed_time": "3:35:57", "remaining_time": "4:44:58"} +{"current_steps": 2448, "total_steps": 5676, "loss": 0.6877926588058472, "lr": 1.3055794219482867e-05, "epoch": 0.8627312775330397, "percentage": 43.13, "elapsed_time": "3:36:04", "remaining_time": "4:44:54"} +{"current_steps": 2449, "total_steps": 5676, "loss": 0.598037838935852, "lr": 1.3050246001629425e-05, "epoch": 0.8630837004405286, "percentage": 43.15, "elapsed_time": "3:36:09", "remaining_time": "4:44:49"} +{"current_steps": 2450, "total_steps": 5676, "loss": 0.6560795307159424, "lr": 1.3044696748312395e-05, "epoch": 0.8634361233480177, "percentage": 43.16, "elapsed_time": "3:36:14", "remaining_time": "4:44:43"} +{"current_steps": 2451, "total_steps": 5676, "loss": 0.7130829691886902, "lr": 1.3039146461415575e-05, 
"epoch": 0.8637885462555066, "percentage": 43.18, "elapsed_time": "3:36:19", "remaining_time": "4:44:38"} +{"current_steps": 2452, "total_steps": 5676, "loss": 0.7402251958847046, "lr": 1.303359514282311e-05, "epoch": 0.8641409691629955, "percentage": 43.2, "elapsed_time": "3:36:23", "remaining_time": "4:44:31"} +{"current_steps": 2453, "total_steps": 5676, "loss": 0.6610683798789978, "lr": 1.3028042794419502e-05, "epoch": 0.8644933920704846, "percentage": 43.22, "elapsed_time": "3:36:28", "remaining_time": "4:44:26"} +{"current_steps": 2454, "total_steps": 5676, "loss": 0.84892737865448, "lr": 1.3022489418089606e-05, "epoch": 0.8648458149779735, "percentage": 43.23, "elapsed_time": "3:36:34", "remaining_time": "4:44:21"} +{"current_steps": 2455, "total_steps": 5676, "loss": 0.7285948991775513, "lr": 1.3016935015718612e-05, "epoch": 0.8651982378854626, "percentage": 43.25, "elapsed_time": "3:36:41", "remaining_time": "4:44:17"} +{"current_steps": 2456, "total_steps": 5676, "loss": 0.6800004839897156, "lr": 1.3011379589192074e-05, "epoch": 0.8655506607929515, "percentage": 43.27, "elapsed_time": "3:36:46", "remaining_time": "4:44:12"} +{"current_steps": 2457, "total_steps": 5676, "loss": 0.618618369102478, "lr": 1.3005823140395878e-05, "epoch": 0.8659030837004406, "percentage": 43.29, "elapsed_time": "3:36:52", "remaining_time": "4:44:07"} +{"current_steps": 2458, "total_steps": 5676, "loss": 0.7657757997512817, "lr": 1.3000265671216278e-05, "epoch": 0.8662555066079295, "percentage": 43.31, "elapsed_time": "3:36:58", "remaining_time": "4:44:03"} +{"current_steps": 2459, "total_steps": 5676, "loss": 0.7814151644706726, "lr": 1.2994707183539848e-05, "epoch": 0.8666079295154185, "percentage": 43.32, "elapsed_time": "3:37:02", "remaining_time": "4:43:57"} +{"current_steps": 2460, "total_steps": 5676, "loss": 0.6494930982589722, "lr": 1.2989147679253531e-05, "epoch": 0.8669603524229075, "percentage": 43.34, "elapsed_time": "3:37:08", "remaining_time": "4:43:52"} 
+{"current_steps": 2461, "total_steps": 5676, "loss": 0.6498425006866455, "lr": 1.2983587160244602e-05, "epoch": 0.8673127753303965, "percentage": 43.36, "elapsed_time": "3:37:14", "remaining_time": "4:43:48"} +{"current_steps": 2462, "total_steps": 5676, "loss": 0.635313093662262, "lr": 1.2978025628400684e-05, "epoch": 0.8676651982378855, "percentage": 43.38, "elapsed_time": "3:37:20", "remaining_time": "4:43:43"} +{"current_steps": 2463, "total_steps": 5676, "loss": 0.6892971992492676, "lr": 1.2972463085609744e-05, "epoch": 0.8680176211453744, "percentage": 43.39, "elapsed_time": "3:37:25", "remaining_time": "4:43:38"} +{"current_steps": 2464, "total_steps": 5676, "loss": 0.691922128200531, "lr": 1.2966899533760095e-05, "epoch": 0.8683700440528634, "percentage": 43.41, "elapsed_time": "3:37:29", "remaining_time": "4:43:31"} +{"current_steps": 2465, "total_steps": 5676, "loss": 0.5764378309249878, "lr": 1.2961334974740386e-05, "epoch": 0.8687224669603524, "percentage": 43.43, "elapsed_time": "3:37:34", "remaining_time": "4:43:24"} +{"current_steps": 2466, "total_steps": 5676, "loss": 0.8193005919456482, "lr": 1.2955769410439616e-05, "epoch": 0.8690748898678414, "percentage": 43.45, "elapsed_time": "3:37:40", "remaining_time": "4:43:20"} +{"current_steps": 2467, "total_steps": 5676, "loss": 0.6141501665115356, "lr": 1.2950202842747115e-05, "epoch": 0.8694273127753304, "percentage": 43.46, "elapsed_time": "3:37:46", "remaining_time": "4:43:16"} +{"current_steps": 2468, "total_steps": 5676, "loss": 0.7464454174041748, "lr": 1.2944635273552565e-05, "epoch": 0.8697797356828194, "percentage": 43.48, "elapsed_time": "3:37:52", "remaining_time": "4:43:11"} +{"current_steps": 2469, "total_steps": 5676, "loss": 0.5970025062561035, "lr": 1.293906670474598e-05, "epoch": 0.8701321585903083, "percentage": 43.5, "elapsed_time": "3:37:57", "remaining_time": "4:43:06"} +{"current_steps": 2470, "total_steps": 5676, "loss": 0.7247673273086548, "lr": 1.2933497138217714e-05, "epoch": 
0.8704845814977974, "percentage": 43.52, "elapsed_time": "3:38:02", "remaining_time": "4:43:00"} +{"current_steps": 2471, "total_steps": 5676, "loss": 0.746272087097168, "lr": 1.2927926575858463e-05, "epoch": 0.8708370044052863, "percentage": 43.53, "elapsed_time": "3:38:07", "remaining_time": "4:42:54"} +{"current_steps": 2472, "total_steps": 5676, "loss": 0.6918776035308838, "lr": 1.2922355019559265e-05, "epoch": 0.8711894273127754, "percentage": 43.55, "elapsed_time": "3:38:11", "remaining_time": "4:42:48"} +{"current_steps": 2473, "total_steps": 5676, "loss": 0.6056039929389954, "lr": 1.2916782471211478e-05, "epoch": 0.8715418502202643, "percentage": 43.57, "elapsed_time": "3:38:16", "remaining_time": "4:42:42"} +{"current_steps": 2474, "total_steps": 5676, "loss": 0.6699481010437012, "lr": 1.2911208932706821e-05, "epoch": 0.8718942731277532, "percentage": 43.59, "elapsed_time": "3:38:21", "remaining_time": "4:42:36"} +{"current_steps": 2475, "total_steps": 5676, "loss": 0.5141814947128296, "lr": 1.2905634405937327e-05, "epoch": 0.8722466960352423, "percentage": 43.6, "elapsed_time": "3:38:26", "remaining_time": "4:42:31"} +{"current_steps": 2476, "total_steps": 5676, "loss": 0.7521284818649292, "lr": 1.2900058892795383e-05, "epoch": 0.8725991189427312, "percentage": 43.62, "elapsed_time": "3:38:32", "remaining_time": "4:42:26"} +{"current_steps": 2477, "total_steps": 5676, "loss": 0.6878937482833862, "lr": 1.2894482395173695e-05, "epoch": 0.8729515418502203, "percentage": 43.64, "elapsed_time": "3:38:38", "remaining_time": "4:42:21"} +{"current_steps": 2478, "total_steps": 5676, "loss": 0.5963379144668579, "lr": 1.2888904914965317e-05, "epoch": 0.8733039647577092, "percentage": 43.66, "elapsed_time": "3:38:44", "remaining_time": "4:42:17"} +{"current_steps": 2479, "total_steps": 5676, "loss": 0.7572320103645325, "lr": 1.2883326454063623e-05, "epoch": 0.8736563876651983, "percentage": 43.68, "elapsed_time": "3:38:49", "remaining_time": "4:42:12"} 
+{"current_steps": 2480, "total_steps": 5676, "loss": 0.7047982215881348, "lr": 1.2877747014362334e-05, "epoch": 0.8740088105726872, "percentage": 43.69, "elapsed_time": "3:38:55", "remaining_time": "4:42:07"} +{"current_steps": 2481, "total_steps": 5676, "loss": 0.6449024677276611, "lr": 1.2872166597755488e-05, "epoch": 0.8743612334801762, "percentage": 43.71, "elapsed_time": "3:38:59", "remaining_time": "4:42:01"} +{"current_steps": 2482, "total_steps": 5676, "loss": 0.7590922117233276, "lr": 1.2866585206137469e-05, "epoch": 0.8747136563876652, "percentage": 43.73, "elapsed_time": "3:39:05", "remaining_time": "4:41:56"} +{"current_steps": 2483, "total_steps": 5676, "loss": 0.7534210085868835, "lr": 1.2861002841402983e-05, "epoch": 0.8750660792951542, "percentage": 43.75, "elapsed_time": "3:39:10", "remaining_time": "4:41:50"} +{"current_steps": 2484, "total_steps": 5676, "loss": 0.7091225385665894, "lr": 1.2855419505447073e-05, "epoch": 0.8754185022026432, "percentage": 43.76, "elapsed_time": "3:39:15", "remaining_time": "4:41:45"} +{"current_steps": 2485, "total_steps": 5676, "loss": 0.7578933835029602, "lr": 1.2849835200165104e-05, "epoch": 0.8757709251101321, "percentage": 43.78, "elapsed_time": "3:39:21", "remaining_time": "4:41:40"} +{"current_steps": 2486, "total_steps": 5676, "loss": 0.5938349962234497, "lr": 1.2844249927452771e-05, "epoch": 0.8761233480176212, "percentage": 43.8, "elapsed_time": "3:39:27", "remaining_time": "4:41:35"} +{"current_steps": 2487, "total_steps": 5676, "loss": 0.5726315379142761, "lr": 1.2838663689206108e-05, "epoch": 0.8764757709251101, "percentage": 43.82, "elapsed_time": "3:39:32", "remaining_time": "4:41:30"} +{"current_steps": 2488, "total_steps": 5676, "loss": 0.8181554079055786, "lr": 1.2833076487321465e-05, "epoch": 0.8768281938325991, "percentage": 43.83, "elapsed_time": "3:39:36", "remaining_time": "4:41:24"} +{"current_steps": 2489, "total_steps": 5676, "loss": 0.7465275526046753, "lr": 1.2827488323695522e-05, 
"epoch": 0.8771806167400881, "percentage": 43.85, "elapsed_time": "3:39:41", "remaining_time": "4:41:18"} +{"current_steps": 2490, "total_steps": 5676, "loss": 0.6083456873893738, "lr": 1.2821899200225288e-05, "epoch": 0.8775330396475771, "percentage": 43.87, "elapsed_time": "3:39:47", "remaining_time": "4:41:13"} +{"current_steps": 2491, "total_steps": 5676, "loss": 0.6393307447433472, "lr": 1.2816309118808095e-05, "epoch": 0.877885462555066, "percentage": 43.89, "elapsed_time": "3:39:52", "remaining_time": "4:41:07"} +{"current_steps": 2492, "total_steps": 5676, "loss": 0.6562504768371582, "lr": 1.2810718081341604e-05, "epoch": 0.8782378854625551, "percentage": 43.9, "elapsed_time": "3:39:58", "remaining_time": "4:41:03"} +{"current_steps": 2493, "total_steps": 5676, "loss": 0.6737300753593445, "lr": 1.2805126089723798e-05, "epoch": 0.878590308370044, "percentage": 43.92, "elapsed_time": "3:40:03", "remaining_time": "4:40:57"} +{"current_steps": 2494, "total_steps": 5676, "loss": 0.6246815919876099, "lr": 1.2799533145852982e-05, "epoch": 0.8789427312775331, "percentage": 43.94, "elapsed_time": "3:40:08", "remaining_time": "4:40:52"} +{"current_steps": 2495, "total_steps": 5676, "loss": 0.7499577403068542, "lr": 1.2793939251627788e-05, "epoch": 0.879295154185022, "percentage": 43.96, "elapsed_time": "3:40:13", "remaining_time": "4:40:45"} +{"current_steps": 2496, "total_steps": 5676, "loss": 0.7645655870437622, "lr": 1.2788344408947171e-05, "epoch": 0.8796475770925111, "percentage": 43.97, "elapsed_time": "3:40:17", "remaining_time": "4:40:40"} +{"current_steps": 2497, "total_steps": 5676, "loss": 0.7407524585723877, "lr": 1.27827486197104e-05, "epoch": 0.88, "percentage": 43.99, "elapsed_time": "3:40:22", "remaining_time": "4:40:34"} +{"current_steps": 2498, "total_steps": 5676, "loss": 0.6401108503341675, "lr": 1.2777151885817078e-05, "epoch": 0.8803524229074889, "percentage": 44.01, "elapsed_time": "3:40:27", "remaining_time": "4:40:28"} +{"current_steps": 
2499, "total_steps": 5676, "loss": 0.8332269191741943, "lr": 1.2771554209167116e-05, "epoch": 0.880704845814978, "percentage": 44.03, "elapsed_time": "3:40:32", "remaining_time": "4:40:22"} +{"current_steps": 2500, "total_steps": 5676, "loss": 0.7677830457687378, "lr": 1.2765955591660757e-05, "epoch": 0.8810572687224669, "percentage": 44.05, "elapsed_time": "3:40:37", "remaining_time": "4:40:16"} +{"current_steps": 2501, "total_steps": 5676, "loss": 0.8532943725585938, "lr": 1.2760356035198553e-05, "epoch": 0.881409691629956, "percentage": 44.06, "elapsed_time": "3:40:46", "remaining_time": "4:40:16"} +{"current_steps": 2502, "total_steps": 5676, "loss": 0.6287009716033936, "lr": 1.2754755541681384e-05, "epoch": 0.8817621145374449, "percentage": 44.08, "elapsed_time": "3:40:51", "remaining_time": "4:40:11"} +{"current_steps": 2503, "total_steps": 5676, "loss": 0.7039133310317993, "lr": 1.2749154113010432e-05, "epoch": 0.882114537444934, "percentage": 44.1, "elapsed_time": "3:40:56", "remaining_time": "4:40:05"} +{"current_steps": 2504, "total_steps": 5676, "loss": 0.6959357857704163, "lr": 1.2743551751087222e-05, "epoch": 0.8824669603524229, "percentage": 44.12, "elapsed_time": "3:41:02", "remaining_time": "4:40:00"} +{"current_steps": 2505, "total_steps": 5676, "loss": 0.6862938404083252, "lr": 1.2737948457813571e-05, "epoch": 0.8828193832599119, "percentage": 44.13, "elapsed_time": "3:41:07", "remaining_time": "4:39:54"} +{"current_steps": 2506, "total_steps": 5676, "loss": 0.6903352737426758, "lr": 1.273234423509163e-05, "epoch": 0.8831718061674009, "percentage": 44.15, "elapsed_time": "3:41:12", "remaining_time": "4:39:49"} +{"current_steps": 2507, "total_steps": 5676, "loss": 0.7226145267486572, "lr": 1.2726739084823851e-05, "epoch": 0.8835242290748899, "percentage": 44.17, "elapsed_time": "3:41:18", "remaining_time": "4:39:44"} +{"current_steps": 2508, "total_steps": 5676, "loss": 0.7865043878555298, "lr": 1.2721133008913015e-05, "epoch": 0.8838766519823789, 
"percentage": 44.19, "elapsed_time": "3:41:23", "remaining_time": "4:39:39"} +{"current_steps": 2509, "total_steps": 5676, "loss": 0.6594572067260742, "lr": 1.2715526009262209e-05, "epoch": 0.8842290748898678, "percentage": 44.2, "elapsed_time": "3:41:28", "remaining_time": "4:39:33"} +{"current_steps": 2510, "total_steps": 5676, "loss": 0.5967481136322021, "lr": 1.270991808777483e-05, "epoch": 0.8845814977973568, "percentage": 44.22, "elapsed_time": "3:41:33", "remaining_time": "4:39:28"} +{"current_steps": 2511, "total_steps": 5676, "loss": 0.7843632698059082, "lr": 1.2704309246354599e-05, "epoch": 0.8849339207048458, "percentage": 44.24, "elapsed_time": "3:41:38", "remaining_time": "4:39:22"} +{"current_steps": 2512, "total_steps": 5676, "loss": 0.7475506067276001, "lr": 1.2698699486905538e-05, "epoch": 0.8852863436123348, "percentage": 44.26, "elapsed_time": "3:41:43", "remaining_time": "4:39:16"} +{"current_steps": 2513, "total_steps": 5676, "loss": 0.8082534670829773, "lr": 1.2693088811331987e-05, "epoch": 0.8856387665198238, "percentage": 44.27, "elapsed_time": "3:41:48", "remaining_time": "4:39:10"} +{"current_steps": 2514, "total_steps": 5676, "loss": 0.7421785593032837, "lr": 1.2687477221538598e-05, "epoch": 0.8859911894273128, "percentage": 44.29, "elapsed_time": "3:41:52", "remaining_time": "4:39:03"} +{"current_steps": 2515, "total_steps": 5676, "loss": 0.6268718242645264, "lr": 1.2681864719430328e-05, "epoch": 0.8863436123348017, "percentage": 44.31, "elapsed_time": "3:41:56", "remaining_time": "4:38:57"} +{"current_steps": 2516, "total_steps": 5676, "loss": 0.7285459041595459, "lr": 1.2676251306912448e-05, "epoch": 0.8866960352422908, "percentage": 44.33, "elapsed_time": "3:42:01", "remaining_time": "4:38:51"} +{"current_steps": 2517, "total_steps": 5676, "loss": 0.6132184267044067, "lr": 1.2670636985890542e-05, "epoch": 0.8870484581497797, "percentage": 44.34, "elapsed_time": "3:42:06", "remaining_time": "4:38:45"} +{"current_steps": 2518, 
"total_steps": 5676, "loss": 0.5550754070281982, "lr": 1.2665021758270488e-05, "epoch": 0.8874008810572688, "percentage": 44.36, "elapsed_time": "3:42:12", "remaining_time": "4:38:41"} +{"current_steps": 2519, "total_steps": 5676, "loss": 0.5357390642166138, "lr": 1.2659405625958488e-05, "epoch": 0.8877533039647577, "percentage": 44.38, "elapsed_time": "3:42:19", "remaining_time": "4:38:37"} +{"current_steps": 2520, "total_steps": 5676, "loss": 0.5858328342437744, "lr": 1.2653788590861039e-05, "epoch": 0.8881057268722466, "percentage": 44.4, "elapsed_time": "3:42:24", "remaining_time": "4:38:32"} +{"current_steps": 2521, "total_steps": 5676, "loss": 0.7109283208847046, "lr": 1.2648170654884955e-05, "epoch": 0.8884581497797357, "percentage": 44.42, "elapsed_time": "3:42:30", "remaining_time": "4:38:27"} +{"current_steps": 2522, "total_steps": 5676, "loss": 0.808137834072113, "lr": 1.2642551819937348e-05, "epoch": 0.8888105726872246, "percentage": 44.43, "elapsed_time": "3:42:35", "remaining_time": "4:38:22"} +{"current_steps": 2523, "total_steps": 5676, "loss": 0.587998628616333, "lr": 1.2636932087925637e-05, "epoch": 0.8891629955947137, "percentage": 44.45, "elapsed_time": "3:42:40", "remaining_time": "4:38:16"} +{"current_steps": 2524, "total_steps": 5676, "loss": 0.5555537343025208, "lr": 1.2631311460757545e-05, "epoch": 0.8895154185022026, "percentage": 44.47, "elapsed_time": "3:42:44", "remaining_time": "4:38:10"} +{"current_steps": 2525, "total_steps": 5676, "loss": 0.641632080078125, "lr": 1.2625689940341102e-05, "epoch": 0.8898678414096917, "percentage": 44.49, "elapsed_time": "3:42:51", "remaining_time": "4:38:06"} +{"current_steps": 2526, "total_steps": 5676, "loss": 0.7005184888839722, "lr": 1.262006752858464e-05, "epoch": 0.8902202643171806, "percentage": 44.5, "elapsed_time": "3:42:57", "remaining_time": "4:38:01"} +{"current_steps": 2527, "total_steps": 5676, "loss": 0.6907261610031128, "lr": 1.2614444227396792e-05, "epoch": 0.8905726872246696, 
"percentage": 44.52, "elapsed_time": "3:43:01", "remaining_time": "4:37:55"} +{"current_steps": 2528, "total_steps": 5676, "loss": 0.5757718086242676, "lr": 1.2608820038686492e-05, "epoch": 0.8909251101321586, "percentage": 44.54, "elapsed_time": "3:43:05", "remaining_time": "4:37:48"} +{"current_steps": 2529, "total_steps": 5676, "loss": 0.6462569832801819, "lr": 1.2603194964362979e-05, "epoch": 0.8912775330396476, "percentage": 44.56, "elapsed_time": "3:43:11", "remaining_time": "4:37:43"} +{"current_steps": 2530, "total_steps": 5676, "loss": 0.7028999328613281, "lr": 1.2597569006335787e-05, "epoch": 0.8916299559471366, "percentage": 44.57, "elapsed_time": "3:43:15", "remaining_time": "4:37:37"} +{"current_steps": 2531, "total_steps": 5676, "loss": 0.5789325833320618, "lr": 1.2591942166514763e-05, "epoch": 0.8919823788546255, "percentage": 44.59, "elapsed_time": "3:43:21", "remaining_time": "4:37:32"} +{"current_steps": 2532, "total_steps": 5676, "loss": 0.6545255184173584, "lr": 1.258631444681003e-05, "epoch": 0.8923348017621145, "percentage": 44.61, "elapsed_time": "3:43:26", "remaining_time": "4:37:27"} +{"current_steps": 2533, "total_steps": 5676, "loss": 0.7227469682693481, "lr": 1.258068584913204e-05, "epoch": 0.8926872246696035, "percentage": 44.63, "elapsed_time": "3:43:30", "remaining_time": "4:37:20"} +{"current_steps": 2534, "total_steps": 5676, "loss": 0.5985771417617798, "lr": 1.2575056375391513e-05, "epoch": 0.8930396475770925, "percentage": 44.64, "elapsed_time": "3:43:36", "remaining_time": "4:37:16"} +{"current_steps": 2535, "total_steps": 5676, "loss": 0.6705960035324097, "lr": 1.2569426027499485e-05, "epoch": 0.8933920704845815, "percentage": 44.66, "elapsed_time": "3:43:40", "remaining_time": "4:37:09"} +{"current_steps": 2536, "total_steps": 5676, "loss": 0.6662027835845947, "lr": 1.2563794807367284e-05, "epoch": 0.8937444933920705, "percentage": 44.68, "elapsed_time": "3:43:46", "remaining_time": "4:37:03"} +{"current_steps": 2537, 
"total_steps": 5676, "loss": 0.7742453813552856, "lr": 1.2558162716906537e-05, "epoch": 0.8940969162995595, "percentage": 44.7, "elapsed_time": "3:43:52", "remaining_time": "4:36:59"} +{"current_steps": 2538, "total_steps": 5676, "loss": 0.6124528050422668, "lr": 1.255252975802916e-05, "epoch": 0.8944493392070485, "percentage": 44.71, "elapsed_time": "3:43:56", "remaining_time": "4:36:52"} +{"current_steps": 2539, "total_steps": 5676, "loss": 0.5728615522384644, "lr": 1.2546895932647365e-05, "epoch": 0.8948017621145374, "percentage": 44.73, "elapsed_time": "3:44:01", "remaining_time": "4:36:47"} +{"current_steps": 2540, "total_steps": 5676, "loss": 0.6347167491912842, "lr": 1.2541261242673665e-05, "epoch": 0.8951541850220265, "percentage": 44.75, "elapsed_time": "3:44:05", "remaining_time": "4:36:40"} +{"current_steps": 2541, "total_steps": 5676, "loss": 0.6350656747817993, "lr": 1.2535625690020861e-05, "epoch": 0.8955066079295154, "percentage": 44.77, "elapsed_time": "3:44:11", "remaining_time": "4:36:35"} +{"current_steps": 2542, "total_steps": 5676, "loss": 0.7538303732872009, "lr": 1.2529989276602043e-05, "epoch": 0.8958590308370044, "percentage": 44.79, "elapsed_time": "3:44:16", "remaining_time": "4:36:30"} +{"current_steps": 2543, "total_steps": 5676, "loss": 0.8154318928718567, "lr": 1.2524352004330607e-05, "epoch": 0.8962114537444934, "percentage": 44.8, "elapsed_time": "3:44:21", "remaining_time": "4:36:25"} +{"current_steps": 2544, "total_steps": 5676, "loss": 0.5313037633895874, "lr": 1.2518713875120222e-05, "epoch": 0.8965638766519823, "percentage": 44.82, "elapsed_time": "3:44:26", "remaining_time": "4:36:18"} +{"current_steps": 2545, "total_steps": 5676, "loss": 0.740921139717102, "lr": 1.2513074890884864e-05, "epoch": 0.8969162995594714, "percentage": 44.84, "elapsed_time": "3:44:31", "remaining_time": "4:36:13"} +{"current_steps": 2546, "total_steps": 5676, "loss": 0.6079888343811035, "lr": 1.250743505353879e-05, "epoch": 0.8972687224669603, 
"percentage": 44.86, "elapsed_time": "3:44:35", "remaining_time": "4:36:07"} +{"current_steps": 2547, "total_steps": 5676, "loss": 0.46736663579940796, "lr": 1.2501794364996553e-05, "epoch": 0.8976211453744494, "percentage": 44.87, "elapsed_time": "3:44:41", "remaining_time": "4:36:02"} +{"current_steps": 2548, "total_steps": 5676, "loss": 0.5670880079269409, "lr": 1.2496152827172982e-05, "epoch": 0.8979735682819383, "percentage": 44.89, "elapsed_time": "3:44:46", "remaining_time": "4:35:56"} +{"current_steps": 2549, "total_steps": 5676, "loss": 0.7845931649208069, "lr": 1.2490510441983212e-05, "epoch": 0.8983259911894274, "percentage": 44.91, "elapsed_time": "3:44:52", "remaining_time": "4:35:51"} +{"current_steps": 2550, "total_steps": 5676, "loss": 0.5625143647193909, "lr": 1.2484867211342653e-05, "epoch": 0.8986784140969163, "percentage": 44.93, "elapsed_time": "3:44:57", "remaining_time": "4:35:46"} +{"current_steps": 2551, "total_steps": 5676, "loss": 0.6631217002868652, "lr": 1.2479223137167011e-05, "epoch": 0.8990308370044053, "percentage": 44.94, "elapsed_time": "3:45:01", "remaining_time": "4:35:39"} +{"current_steps": 2552, "total_steps": 5676, "loss": 0.6588548421859741, "lr": 1.247357822137227e-05, "epoch": 0.8993832599118943, "percentage": 44.96, "elapsed_time": "3:45:06", "remaining_time": "4:35:33"} +{"current_steps": 2553, "total_steps": 5676, "loss": 0.8046029806137085, "lr": 1.24679324658747e-05, "epoch": 0.8997356828193832, "percentage": 44.98, "elapsed_time": "3:45:12", "remaining_time": "4:35:29"} +{"current_steps": 2554, "total_steps": 5676, "loss": 0.6651894450187683, "lr": 1.2462285872590862e-05, "epoch": 0.9000881057268723, "percentage": 45.0, "elapsed_time": "3:45:17", "remaining_time": "4:35:24"} +{"current_steps": 2555, "total_steps": 5676, "loss": 0.5888474583625793, "lr": 1.2456638443437605e-05, "epoch": 0.9004405286343612, "percentage": 45.01, "elapsed_time": "3:45:23", "remaining_time": "4:35:19"} +{"current_steps": 2556, 
"total_steps": 5676, "loss": 0.5915735363960266, "lr": 1.2450990180332045e-05, "epoch": 0.9007929515418502, "percentage": 45.03, "elapsed_time": "3:45:27", "remaining_time": "4:35:13"} +{"current_steps": 2557, "total_steps": 5676, "loss": 0.6830431222915649, "lr": 1.24453410851916e-05, "epoch": 0.9011453744493392, "percentage": 45.05, "elapsed_time": "3:45:33", "remaining_time": "4:35:07"} +{"current_steps": 2558, "total_steps": 5676, "loss": 0.6812379956245422, "lr": 1.2439691159933955e-05, "epoch": 0.9014977973568282, "percentage": 45.07, "elapsed_time": "3:45:39", "remaining_time": "4:35:04"} +{"current_steps": 2559, "total_steps": 5676, "loss": 0.6887152791023254, "lr": 1.2434040406477092e-05, "epoch": 0.9018502202643172, "percentage": 45.08, "elapsed_time": "3:45:45", "remaining_time": "4:34:59"} +{"current_steps": 2560, "total_steps": 5676, "loss": 0.677071213722229, "lr": 1.2428388826739254e-05, "epoch": 0.9022026431718062, "percentage": 45.1, "elapsed_time": "3:45:50", "remaining_time": "4:34:53"} +{"current_steps": 2561, "total_steps": 5676, "loss": 0.6635652780532837, "lr": 1.242273642263899e-05, "epoch": 0.9025550660792951, "percentage": 45.12, "elapsed_time": "3:45:55", "remaining_time": "4:34:47"} +{"current_steps": 2562, "total_steps": 5676, "loss": 0.7543712854385376, "lr": 1.2417083196095105e-05, "epoch": 0.9029074889867842, "percentage": 45.14, "elapsed_time": "3:45:59", "remaining_time": "4:34:40"} +{"current_steps": 2563, "total_steps": 5676, "loss": 0.6219073534011841, "lr": 1.2411429149026701e-05, "epoch": 0.9032599118942731, "percentage": 45.16, "elapsed_time": "3:46:04", "remaining_time": "4:34:35"} +{"current_steps": 2564, "total_steps": 5676, "loss": 0.6576533317565918, "lr": 1.2405774283353144e-05, "epoch": 0.9036123348017621, "percentage": 45.17, "elapsed_time": "3:46:09", "remaining_time": "4:34:29"} +{"current_steps": 2565, "total_steps": 5676, "loss": 0.6458585262298584, "lr": 1.240011860099409e-05, "epoch": 0.9039647577092511, 
"percentage": 45.19, "elapsed_time": "3:46:13", "remaining_time": "4:34:23"} +{"current_steps": 2566, "total_steps": 5676, "loss": 0.6943198442459106, "lr": 1.2394462103869464e-05, "epoch": 0.90431718061674, "percentage": 45.21, "elapsed_time": "3:46:19", "remaining_time": "4:34:17"} +{"current_steps": 2567, "total_steps": 5676, "loss": 0.6684235334396362, "lr": 1.2388804793899473e-05, "epoch": 0.9046696035242291, "percentage": 45.23, "elapsed_time": "3:46:24", "remaining_time": "4:34:12"} +{"current_steps": 2568, "total_steps": 5676, "loss": 0.6707017421722412, "lr": 1.2383146673004598e-05, "epoch": 0.905022026431718, "percentage": 45.24, "elapsed_time": "3:46:29", "remaining_time": "4:34:07"} +{"current_steps": 2569, "total_steps": 5676, "loss": 0.6009544134140015, "lr": 1.2377487743105593e-05, "epoch": 0.9053744493392071, "percentage": 45.26, "elapsed_time": "3:46:34", "remaining_time": "4:34:01"} +{"current_steps": 2570, "total_steps": 5676, "loss": 0.57770836353302, "lr": 1.2371828006123488e-05, "epoch": 0.905726872246696, "percentage": 45.28, "elapsed_time": "3:46:40", "remaining_time": "4:33:57"} +{"current_steps": 2571, "total_steps": 5676, "loss": 0.6146866083145142, "lr": 1.236616746397959e-05, "epoch": 0.9060792951541851, "percentage": 45.3, "elapsed_time": "3:46:43", "remaining_time": "4:33:49"} +{"current_steps": 2572, "total_steps": 5676, "loss": 0.6374951601028442, "lr": 1.2360506118595476e-05, "epoch": 0.906431718061674, "percentage": 45.31, "elapsed_time": "3:46:49", "remaining_time": "4:33:44"} +{"current_steps": 2573, "total_steps": 5676, "loss": 0.6933800578117371, "lr": 1.2354843971892998e-05, "epoch": 0.906784140969163, "percentage": 45.33, "elapsed_time": "3:46:54", "remaining_time": "4:33:38"} +{"current_steps": 2574, "total_steps": 5676, "loss": 0.857126772403717, "lr": 1.2349181025794278e-05, "epoch": 0.907136563876652, "percentage": 45.35, "elapsed_time": "3:46:59", "remaining_time": "4:33:32"} +{"current_steps": 2575, "total_steps": 
5676, "loss": 0.7316192388534546, "lr": 1.2343517282221704e-05, "epoch": 0.9074889867841409, "percentage": 45.37, "elapsed_time": "3:47:04", "remaining_time": "4:33:27"} +{"current_steps": 2576, "total_steps": 5676, "loss": 0.7916824817657471, "lr": 1.2337852743097947e-05, "epoch": 0.90784140969163, "percentage": 45.38, "elapsed_time": "3:47:07", "remaining_time": "4:33:19"} +{"current_steps": 2577, "total_steps": 5676, "loss": 0.6437021493911743, "lr": 1.2332187410345941e-05, "epoch": 0.9081938325991189, "percentage": 45.4, "elapsed_time": "3:47:11", "remaining_time": "4:33:13"} +{"current_steps": 2578, "total_steps": 5676, "loss": 0.8788109421730042, "lr": 1.2326521285888892e-05, "epoch": 0.908546255506608, "percentage": 45.42, "elapsed_time": "3:47:16", "remaining_time": "4:33:07"} +{"current_steps": 2579, "total_steps": 5676, "loss": 0.704395055770874, "lr": 1.2320854371650268e-05, "epoch": 0.9088986784140969, "percentage": 45.44, "elapsed_time": "3:47:22", "remaining_time": "4:33:02"} +{"current_steps": 2580, "total_steps": 5676, "loss": 0.6579844951629639, "lr": 1.2315186669553814e-05, "epoch": 0.9092511013215859, "percentage": 45.45, "elapsed_time": "3:47:28", "remaining_time": "4:32:57"} +{"current_steps": 2581, "total_steps": 5676, "loss": 0.6329941749572754, "lr": 1.2309518181523537e-05, "epoch": 0.9096035242290749, "percentage": 45.47, "elapsed_time": "3:47:33", "remaining_time": "4:32:52"} +{"current_steps": 2582, "total_steps": 5676, "loss": 0.8868603706359863, "lr": 1.2303848909483711e-05, "epoch": 0.9099559471365639, "percentage": 45.49, "elapsed_time": "3:47:38", "remaining_time": "4:32:46"} +{"current_steps": 2583, "total_steps": 5676, "loss": 0.6402688026428223, "lr": 1.2298178855358875e-05, "epoch": 0.9103083700440529, "percentage": 45.51, "elapsed_time": "3:47:43", "remaining_time": "4:32:41"} +{"current_steps": 2584, "total_steps": 5676, "loss": 0.8017194271087646, "lr": 1.2292508021073846e-05, "epoch": 0.9106607929515419, "percentage": 45.53, 
"elapsed_time": "3:47:49", "remaining_time": "4:32:36"} +{"current_steps": 2585, "total_steps": 5676, "loss": 0.7396517992019653, "lr": 1.2286836408553687e-05, "epoch": 0.9110132158590308, "percentage": 45.54, "elapsed_time": "3:47:53", "remaining_time": "4:32:29"} +{"current_steps": 2586, "total_steps": 5676, "loss": 0.6123272776603699, "lr": 1.2281164019723737e-05, "epoch": 0.9113656387665198, "percentage": 45.56, "elapsed_time": "3:47:59", "remaining_time": "4:32:25"} +{"current_steps": 2587, "total_steps": 5676, "loss": 0.7675807476043701, "lr": 1.2275490856509591e-05, "epoch": 0.9117180616740088, "percentage": 45.58, "elapsed_time": "3:48:04", "remaining_time": "4:32:20"} +{"current_steps": 2588, "total_steps": 5676, "loss": 0.6814998388290405, "lr": 1.2269816920837121e-05, "epoch": 0.9120704845814978, "percentage": 45.6, "elapsed_time": "3:48:09", "remaining_time": "4:32:14"} +{"current_steps": 2589, "total_steps": 5676, "loss": 0.6290348768234253, "lr": 1.2264142214632441e-05, "epoch": 0.9124229074889868, "percentage": 45.61, "elapsed_time": "3:48:15", "remaining_time": "4:32:10"} +{"current_steps": 2590, "total_steps": 5676, "loss": 0.6752464175224304, "lr": 1.2258466739821946e-05, "epoch": 0.9127753303964757, "percentage": 45.63, "elapsed_time": "3:48:20", "remaining_time": "4:32:04"} +{"current_steps": 2591, "total_steps": 5676, "loss": 0.6153687238693237, "lr": 1.2252790498332275e-05, "epoch": 0.9131277533039648, "percentage": 45.65, "elapsed_time": "3:48:25", "remaining_time": "4:31:58"} +{"current_steps": 2592, "total_steps": 5676, "loss": 0.5952479839324951, "lr": 1.2247113492090344e-05, "epoch": 0.9134801762114537, "percentage": 45.67, "elapsed_time": "3:48:30", "remaining_time": "4:31:52"} +{"current_steps": 2593, "total_steps": 5676, "loss": 0.5457659959793091, "lr": 1.2241435723023309e-05, "epoch": 0.9138325991189428, "percentage": 45.68, "elapsed_time": "3:48:35", "remaining_time": "4:31:47"} +{"current_steps": 2594, "total_steps": 5676, "loss": 
0.7373491525650024, "lr": 1.2235757193058607e-05, "epoch": 0.9141850220264317, "percentage": 45.7, "elapsed_time": "3:48:40", "remaining_time": "4:31:41"} +{"current_steps": 2595, "total_steps": 5676, "loss": 0.6564488410949707, "lr": 1.2230077904123914e-05, "epoch": 0.9145374449339208, "percentage": 45.72, "elapsed_time": "3:48:46", "remaining_time": "4:31:37"} +{"current_steps": 2596, "total_steps": 5676, "loss": 0.6790947914123535, "lr": 1.2224397858147176e-05, "epoch": 0.9148898678414097, "percentage": 45.74, "elapsed_time": "3:48:51", "remaining_time": "4:31:31"} +{"current_steps": 2597, "total_steps": 5676, "loss": 0.8304486274719238, "lr": 1.2218717057056592e-05, "epoch": 0.9152422907488986, "percentage": 45.75, "elapsed_time": "3:48:56", "remaining_time": "4:31:25"} +{"current_steps": 2598, "total_steps": 5676, "loss": 0.7452701330184937, "lr": 1.2213035502780616e-05, "epoch": 0.9155947136563877, "percentage": 45.77, "elapsed_time": "3:49:00", "remaining_time": "4:31:19"} +{"current_steps": 2599, "total_steps": 5676, "loss": 0.572200357913971, "lr": 1.2207353197247957e-05, "epoch": 0.9159471365638766, "percentage": 45.79, "elapsed_time": "3:49:07", "remaining_time": "4:31:15"} +{"current_steps": 2600, "total_steps": 5676, "loss": 0.7142342925071716, "lr": 1.2201670142387587e-05, "epoch": 0.9162995594713657, "percentage": 45.81, "elapsed_time": "3:49:12", "remaining_time": "4:31:10"} +{"current_steps": 2601, "total_steps": 5676, "loss": 0.9390528202056885, "lr": 1.219598634012872e-05, "epoch": 0.9166519823788546, "percentage": 45.82, "elapsed_time": "3:49:22", "remaining_time": "4:31:10"} +{"current_steps": 2602, "total_steps": 5676, "loss": 0.6897540092468262, "lr": 1.2190301792400832e-05, "epoch": 0.9170044052863436, "percentage": 45.84, "elapsed_time": "3:49:28", "remaining_time": "4:31:06"} +{"current_steps": 2603, "total_steps": 5676, "loss": 0.7309582233428955, "lr": 1.2184616501133649e-05, "epoch": 0.9173568281938326, "percentage": 45.86, 
"elapsed_time": "3:49:33", "remaining_time": "4:31:00"} +{"current_steps": 2604, "total_steps": 5676, "loss": 0.7692370414733887, "lr": 1.2178930468257154e-05, "epoch": 0.9177092511013216, "percentage": 45.88, "elapsed_time": "3:49:38", "remaining_time": "4:30:55"} +{"current_steps": 2605, "total_steps": 5676, "loss": 0.7650456428527832, "lr": 1.2173243695701575e-05, "epoch": 0.9180616740088106, "percentage": 45.89, "elapsed_time": "3:49:43", "remaining_time": "4:30:48"} +{"current_steps": 2606, "total_steps": 5676, "loss": 0.6000699996948242, "lr": 1.2167556185397396e-05, "epoch": 0.9184140969162996, "percentage": 45.91, "elapsed_time": "3:49:48", "remaining_time": "4:30:43"} +{"current_steps": 2607, "total_steps": 5676, "loss": 0.6227651834487915, "lr": 1.2161867939275344e-05, "epoch": 0.9187665198237885, "percentage": 45.93, "elapsed_time": "3:49:55", "remaining_time": "4:30:39"} +{"current_steps": 2608, "total_steps": 5676, "loss": 0.6554160118103027, "lr": 1.2156178959266414e-05, "epoch": 0.9191189427312776, "percentage": 45.95, "elapsed_time": "3:49:59", "remaining_time": "4:30:32"} +{"current_steps": 2609, "total_steps": 5676, "loss": 0.5360773801803589, "lr": 1.2150489247301826e-05, "epoch": 0.9194713656387665, "percentage": 45.97, "elapsed_time": "3:50:03", "remaining_time": "4:30:27"} +{"current_steps": 2610, "total_steps": 5676, "loss": 0.7184062004089355, "lr": 1.2144798805313065e-05, "epoch": 0.9198237885462555, "percentage": 45.98, "elapsed_time": "3:50:08", "remaining_time": "4:30:21"} +{"current_steps": 2611, "total_steps": 5676, "loss": 0.646910548210144, "lr": 1.2139107635231857e-05, "epoch": 0.9201762114537445, "percentage": 46.0, "elapsed_time": "3:50:13", "remaining_time": "4:30:15"} +{"current_steps": 2612, "total_steps": 5676, "loss": 0.7794413566589355, "lr": 1.2133415738990179e-05, "epoch": 0.9205286343612334, "percentage": 46.02, "elapsed_time": "3:50:20", "remaining_time": "4:30:12"} +{"current_steps": 2613, "total_steps": 5676, "loss": 
0.5904654860496521, "lr": 1.2127723118520254e-05, "epoch": 0.9208810572687225, "percentage": 46.04, "elapsed_time": "3:50:25", "remaining_time": "4:30:06"} +{"current_steps": 2614, "total_steps": 5676, "loss": 0.5526635646820068, "lr": 1.2122029775754545e-05, "epoch": 0.9212334801762114, "percentage": 46.05, "elapsed_time": "3:50:31", "remaining_time": "4:30:01"} +{"current_steps": 2615, "total_steps": 5676, "loss": 0.6832528710365295, "lr": 1.2116335712625766e-05, "epoch": 0.9215859030837005, "percentage": 46.07, "elapsed_time": "3:50:35", "remaining_time": "4:29:55"} +{"current_steps": 2616, "total_steps": 5676, "loss": 0.5858304500579834, "lr": 1.211064093106688e-05, "epoch": 0.9219383259911894, "percentage": 46.09, "elapsed_time": "3:50:40", "remaining_time": "4:29:49"} +{"current_steps": 2617, "total_steps": 5676, "loss": 0.7383478879928589, "lr": 1.2104945433011079e-05, "epoch": 0.9222907488986785, "percentage": 46.11, "elapsed_time": "3:50:45", "remaining_time": "4:29:43"} +{"current_steps": 2618, "total_steps": 5676, "loss": 0.6466768980026245, "lr": 1.2099249220391815e-05, "epoch": 0.9226431718061674, "percentage": 46.12, "elapsed_time": "3:50:50", "remaining_time": "4:29:38"} +{"current_steps": 2619, "total_steps": 5676, "loss": 0.5681238174438477, "lr": 1.209355229514277e-05, "epoch": 0.9229955947136564, "percentage": 46.14, "elapsed_time": "3:50:54", "remaining_time": "4:29:31"} +{"current_steps": 2620, "total_steps": 5676, "loss": 0.5726606249809265, "lr": 1.2087854659197874e-05, "epoch": 0.9233480176211454, "percentage": 46.16, "elapsed_time": "3:50:59", "remaining_time": "4:29:26"} +{"current_steps": 2621, "total_steps": 5676, "loss": 0.6643342971801758, "lr": 1.2082156314491298e-05, "epoch": 0.9237004405286343, "percentage": 46.18, "elapsed_time": "3:51:06", "remaining_time": "4:29:22"} +{"current_steps": 2622, "total_steps": 5676, "loss": 0.5408967733383179, "lr": 1.2076457262957454e-05, "epoch": 0.9240528634361234, "percentage": 46.19, 
"elapsed_time": "3:51:12", "remaining_time": "4:29:17"} +{"current_steps": 2623, "total_steps": 5676, "loss": 0.706169843673706, "lr": 1.207075750653099e-05, "epoch": 0.9244052863436123, "percentage": 46.21, "elapsed_time": "3:51:17", "remaining_time": "4:29:11"} +{"current_steps": 2624, "total_steps": 5676, "loss": 0.7973969578742981, "lr": 1.2065057047146797e-05, "epoch": 0.9247577092511013, "percentage": 46.23, "elapsed_time": "3:51:22", "remaining_time": "4:29:06"} +{"current_steps": 2625, "total_steps": 5676, "loss": 0.6907010674476624, "lr": 1.2059355886740002e-05, "epoch": 0.9251101321585903, "percentage": 46.25, "elapsed_time": "3:51:29", "remaining_time": "4:29:03"} +{"current_steps": 2626, "total_steps": 5676, "loss": 0.8174253702163696, "lr": 1.2053654027245977e-05, "epoch": 0.9254625550660793, "percentage": 46.26, "elapsed_time": "3:51:33", "remaining_time": "4:28:56"} +{"current_steps": 2627, "total_steps": 5676, "loss": 0.60319983959198, "lr": 1.204795147060032e-05, "epoch": 0.9258149779735683, "percentage": 46.28, "elapsed_time": "3:51:39", "remaining_time": "4:28:52"} +{"current_steps": 2628, "total_steps": 5676, "loss": 0.718228816986084, "lr": 1.204224821873887e-05, "epoch": 0.9261674008810573, "percentage": 46.3, "elapsed_time": "3:51:44", "remaining_time": "4:28:46"} +{"current_steps": 2629, "total_steps": 5676, "loss": 0.7385132312774658, "lr": 1.2036544273597708e-05, "epoch": 0.9265198237885462, "percentage": 46.32, "elapsed_time": "3:51:49", "remaining_time": "4:28:41"} +{"current_steps": 2630, "total_steps": 5676, "loss": 0.7700635194778442, "lr": 1.203083963711315e-05, "epoch": 0.9268722466960353, "percentage": 46.34, "elapsed_time": "3:51:55", "remaining_time": "4:28:37"} +{"current_steps": 2631, "total_steps": 5676, "loss": 0.8767666816711426, "lr": 1.2025134311221732e-05, "epoch": 0.9272246696035242, "percentage": 46.35, "elapsed_time": "3:52:02", "remaining_time": "4:28:32"} +{"current_steps": 2632, "total_steps": 5676, "loss": 
0.6517986059188843, "lr": 1.2019428297860241e-05, "epoch": 0.9275770925110132, "percentage": 46.37, "elapsed_time": "3:52:07", "remaining_time": "4:28:27"} +{"current_steps": 2633, "total_steps": 5676, "loss": 0.5967941284179688, "lr": 1.2013721598965688e-05, "epoch": 0.9279295154185022, "percentage": 46.39, "elapsed_time": "3:52:15", "remaining_time": "4:28:25"} +{"current_steps": 2634, "total_steps": 5676, "loss": 0.7480533123016357, "lr": 1.2008014216475327e-05, "epoch": 0.9282819383259912, "percentage": 46.41, "elapsed_time": "3:52:21", "remaining_time": "4:28:20"} +{"current_steps": 2635, "total_steps": 5676, "loss": 0.8020488023757935, "lr": 1.2002306152326626e-05, "epoch": 0.9286343612334802, "percentage": 46.42, "elapsed_time": "3:52:27", "remaining_time": "4:28:17"} +{"current_steps": 2636, "total_steps": 5676, "loss": 0.5535889863967896, "lr": 1.1996597408457302e-05, "epoch": 0.9289867841409691, "percentage": 46.44, "elapsed_time": "3:52:33", "remaining_time": "4:28:12"} +{"current_steps": 2637, "total_steps": 5676, "loss": 0.6888864040374756, "lr": 1.1990887986805295e-05, "epoch": 0.9293392070484582, "percentage": 46.46, "elapsed_time": "3:52:38", "remaining_time": "4:28:06"} +{"current_steps": 2638, "total_steps": 5676, "loss": 0.7723515033721924, "lr": 1.1985177889308777e-05, "epoch": 0.9296916299559471, "percentage": 46.48, "elapsed_time": "3:52:43", "remaining_time": "4:28:01"} +{"current_steps": 2639, "total_steps": 5676, "loss": 0.6929488182067871, "lr": 1.1979467117906143e-05, "epoch": 0.9300440528634362, "percentage": 46.49, "elapsed_time": "3:52:49", "remaining_time": "4:27:55"} +{"current_steps": 2640, "total_steps": 5676, "loss": 0.7025216221809387, "lr": 1.1973755674536027e-05, "epoch": 0.9303964757709251, "percentage": 46.51, "elapsed_time": "3:52:53", "remaining_time": "4:27:49"} +{"current_steps": 2641, "total_steps": 5676, "loss": 0.6618740558624268, "lr": 1.1968043561137287e-05, "epoch": 0.9307488986784141, "percentage": 46.53, 
"elapsed_time": "3:52:58", "remaining_time": "4:27:43"} +{"current_steps": 2642, "total_steps": 5676, "loss": 0.5308352708816528, "lr": 1.1962330779649002e-05, "epoch": 0.9311013215859031, "percentage": 46.55, "elapsed_time": "3:53:04", "remaining_time": "4:27:38"} +{"current_steps": 2643, "total_steps": 5676, "loss": 0.6559470891952515, "lr": 1.1956617332010488e-05, "epoch": 0.931453744493392, "percentage": 46.56, "elapsed_time": "3:53:08", "remaining_time": "4:27:32"} +{"current_steps": 2644, "total_steps": 5676, "loss": 0.6572221517562866, "lr": 1.1950903220161286e-05, "epoch": 0.9318061674008811, "percentage": 46.58, "elapsed_time": "3:53:11", "remaining_time": "4:27:25"} +{"current_steps": 2645, "total_steps": 5676, "loss": 0.7854161262512207, "lr": 1.194518844604115e-05, "epoch": 0.93215859030837, "percentage": 46.6, "elapsed_time": "3:53:17", "remaining_time": "4:27:20"} +{"current_steps": 2646, "total_steps": 5676, "loss": 0.6471760869026184, "lr": 1.1939473011590075e-05, "epoch": 0.932511013215859, "percentage": 46.62, "elapsed_time": "3:53:23", "remaining_time": "4:27:15"} +{"current_steps": 2647, "total_steps": 5676, "loss": 0.6261184215545654, "lr": 1.1933756918748271e-05, "epoch": 0.932863436123348, "percentage": 46.63, "elapsed_time": "3:53:28", "remaining_time": "4:27:09"} +{"current_steps": 2648, "total_steps": 5676, "loss": 0.6876180171966553, "lr": 1.1928040169456176e-05, "epoch": 0.933215859030837, "percentage": 46.65, "elapsed_time": "3:53:33", "remaining_time": "4:27:04"} +{"current_steps": 2649, "total_steps": 5676, "loss": 0.6782447099685669, "lr": 1.1922322765654446e-05, "epoch": 0.933568281938326, "percentage": 46.67, "elapsed_time": "3:53:38", "remaining_time": "4:26:58"} +{"current_steps": 2650, "total_steps": 5676, "loss": 0.6085894107818604, "lr": 1.1916604709283958e-05, "epoch": 0.933920704845815, "percentage": 46.69, "elapsed_time": "3:53:44", "remaining_time": "4:26:54"} +{"current_steps": 2651, "total_steps": 5676, "loss": 
0.6940577030181885, "lr": 1.1910886002285822e-05, "epoch": 0.934273127753304, "percentage": 46.71, "elapsed_time": "3:53:50", "remaining_time": "4:26:50"} +{"current_steps": 2652, "total_steps": 5676, "loss": 0.8204144239425659, "lr": 1.1905166646601356e-05, "epoch": 0.934625550660793, "percentage": 46.72, "elapsed_time": "3:53:54", "remaining_time": "4:26:43"} +{"current_steps": 2653, "total_steps": 5676, "loss": 0.6184309720993042, "lr": 1.1899446644172106e-05, "epoch": 0.9349779735682819, "percentage": 46.74, "elapsed_time": "3:54:02", "remaining_time": "4:26:40"} +{"current_steps": 2654, "total_steps": 5676, "loss": 0.7499250173568726, "lr": 1.1893725996939831e-05, "epoch": 0.9353303964757709, "percentage": 46.76, "elapsed_time": "3:54:06", "remaining_time": "4:26:34"} +{"current_steps": 2655, "total_steps": 5676, "loss": 0.7021058797836304, "lr": 1.1888004706846519e-05, "epoch": 0.9356828193832599, "percentage": 46.78, "elapsed_time": "3:54:12", "remaining_time": "4:26:29"} +{"current_steps": 2656, "total_steps": 5676, "loss": 0.6005666255950928, "lr": 1.188228277583436e-05, "epoch": 0.9360352422907489, "percentage": 46.79, "elapsed_time": "3:54:18", "remaining_time": "4:26:24"} +{"current_steps": 2657, "total_steps": 5676, "loss": 0.6572481393814087, "lr": 1.1876560205845782e-05, "epoch": 0.9363876651982379, "percentage": 46.81, "elapsed_time": "3:54:23", "remaining_time": "4:26:19"} +{"current_steps": 2658, "total_steps": 5676, "loss": 0.7402434349060059, "lr": 1.187083699882341e-05, "epoch": 0.9367400881057268, "percentage": 46.83, "elapsed_time": "3:54:28", "remaining_time": "4:26:14"} +{"current_steps": 2659, "total_steps": 5676, "loss": 0.6693596243858337, "lr": 1.1865113156710106e-05, "epoch": 0.9370925110132159, "percentage": 46.85, "elapsed_time": "3:54:33", "remaining_time": "4:26:08"} +{"current_steps": 2660, "total_steps": 5676, "loss": 0.7708617448806763, "lr": 1.1859388681448925e-05, "epoch": 0.9374449339207048, "percentage": 46.86, 
"elapsed_time": "3:54:39", "remaining_time": "4:26:03"} +{"current_steps": 2661, "total_steps": 5676, "loss": 0.5871701836585999, "lr": 1.1853663574983154e-05, "epoch": 0.9377973568281939, "percentage": 46.88, "elapsed_time": "3:54:45", "remaining_time": "4:25:59"} +{"current_steps": 2662, "total_steps": 5676, "loss": 0.6492994427680969, "lr": 1.1847937839256287e-05, "epoch": 0.9381497797356828, "percentage": 46.9, "elapsed_time": "3:54:51", "remaining_time": "4:25:54"} +{"current_steps": 2663, "total_steps": 5676, "loss": 0.6803429126739502, "lr": 1.1842211476212038e-05, "epoch": 0.9385022026431719, "percentage": 46.92, "elapsed_time": "3:54:56", "remaining_time": "4:25:49"} +{"current_steps": 2664, "total_steps": 5676, "loss": 0.5602244734764099, "lr": 1.1836484487794318e-05, "epoch": 0.9388546255506608, "percentage": 46.93, "elapsed_time": "3:55:00", "remaining_time": "4:25:42"} +{"current_steps": 2665, "total_steps": 5676, "loss": 0.6562157869338989, "lr": 1.183075687594727e-05, "epoch": 0.9392070484581497, "percentage": 46.95, "elapsed_time": "3:55:07", "remaining_time": "4:25:39"} +{"current_steps": 2666, "total_steps": 5676, "loss": 0.71474289894104, "lr": 1.182502864261524e-05, "epoch": 0.9395594713656388, "percentage": 46.97, "elapsed_time": "3:55:11", "remaining_time": "4:25:32"} +{"current_steps": 2667, "total_steps": 5676, "loss": 0.7130062580108643, "lr": 1.1819299789742782e-05, "epoch": 0.9399118942731277, "percentage": 46.99, "elapsed_time": "3:55:16", "remaining_time": "4:25:27"} +{"current_steps": 2668, "total_steps": 5676, "loss": 0.788813054561615, "lr": 1.1813570319274663e-05, "epoch": 0.9402643171806168, "percentage": 47.0, "elapsed_time": "3:55:21", "remaining_time": "4:25:21"} +{"current_steps": 2669, "total_steps": 5676, "loss": 0.6485022306442261, "lr": 1.1807840233155863e-05, "epoch": 0.9406167400881057, "percentage": 47.02, "elapsed_time": "3:55:26", "remaining_time": "4:25:15"} +{"current_steps": 2670, "total_steps": 5676, "loss": 
0.4491521418094635, "lr": 1.1802109533331562e-05, "epoch": 0.9409691629955947, "percentage": 47.04, "elapsed_time": "3:55:31", "remaining_time": "4:25:09"} +{"current_steps": 2671, "total_steps": 5676, "loss": 0.6073683500289917, "lr": 1.1796378221747162e-05, "epoch": 0.9413215859030837, "percentage": 47.06, "elapsed_time": "3:55:36", "remaining_time": "4:25:04"} +{"current_steps": 2672, "total_steps": 5676, "loss": 0.5942971706390381, "lr": 1.179064630034826e-05, "epoch": 0.9416740088105727, "percentage": 47.08, "elapsed_time": "3:55:40", "remaining_time": "4:24:58"} +{"current_steps": 2673, "total_steps": 5676, "loss": 0.7295013666152954, "lr": 1.1784913771080667e-05, "epoch": 0.9420264317180617, "percentage": 47.09, "elapsed_time": "3:55:46", "remaining_time": "4:24:52"} +{"current_steps": 2674, "total_steps": 5676, "loss": 0.7347372770309448, "lr": 1.1779180635890394e-05, "epoch": 0.9423788546255507, "percentage": 47.11, "elapsed_time": "3:55:53", "remaining_time": "4:24:49"} +{"current_steps": 2675, "total_steps": 5676, "loss": 0.5591942667961121, "lr": 1.1773446896723668e-05, "epoch": 0.9427312775330396, "percentage": 47.13, "elapsed_time": "3:55:58", "remaining_time": "4:24:43"} +{"current_steps": 2676, "total_steps": 5676, "loss": 0.822568953037262, "lr": 1.1767712555526911e-05, "epoch": 0.9430837004405286, "percentage": 47.15, "elapsed_time": "3:56:03", "remaining_time": "4:24:38"} +{"current_steps": 2677, "total_steps": 5676, "loss": 0.649920642375946, "lr": 1.1761977614246757e-05, "epoch": 0.9434361233480176, "percentage": 47.16, "elapsed_time": "3:56:08", "remaining_time": "4:24:32"} +{"current_steps": 2678, "total_steps": 5676, "loss": 0.6298861503601074, "lr": 1.1756242074830036e-05, "epoch": 0.9437885462555066, "percentage": 47.18, "elapsed_time": "3:56:13", "remaining_time": "4:24:27"} +{"current_steps": 2679, "total_steps": 5676, "loss": 0.81938636302948, "lr": 1.1750505939223787e-05, "epoch": 0.9441409691629956, "percentage": 47.2, "elapsed_time": 
"3:56:19", "remaining_time": "4:24:22"} +{"current_steps": 2680, "total_steps": 5676, "loss": 0.6627225875854492, "lr": 1.1744769209375248e-05, "epoch": 0.9444933920704845, "percentage": 47.22, "elapsed_time": "3:56:23", "remaining_time": "4:24:16"} +{"current_steps": 2681, "total_steps": 5676, "loss": 0.6563318371772766, "lr": 1.1739031887231864e-05, "epoch": 0.9448458149779736, "percentage": 47.23, "elapsed_time": "3:56:29", "remaining_time": "4:24:11"} +{"current_steps": 2682, "total_steps": 5676, "loss": 0.5702694654464722, "lr": 1.1733293974741273e-05, "epoch": 0.9451982378854625, "percentage": 47.25, "elapsed_time": "3:56:35", "remaining_time": "4:24:06"} +{"current_steps": 2683, "total_steps": 5676, "loss": 0.685553789138794, "lr": 1.1727555473851321e-05, "epoch": 0.9455506607929516, "percentage": 47.27, "elapsed_time": "3:56:41", "remaining_time": "4:24:01"} +{"current_steps": 2684, "total_steps": 5676, "loss": 0.6092622876167297, "lr": 1.172181638651005e-05, "epoch": 0.9459030837004405, "percentage": 47.29, "elapsed_time": "3:56:46", "remaining_time": "4:23:56"} +{"current_steps": 2685, "total_steps": 5676, "loss": 0.6650614738464355, "lr": 1.1716076714665701e-05, "epoch": 0.9462555066079296, "percentage": 47.3, "elapsed_time": "3:56:50", "remaining_time": "4:23:50"} +{"current_steps": 2686, "total_steps": 5676, "loss": 0.7665754556655884, "lr": 1.171033646026671e-05, "epoch": 0.9466079295154185, "percentage": 47.32, "elapsed_time": "3:56:56", "remaining_time": "4:23:45"} +{"current_steps": 2687, "total_steps": 5676, "loss": 0.6365277171134949, "lr": 1.1704595625261722e-05, "epoch": 0.9469603524229074, "percentage": 47.34, "elapsed_time": "3:57:01", "remaining_time": "4:23:39"} +{"current_steps": 2688, "total_steps": 5676, "loss": 0.6622267961502075, "lr": 1.1698854211599565e-05, "epoch": 0.9473127753303965, "percentage": 47.36, "elapsed_time": "3:57:07", "remaining_time": "4:23:35"} +{"current_steps": 2689, "total_steps": 5676, "loss": 0.6636145710945129, 
"lr": 1.1693112221229278e-05, "epoch": 0.9476651982378854, "percentage": 47.37, "elapsed_time": "3:57:12", "remaining_time": "4:23:29"} +{"current_steps": 2690, "total_steps": 5676, "loss": 0.6943212747573853, "lr": 1.168736965610008e-05, "epoch": 0.9480176211453745, "percentage": 47.39, "elapsed_time": "3:57:18", "remaining_time": "4:23:25"} +{"current_steps": 2691, "total_steps": 5676, "loss": 0.7479512691497803, "lr": 1.1681626518161397e-05, "epoch": 0.9483700440528634, "percentage": 47.41, "elapsed_time": "3:57:25", "remaining_time": "4:23:21"} +{"current_steps": 2692, "total_steps": 5676, "loss": 0.7227041721343994, "lr": 1.1675882809362846e-05, "epoch": 0.9487224669603525, "percentage": 47.43, "elapsed_time": "3:57:29", "remaining_time": "4:23:14"} +{"current_steps": 2693, "total_steps": 5676, "loss": 0.7366166114807129, "lr": 1.1670138531654238e-05, "epoch": 0.9490748898678414, "percentage": 47.45, "elapsed_time": "3:57:35", "remaining_time": "4:23:10"} +{"current_steps": 2694, "total_steps": 5676, "loss": 0.8634493350982666, "lr": 1.1664393686985571e-05, "epoch": 0.9494273127753304, "percentage": 47.46, "elapsed_time": "3:57:40", "remaining_time": "4:23:05"} +{"current_steps": 2695, "total_steps": 5676, "loss": 0.5802862048149109, "lr": 1.165864827730705e-05, "epoch": 0.9497797356828194, "percentage": 47.48, "elapsed_time": "3:57:47", "remaining_time": "4:23:01"} +{"current_steps": 2696, "total_steps": 5676, "loss": 0.5931085348129272, "lr": 1.1652902304569053e-05, "epoch": 0.9501321585903084, "percentage": 47.5, "elapsed_time": "3:57:52", "remaining_time": "4:22:55"} +{"current_steps": 2697, "total_steps": 5676, "loss": 0.7684508562088013, "lr": 1.164715577072217e-05, "epoch": 0.9504845814977974, "percentage": 47.52, "elapsed_time": "3:57:56", "remaining_time": "4:22:49"} +{"current_steps": 2698, "total_steps": 5676, "loss": 0.94246906042099, "lr": 1.1641408677717158e-05, "epoch": 0.9508370044052863, "percentage": 47.53, "elapsed_time": "3:58:01", 
"remaining_time": "4:22:43"} +{"current_steps": 2699, "total_steps": 5676, "loss": 0.7072316408157349, "lr": 1.1635661027504985e-05, "epoch": 0.9511894273127753, "percentage": 47.55, "elapsed_time": "3:58:07", "remaining_time": "4:22:38"} +{"current_steps": 2700, "total_steps": 5676, "loss": 0.5872572064399719, "lr": 1.16299128220368e-05, "epoch": 0.9515418502202643, "percentage": 47.57, "elapsed_time": "3:58:11", "remaining_time": "4:22:32"} +{"current_steps": 2701, "total_steps": 5676, "loss": 0.6549060344696045, "lr": 1.1624164063263931e-05, "epoch": 0.9518942731277533, "percentage": 47.59, "elapsed_time": "3:58:21", "remaining_time": "4:22:31"} +{"current_steps": 2702, "total_steps": 5676, "loss": 0.7338137626647949, "lr": 1.161841475313791e-05, "epoch": 0.9522466960352423, "percentage": 47.6, "elapsed_time": "3:58:26", "remaining_time": "4:22:26"} +{"current_steps": 2703, "total_steps": 5676, "loss": 0.6942911148071289, "lr": 1.161266489361045e-05, "epoch": 0.9525991189427313, "percentage": 47.62, "elapsed_time": "3:58:30", "remaining_time": "4:22:20"} +{"current_steps": 2704, "total_steps": 5676, "loss": 0.674375057220459, "lr": 1.1606914486633444e-05, "epoch": 0.9529515418502202, "percentage": 47.64, "elapsed_time": "3:58:36", "remaining_time": "4:22:15"} +{"current_steps": 2705, "total_steps": 5676, "loss": 0.6790377497673035, "lr": 1.160116353415898e-05, "epoch": 0.9533039647577093, "percentage": 47.66, "elapsed_time": "3:58:41", "remaining_time": "4:22:10"} +{"current_steps": 2706, "total_steps": 5676, "loss": 0.5902142524719238, "lr": 1.1595412038139326e-05, "epoch": 0.9536563876651982, "percentage": 47.67, "elapsed_time": "3:58:46", "remaining_time": "4:22:04"} +{"current_steps": 2707, "total_steps": 5676, "loss": 0.7034019231796265, "lr": 1.1589660000526937e-05, "epoch": 0.9540088105726873, "percentage": 47.69, "elapsed_time": "3:58:51", "remaining_time": "4:21:58"} +{"current_steps": 2708, "total_steps": 5676, "loss": 0.6986846923828125, "lr": 
1.158390742327445e-05, "epoch": 0.9543612334801762, "percentage": 47.71, "elapsed_time": "3:58:57", "remaining_time": "4:21:53"} +{"current_steps": 2709, "total_steps": 5676, "loss": 0.6972544193267822, "lr": 1.1578154308334683e-05, "epoch": 0.9547136563876651, "percentage": 47.73, "elapsed_time": "3:59:02", "remaining_time": "4:21:48"} +{"current_steps": 2710, "total_steps": 5676, "loss": 0.6312702298164368, "lr": 1.1572400657660646e-05, "epoch": 0.9550660792951542, "percentage": 47.74, "elapsed_time": "3:59:07", "remaining_time": "4:21:43"} +{"current_steps": 2711, "total_steps": 5676, "loss": 0.7584360241889954, "lr": 1.1566646473205518e-05, "epoch": 0.9554185022026431, "percentage": 47.76, "elapsed_time": "3:59:13", "remaining_time": "4:21:38"} +{"current_steps": 2712, "total_steps": 5676, "loss": 0.700894296169281, "lr": 1.156089175692267e-05, "epoch": 0.9557709251101322, "percentage": 47.78, "elapsed_time": "3:59:20", "remaining_time": "4:21:34"} +{"current_steps": 2713, "total_steps": 5676, "loss": 0.5637902617454529, "lr": 1.1555136510765645e-05, "epoch": 0.9561233480176211, "percentage": 47.8, "elapsed_time": "3:59:24", "remaining_time": "4:21:28"} +{"current_steps": 2714, "total_steps": 5676, "loss": 0.4537314772605896, "lr": 1.1549380736688173e-05, "epoch": 0.9564757709251102, "percentage": 47.82, "elapsed_time": "3:59:28", "remaining_time": "4:21:21"} +{"current_steps": 2715, "total_steps": 5676, "loss": 0.7880423069000244, "lr": 1.1543624436644161e-05, "epoch": 0.9568281938325991, "percentage": 47.83, "elapsed_time": "3:59:34", "remaining_time": "4:21:16"} +{"current_steps": 2716, "total_steps": 5676, "loss": 0.7314344644546509, "lr": 1.1537867612587692e-05, "epoch": 0.9571806167400881, "percentage": 47.85, "elapsed_time": "3:59:41", "remaining_time": "4:21:13"} +{"current_steps": 2717, "total_steps": 5676, "loss": 0.9550024271011353, "lr": 1.1532110266473026e-05, "epoch": 0.9575330396475771, "percentage": 47.87, "elapsed_time": "3:59:45", 
"remaining_time": "4:21:06"} +{"current_steps": 2718, "total_steps": 5676, "loss": 0.6482470035552979, "lr": 1.152635240025461e-05, "epoch": 0.9578854625550661, "percentage": 47.89, "elapsed_time": "3:59:52", "remaining_time": "4:21:03"} +{"current_steps": 2719, "total_steps": 5676, "loss": 0.6347365975379944, "lr": 1.152059401588705e-05, "epoch": 0.9582378854625551, "percentage": 47.9, "elapsed_time": "3:59:57", "remaining_time": "4:20:57"} +{"current_steps": 2720, "total_steps": 5676, "loss": 0.7214993238449097, "lr": 1.151483511532515e-05, "epoch": 0.958590308370044, "percentage": 47.92, "elapsed_time": "4:00:02", "remaining_time": "4:20:51"} +{"current_steps": 2721, "total_steps": 5676, "loss": 0.6255312561988831, "lr": 1.1509075700523869e-05, "epoch": 0.958942731277533, "percentage": 47.94, "elapsed_time": "4:00:07", "remaining_time": "4:20:46"} +{"current_steps": 2722, "total_steps": 5676, "loss": 0.6152437925338745, "lr": 1.1503315773438352e-05, "epoch": 0.959295154185022, "percentage": 47.96, "elapsed_time": "4:00:11", "remaining_time": "4:20:40"} +{"current_steps": 2723, "total_steps": 5676, "loss": 0.6565401554107666, "lr": 1.1497555336023916e-05, "epoch": 0.959647577092511, "percentage": 47.97, "elapsed_time": "4:00:15", "remaining_time": "4:20:33"} +{"current_steps": 2724, "total_steps": 5676, "loss": 0.796178936958313, "lr": 1.1491794390236047e-05, "epoch": 0.96, "percentage": 47.99, "elapsed_time": "4:00:20", "remaining_time": "4:20:27"} +{"current_steps": 2725, "total_steps": 5676, "loss": 0.6243436336517334, "lr": 1.1486032938030409e-05, "epoch": 0.960352422907489, "percentage": 48.01, "elapsed_time": "4:00:25", "remaining_time": "4:20:22"} +{"current_steps": 2726, "total_steps": 5676, "loss": 0.6043159365653992, "lr": 1.148027098136284e-05, "epoch": 0.960704845814978, "percentage": 48.03, "elapsed_time": "4:00:30", "remaining_time": "4:20:15"} +{"current_steps": 2727, "total_steps": 5676, "loss": 0.7268002033233643, "lr": 1.1474508522189334e-05, 
"epoch": 0.961057268722467, "percentage": 48.04, "elapsed_time": "4:00:33", "remaining_time": "4:20:08"} +{"current_steps": 2728, "total_steps": 5676, "loss": 0.6156840324401855, "lr": 1.1468745562466076e-05, "epoch": 0.9614096916299559, "percentage": 48.06, "elapsed_time": "4:00:37", "remaining_time": "4:20:02"} +{"current_steps": 2729, "total_steps": 5676, "loss": 0.8415796756744385, "lr": 1.1462982104149409e-05, "epoch": 0.961762114537445, "percentage": 48.08, "elapsed_time": "4:00:42", "remaining_time": "4:19:56"} +{"current_steps": 2730, "total_steps": 5676, "loss": 0.5983521342277527, "lr": 1.145721814919585e-05, "epoch": 0.9621145374449339, "percentage": 48.1, "elapsed_time": "4:00:47", "remaining_time": "4:19:50"} +{"current_steps": 2731, "total_steps": 5676, "loss": 0.6144511699676514, "lr": 1.1451453699562077e-05, "epoch": 0.962466960352423, "percentage": 48.11, "elapsed_time": "4:00:52", "remaining_time": "4:19:44"} +{"current_steps": 2732, "total_steps": 5676, "loss": 0.6449630260467529, "lr": 1.1445688757204942e-05, "epoch": 0.9628193832599119, "percentage": 48.13, "elapsed_time": "4:00:56", "remaining_time": "4:19:38"} +{"current_steps": 2733, "total_steps": 5676, "loss": 0.7321716547012329, "lr": 1.1439923324081465e-05, "epoch": 0.9631718061674008, "percentage": 48.15, "elapsed_time": "4:01:01", "remaining_time": "4:19:32"} +{"current_steps": 2734, "total_steps": 5676, "loss": 0.8354923129081726, "lr": 1.1434157402148838e-05, "epoch": 0.9635242290748899, "percentage": 48.17, "elapsed_time": "4:01:06", "remaining_time": "4:19:27"} +{"current_steps": 2735, "total_steps": 5676, "loss": 0.728820264339447, "lr": 1.14283909933644e-05, "epoch": 0.9638766519823788, "percentage": 48.19, "elapsed_time": "4:01:12", "remaining_time": "4:19:22"} +{"current_steps": 2736, "total_steps": 5676, "loss": 0.6683202981948853, "lr": 1.1422624099685675e-05, "epoch": 0.9642290748898679, "percentage": 48.2, "elapsed_time": "4:01:17", "remaining_time": "4:19:16"} 
+{"current_steps": 2737, "total_steps": 5676, "loss": 0.7159590125083923, "lr": 1.141685672307034e-05, "epoch": 0.9645814977973568, "percentage": 48.22, "elapsed_time": "4:01:22", "remaining_time": "4:19:11"} +{"current_steps": 2738, "total_steps": 5676, "loss": 0.8269981145858765, "lr": 1.1411088865476245e-05, "epoch": 0.9649339207048458, "percentage": 48.24, "elapsed_time": "4:01:29", "remaining_time": "4:19:07"} +{"current_steps": 2739, "total_steps": 5676, "loss": 0.6993168592453003, "lr": 1.1405320528861393e-05, "epoch": 0.9652863436123348, "percentage": 48.26, "elapsed_time": "4:01:34", "remaining_time": "4:19:02"} +{"current_steps": 2740, "total_steps": 5676, "loss": 0.6296184062957764, "lr": 1.1399551715183956e-05, "epoch": 0.9656387665198238, "percentage": 48.27, "elapsed_time": "4:01:38", "remaining_time": "4:18:55"} +{"current_steps": 2741, "total_steps": 5676, "loss": 0.670283317565918, "lr": 1.1393782426402267e-05, "epoch": 0.9659911894273128, "percentage": 48.29, "elapsed_time": "4:01:43", "remaining_time": "4:18:50"} +{"current_steps": 2742, "total_steps": 5676, "loss": 0.9248946905136108, "lr": 1.1388012664474824e-05, "epoch": 0.9663436123348018, "percentage": 48.31, "elapsed_time": "4:01:47", "remaining_time": "4:18:43"} +{"current_steps": 2743, "total_steps": 5676, "loss": 0.7965992093086243, "lr": 1.1382242431360272e-05, "epoch": 0.9666960352422908, "percentage": 48.33, "elapsed_time": "4:01:52", "remaining_time": "4:18:38"} +{"current_steps": 2744, "total_steps": 5676, "loss": 0.632454514503479, "lr": 1.1376471729017435e-05, "epoch": 0.9670484581497797, "percentage": 48.34, "elapsed_time": "4:01:57", "remaining_time": "4:18:31"} +{"current_steps": 2745, "total_steps": 5676, "loss": 0.6463649272918701, "lr": 1.1370700559405283e-05, "epoch": 0.9674008810572687, "percentage": 48.36, "elapsed_time": "4:02:03", "remaining_time": "4:18:27"} +{"current_steps": 2746, "total_steps": 5676, "loss": 0.5864677429199219, "lr": 1.1364928924482952e-05, "epoch": 
0.9677533039647577, "percentage": 48.38, "elapsed_time": "4:02:09", "remaining_time": "4:18:22"} +{"current_steps": 2747, "total_steps": 5676, "loss": 0.6313967108726501, "lr": 1.1359156826209726e-05, "epoch": 0.9681057268722467, "percentage": 48.4, "elapsed_time": "4:02:16", "remaining_time": "4:18:19"} +{"current_steps": 2748, "total_steps": 5676, "loss": 0.5736903548240662, "lr": 1.1353384266545056e-05, "epoch": 0.9684581497797357, "percentage": 48.41, "elapsed_time": "4:02:22", "remaining_time": "4:18:14"} +{"current_steps": 2749, "total_steps": 5676, "loss": 0.672286868095398, "lr": 1.1347611247448544e-05, "epoch": 0.9688105726872247, "percentage": 48.43, "elapsed_time": "4:02:27", "remaining_time": "4:18:09"} +{"current_steps": 2750, "total_steps": 5676, "loss": 0.7181379795074463, "lr": 1.1341837770879957e-05, "epoch": 0.9691629955947136, "percentage": 48.45, "elapsed_time": "4:02:32", "remaining_time": "4:18:03"} +{"current_steps": 2751, "total_steps": 5676, "loss": 0.6160816550254822, "lr": 1.1336063838799204e-05, "epoch": 0.9695154185022027, "percentage": 48.47, "elapsed_time": "4:02:37", "remaining_time": "4:17:58"} +{"current_steps": 2752, "total_steps": 5676, "loss": 0.737337589263916, "lr": 1.1330289453166361e-05, "epoch": 0.9698678414096916, "percentage": 48.48, "elapsed_time": "4:02:43", "remaining_time": "4:17:53"} +{"current_steps": 2753, "total_steps": 5676, "loss": 0.6752150058746338, "lr": 1.1324514615941644e-05, "epoch": 0.9702202643171807, "percentage": 48.5, "elapsed_time": "4:02:48", "remaining_time": "4:17:48"} +{"current_steps": 2754, "total_steps": 5676, "loss": 0.6917784214019775, "lr": 1.1318739329085438e-05, "epoch": 0.9705726872246696, "percentage": 48.52, "elapsed_time": "4:02:54", "remaining_time": "4:17:44"} +{"current_steps": 2755, "total_steps": 5676, "loss": 0.7863353490829468, "lr": 1.131296359455827e-05, "epoch": 0.9709251101321585, "percentage": 48.54, "elapsed_time": "4:03:00", "remaining_time": "4:17:39"} 
+{"current_steps": 2756, "total_steps": 5676, "loss": 0.6236519813537598, "lr": 1.1307187414320823e-05, "epoch": 0.9712775330396476, "percentage": 48.56, "elapsed_time": "4:03:06", "remaining_time": "4:17:34"} +{"current_steps": 2757, "total_steps": 5676, "loss": 0.6957560181617737, "lr": 1.130141079033393e-05, "epoch": 0.9716299559471365, "percentage": 48.57, "elapsed_time": "4:03:11", "remaining_time": "4:17:28"} +{"current_steps": 2758, "total_steps": 5676, "loss": 0.6460270881652832, "lr": 1.1295633724558574e-05, "epoch": 0.9719823788546256, "percentage": 48.59, "elapsed_time": "4:03:15", "remaining_time": "4:17:22"} +{"current_steps": 2759, "total_steps": 5676, "loss": 0.7352741956710815, "lr": 1.1289856218955892e-05, "epoch": 0.9723348017621145, "percentage": 48.61, "elapsed_time": "4:03:21", "remaining_time": "4:17:17"} +{"current_steps": 2760, "total_steps": 5676, "loss": 0.6285911798477173, "lr": 1.1284078275487165e-05, "epoch": 0.9726872246696036, "percentage": 48.63, "elapsed_time": "4:03:26", "remaining_time": "4:17:11"} +{"current_steps": 2761, "total_steps": 5676, "loss": 0.6577984094619751, "lr": 1.1278299896113823e-05, "epoch": 0.9730396475770925, "percentage": 48.64, "elapsed_time": "4:03:32", "remaining_time": "4:17:07"} +{"current_steps": 2762, "total_steps": 5676, "loss": 0.6445770859718323, "lr": 1.1272521082797452e-05, "epoch": 0.9733920704845815, "percentage": 48.66, "elapsed_time": "4:03:36", "remaining_time": "4:17:01"} +{"current_steps": 2763, "total_steps": 5676, "loss": 0.557687520980835, "lr": 1.1266741837499773e-05, "epoch": 0.9737444933920705, "percentage": 48.68, "elapsed_time": "4:03:42", "remaining_time": "4:16:55"} +{"current_steps": 2764, "total_steps": 5676, "loss": 0.6117650866508484, "lr": 1.1260962162182664e-05, "epoch": 0.9740969162995595, "percentage": 48.7, "elapsed_time": "4:03:47", "remaining_time": "4:16:50"} +{"current_steps": 2765, "total_steps": 5676, "loss": 0.6498113870620728, "lr": 1.1255182058808143e-05, "epoch": 
0.9744493392070485, "percentage": 48.71, "elapsed_time": "4:03:52", "remaining_time": "4:16:45"} +{"current_steps": 2766, "total_steps": 5676, "loss": 0.8738062381744385, "lr": 1.1249401529338375e-05, "epoch": 0.9748017621145374, "percentage": 48.73, "elapsed_time": "4:03:57", "remaining_time": "4:16:39"} +{"current_steps": 2767, "total_steps": 5676, "loss": 0.551408052444458, "lr": 1.1243620575735672e-05, "epoch": 0.9751541850220264, "percentage": 48.75, "elapsed_time": "4:04:02", "remaining_time": "4:16:34"} +{"current_steps": 2768, "total_steps": 5676, "loss": 0.7197355031967163, "lr": 1.1237839199962488e-05, "epoch": 0.9755066079295154, "percentage": 48.77, "elapsed_time": "4:04:07", "remaining_time": "4:16:28"} +{"current_steps": 2769, "total_steps": 5676, "loss": 0.5704015493392944, "lr": 1.1232057403981415e-05, "epoch": 0.9758590308370044, "percentage": 48.78, "elapsed_time": "4:04:12", "remaining_time": "4:16:23"} +{"current_steps": 2770, "total_steps": 5676, "loss": 0.603929877281189, "lr": 1.1226275189755199e-05, "epoch": 0.9762114537444934, "percentage": 48.8, "elapsed_time": "4:04:19", "remaining_time": "4:16:19"} +{"current_steps": 2771, "total_steps": 5676, "loss": 0.8652673363685608, "lr": 1.1220492559246719e-05, "epoch": 0.9765638766519824, "percentage": 48.82, "elapsed_time": "4:04:24", "remaining_time": "4:16:13"} +{"current_steps": 2772, "total_steps": 5676, "loss": 0.6827684044837952, "lr": 1.1214709514418998e-05, "epoch": 0.9769162995594713, "percentage": 48.84, "elapsed_time": "4:04:29", "remaining_time": "4:16:08"} +{"current_steps": 2773, "total_steps": 5676, "loss": 0.5584808588027954, "lr": 1.1208926057235197e-05, "epoch": 0.9772687224669604, "percentage": 48.85, "elapsed_time": "4:04:35", "remaining_time": "4:16:03"} +{"current_steps": 2774, "total_steps": 5676, "loss": 0.7242820262908936, "lr": 1.1203142189658627e-05, "epoch": 0.9776211453744493, "percentage": 48.87, "elapsed_time": "4:04:39", "remaining_time": "4:15:56"} 
+{"current_steps": 2775, "total_steps": 5676, "loss": 0.5299571752548218, "lr": 1.1197357913652725e-05, "epoch": 0.9779735682819384, "percentage": 48.89, "elapsed_time": "4:04:45", "remaining_time": "4:15:52"} +{"current_steps": 2776, "total_steps": 5676, "loss": 0.69478440284729, "lr": 1.1191573231181074e-05, "epoch": 0.9783259911894273, "percentage": 48.91, "elapsed_time": "4:04:50", "remaining_time": "4:15:46"} +{"current_steps": 2777, "total_steps": 5676, "loss": 0.6997090578079224, "lr": 1.1185788144207394e-05, "epoch": 0.9786784140969162, "percentage": 48.93, "elapsed_time": "4:04:56", "remaining_time": "4:15:42"} +{"current_steps": 2778, "total_steps": 5676, "loss": 0.6882679462432861, "lr": 1.1180002654695543e-05, "epoch": 0.9790308370044053, "percentage": 48.94, "elapsed_time": "4:05:02", "remaining_time": "4:15:37"} +{"current_steps": 2779, "total_steps": 5676, "loss": 0.6434916257858276, "lr": 1.1174216764609514e-05, "epoch": 0.9793832599118942, "percentage": 48.96, "elapsed_time": "4:05:07", "remaining_time": "4:15:31"} +{"current_steps": 2780, "total_steps": 5676, "loss": 0.6614376902580261, "lr": 1.1168430475913437e-05, "epoch": 0.9797356828193833, "percentage": 48.98, "elapsed_time": "4:05:12", "remaining_time": "4:15:26"} +{"current_steps": 2781, "total_steps": 5676, "loss": 0.6440471410751343, "lr": 1.1162643790571574e-05, "epoch": 0.9800881057268722, "percentage": 49.0, "elapsed_time": "4:05:16", "remaining_time": "4:15:20"} +{"current_steps": 2782, "total_steps": 5676, "loss": 0.6493573188781738, "lr": 1.1156856710548327e-05, "epoch": 0.9804405286343613, "percentage": 49.01, "elapsed_time": "4:05:20", "remaining_time": "4:15:13"} +{"current_steps": 2783, "total_steps": 5676, "loss": 0.660174548625946, "lr": 1.1151069237808231e-05, "epoch": 0.9807929515418502, "percentage": 49.03, "elapsed_time": "4:05:25", "remaining_time": "4:15:07"} +{"current_steps": 2784, "total_steps": 5676, "loss": 0.8041812181472778, "lr": 1.1145281374315953e-05, "epoch": 
0.9811453744493392, "percentage": 49.05, "elapsed_time": "4:05:31", "remaining_time": "4:15:03"} +{"current_steps": 2785, "total_steps": 5676, "loss": 0.4758625030517578, "lr": 1.1139493122036289e-05, "epoch": 0.9814977973568282, "percentage": 49.07, "elapsed_time": "4:05:37", "remaining_time": "4:14:58"} +{"current_steps": 2786, "total_steps": 5676, "loss": 0.6482613682746887, "lr": 1.113370448293417e-05, "epoch": 0.9818502202643172, "percentage": 49.08, "elapsed_time": "4:05:42", "remaining_time": "4:14:53"} +{"current_steps": 2787, "total_steps": 5676, "loss": 0.6911569237709045, "lr": 1.1127915458974665e-05, "epoch": 0.9822026431718062, "percentage": 49.1, "elapsed_time": "4:05:47", "remaining_time": "4:14:47"} +{"current_steps": 2788, "total_steps": 5676, "loss": 0.6851824522018433, "lr": 1.1122126052122963e-05, "epoch": 0.9825550660792951, "percentage": 49.12, "elapsed_time": "4:05:52", "remaining_time": "4:14:42"} +{"current_steps": 2789, "total_steps": 5676, "loss": 0.6405081748962402, "lr": 1.111633626434439e-05, "epoch": 0.9829074889867842, "percentage": 49.14, "elapsed_time": "4:05:56", "remaining_time": "4:14:35"} +{"current_steps": 2790, "total_steps": 5676, "loss": 0.7064476013183594, "lr": 1.1110546097604391e-05, "epoch": 0.9832599118942731, "percentage": 49.15, "elapsed_time": "4:06:01", "remaining_time": "4:14:29"} +{"current_steps": 2791, "total_steps": 5676, "loss": 0.641350269317627, "lr": 1.1104755553868559e-05, "epoch": 0.9836123348017621, "percentage": 49.17, "elapsed_time": "4:06:06", "remaining_time": "4:14:24"} +{"current_steps": 2792, "total_steps": 5676, "loss": 0.748977780342102, "lr": 1.1098964635102597e-05, "epoch": 0.9839647577092511, "percentage": 49.19, "elapsed_time": "4:06:12", "remaining_time": "4:14:19"} +{"current_steps": 2793, "total_steps": 5676, "loss": 0.6033440828323364, "lr": 1.1093173343272342e-05, "epoch": 0.9843171806167401, "percentage": 49.21, "elapsed_time": "4:06:17", "remaining_time": "4:14:13"} 
+{"current_steps": 2794, "total_steps": 5676, "loss": 0.5684633255004883, "lr": 1.1087381680343754e-05, "epoch": 0.984669603524229, "percentage": 49.22, "elapsed_time": "4:06:21", "remaining_time": "4:14:07"} +{"current_steps": 2795, "total_steps": 5676, "loss": 0.7041289210319519, "lr": 1.1081589648282928e-05, "epoch": 0.9850220264317181, "percentage": 49.24, "elapsed_time": "4:06:27", "remaining_time": "4:14:02"} +{"current_steps": 2796, "total_steps": 5676, "loss": 0.7189786434173584, "lr": 1.1075797249056079e-05, "epoch": 0.985374449339207, "percentage": 49.26, "elapsed_time": "4:06:33", "remaining_time": "4:13:58"} +{"current_steps": 2797, "total_steps": 5676, "loss": 0.5114344358444214, "lr": 1.1070004484629543e-05, "epoch": 0.9857268722466961, "percentage": 49.28, "elapsed_time": "4:06:37", "remaining_time": "4:13:51"} +{"current_steps": 2798, "total_steps": 5676, "loss": 0.5897136926651001, "lr": 1.1064211356969782e-05, "epoch": 0.986079295154185, "percentage": 49.3, "elapsed_time": "4:06:41", "remaining_time": "4:13:44"} +{"current_steps": 2799, "total_steps": 5676, "loss": 0.8490760326385498, "lr": 1.1058417868043387e-05, "epoch": 0.986431718061674, "percentage": 49.31, "elapsed_time": "4:06:47", "remaining_time": "4:13:40"} +{"current_steps": 2800, "total_steps": 5676, "loss": 0.6392524242401123, "lr": 1.1052624019817065e-05, "epoch": 0.986784140969163, "percentage": 49.33, "elapsed_time": "4:06:53", "remaining_time": "4:13:35"} +{"current_steps": 2801, "total_steps": 5676, "loss": 0.7267303466796875, "lr": 1.104682981425765e-05, "epoch": 0.9871365638766519, "percentage": 49.35, "elapsed_time": "4:07:03", "remaining_time": "4:13:35"} +{"current_steps": 2802, "total_steps": 5676, "loss": 0.6622469425201416, "lr": 1.1041035253332087e-05, "epoch": 0.987488986784141, "percentage": 49.37, "elapsed_time": "4:07:09", "remaining_time": "4:13:30"} +{"current_steps": 2803, "total_steps": 5676, "loss": 0.643883466720581, "lr": 1.1035240339007454e-05, "epoch": 
0.9878414096916299, "percentage": 49.38, "elapsed_time": "4:07:15", "remaining_time": "4:13:26"} +{"current_steps": 2804, "total_steps": 5676, "loss": 0.6281142234802246, "lr": 1.1029445073250945e-05, "epoch": 0.988193832599119, "percentage": 49.4, "elapsed_time": "4:07:20", "remaining_time": "4:13:20"} +{"current_steps": 2805, "total_steps": 5676, "loss": 0.6356241703033447, "lr": 1.1023649458029873e-05, "epoch": 0.9885462555066079, "percentage": 49.42, "elapsed_time": "4:07:26", "remaining_time": "4:13:15"} +{"current_steps": 2806, "total_steps": 5676, "loss": 0.8118115663528442, "lr": 1.1017853495311664e-05, "epoch": 0.988898678414097, "percentage": 49.44, "elapsed_time": "4:07:30", "remaining_time": "4:13:09"} +{"current_steps": 2807, "total_steps": 5676, "loss": 0.7673395276069641, "lr": 1.1012057187063872e-05, "epoch": 0.9892511013215859, "percentage": 49.45, "elapsed_time": "4:07:36", "remaining_time": "4:13:04"} +{"current_steps": 2808, "total_steps": 5676, "loss": 0.6617262959480286, "lr": 1.1006260535254159e-05, "epoch": 0.9896035242290749, "percentage": 49.47, "elapsed_time": "4:07:42", "remaining_time": "4:12:59"} +{"current_steps": 2809, "total_steps": 5676, "loss": 0.537519097328186, "lr": 1.1000463541850315e-05, "epoch": 0.9899559471365639, "percentage": 49.49, "elapsed_time": "4:07:47", "remaining_time": "4:12:54"} +{"current_steps": 2810, "total_steps": 5676, "loss": 0.6281024813652039, "lr": 1.0994666208820229e-05, "epoch": 0.9903083700440528, "percentage": 49.51, "elapsed_time": "4:07:52", "remaining_time": "4:12:49"} +{"current_steps": 2811, "total_steps": 5676, "loss": 0.7189136743545532, "lr": 1.0988868538131922e-05, "epoch": 0.9906607929515419, "percentage": 49.52, "elapsed_time": "4:07:59", "remaining_time": "4:12:45"} +{"current_steps": 2812, "total_steps": 5676, "loss": 0.622093677520752, "lr": 1.098307053175352e-05, "epoch": 0.9910132158590308, "percentage": 49.54, "elapsed_time": "4:08:04", "remaining_time": "4:12:39"} +{"current_steps": 
2813, "total_steps": 5676, "loss": 0.6774802207946777, "lr": 1.0977272191653272e-05, "epoch": 0.9913656387665198, "percentage": 49.56, "elapsed_time": "4:08:09", "remaining_time": "4:12:34"} +{"current_steps": 2814, "total_steps": 5676, "loss": 0.5999646186828613, "lr": 1.0971473519799523e-05, "epoch": 0.9917180616740088, "percentage": 49.58, "elapsed_time": "4:08:15", "remaining_time": "4:12:29"} +{"current_steps": 2815, "total_steps": 5676, "loss": 0.6450619697570801, "lr": 1.096567451816075e-05, "epoch": 0.9920704845814978, "percentage": 49.59, "elapsed_time": "4:08:20", "remaining_time": "4:12:23"} +{"current_steps": 2816, "total_steps": 5676, "loss": 0.693134069442749, "lr": 1.0959875188705529e-05, "epoch": 0.9924229074889868, "percentage": 49.61, "elapsed_time": "4:08:25", "remaining_time": "4:12:18"} +{"current_steps": 2817, "total_steps": 5676, "loss": 0.8968616724014282, "lr": 1.0954075533402557e-05, "epoch": 0.9927753303964758, "percentage": 49.63, "elapsed_time": "4:08:30", "remaining_time": "4:12:12"} +{"current_steps": 2818, "total_steps": 5676, "loss": 0.6114391088485718, "lr": 1.0948275554220632e-05, "epoch": 0.9931277533039647, "percentage": 49.65, "elapsed_time": "4:08:36", "remaining_time": "4:12:07"} +{"current_steps": 2819, "total_steps": 5676, "loss": 0.7583796977996826, "lr": 1.0942475253128667e-05, "epoch": 0.9934801762114538, "percentage": 49.67, "elapsed_time": "4:08:41", "remaining_time": "4:12:03"} +{"current_steps": 2820, "total_steps": 5676, "loss": 0.5683549046516418, "lr": 1.0936674632095683e-05, "epoch": 0.9938325991189427, "percentage": 49.68, "elapsed_time": "4:08:46", "remaining_time": "4:11:57"} +{"current_steps": 2821, "total_steps": 5676, "loss": 0.5664689540863037, "lr": 1.0930873693090815e-05, "epoch": 0.9941850220264317, "percentage": 49.7, "elapsed_time": "4:08:52", "remaining_time": "4:11:52"} +{"current_steps": 2822, "total_steps": 5676, "loss": 0.5799476504325867, "lr": 1.0925072438083296e-05, "epoch": 
0.9945374449339207, "percentage": 49.72, "elapsed_time": "4:08:58", "remaining_time": "4:11:48"} +{"current_steps": 2823, "total_steps": 5676, "loss": 0.6879112720489502, "lr": 1.0919270869042475e-05, "epoch": 0.9948898678414096, "percentage": 49.74, "elapsed_time": "4:09:03", "remaining_time": "4:11:41"} +{"current_steps": 2824, "total_steps": 5676, "loss": 0.6348927021026611, "lr": 1.09134689879378e-05, "epoch": 0.9952422907488987, "percentage": 49.75, "elapsed_time": "4:09:08", "remaining_time": "4:11:36"} +{"current_steps": 2825, "total_steps": 5676, "loss": 0.55754554271698, "lr": 1.0907666796738839e-05, "epoch": 0.9955947136563876, "percentage": 49.77, "elapsed_time": "4:09:13", "remaining_time": "4:11:31"} +{"current_steps": 2826, "total_steps": 5676, "loss": 0.6664899587631226, "lr": 1.090186429741524e-05, "epoch": 0.9959471365638767, "percentage": 49.79, "elapsed_time": "4:09:19", "remaining_time": "4:11:26"} +{"current_steps": 2827, "total_steps": 5676, "loss": 0.6521929502487183, "lr": 1.0896061491936782e-05, "epoch": 0.9962995594713656, "percentage": 49.81, "elapsed_time": "4:09:23", "remaining_time": "4:11:20"} +{"current_steps": 2828, "total_steps": 5676, "loss": 0.542471170425415, "lr": 1.0890258382273333e-05, "epoch": 0.9966519823788547, "percentage": 49.82, "elapsed_time": "4:09:28", "remaining_time": "4:11:14"} +{"current_steps": 2829, "total_steps": 5676, "loss": 0.60117506980896, "lr": 1.0884454970394871e-05, "epoch": 0.9970044052863436, "percentage": 49.84, "elapsed_time": "4:09:33", "remaining_time": "4:11:08"} +{"current_steps": 2830, "total_steps": 5676, "loss": 0.6783676147460938, "lr": 1.0878651258271471e-05, "epoch": 0.9973568281938326, "percentage": 49.86, "elapsed_time": "4:09:37", "remaining_time": "4:11:02"} +{"current_steps": 2831, "total_steps": 5676, "loss": 0.7080766558647156, "lr": 1.0872847247873315e-05, "epoch": 0.9977092511013216, "percentage": 49.88, "elapsed_time": "4:09:44", "remaining_time": "4:10:58"} +{"current_steps": 
2832, "total_steps": 5676, "loss": 0.9228106141090393, "lr": 1.0867042941170677e-05, "epoch": 0.9980616740088105, "percentage": 49.89, "elapsed_time": "4:09:50", "remaining_time": "4:10:53"} +{"current_steps": 2833, "total_steps": 5676, "loss": 0.7601282596588135, "lr": 1.086123834013395e-05, "epoch": 0.9984140969162996, "percentage": 49.91, "elapsed_time": "4:09:54", "remaining_time": "4:10:47"} +{"current_steps": 2834, "total_steps": 5676, "loss": 0.7101393342018127, "lr": 1.0855433446733607e-05, "epoch": 0.9987665198237885, "percentage": 49.93, "elapsed_time": "4:09:59", "remaining_time": "4:10:42"} +{"current_steps": 2835, "total_steps": 5676, "loss": 0.5006242394447327, "lr": 1.084962826294023e-05, "epoch": 0.9991189427312775, "percentage": 49.95, "elapsed_time": "4:10:05", "remaining_time": "4:10:36"} +{"current_steps": 2836, "total_steps": 5676, "loss": 0.7270148992538452, "lr": 1.08438227907245e-05, "epoch": 0.9994713656387665, "percentage": 49.96, "elapsed_time": "4:10:10", "remaining_time": "4:10:31"} +{"current_steps": 2837, "total_steps": 5676, "loss": 0.7252628803253174, "lr": 1.0838017032057194e-05, "epoch": 0.9998237885462555, "percentage": 49.98, "elapsed_time": "4:10:15", "remaining_time": "4:10:25"} +{"current_steps": 2838, "total_steps": 5676, "loss": 0.4579252004623413, "lr": 1.0832210988909187e-05, "epoch": 1.0, "percentage": 50.0, "elapsed_time": "4:10:17", "remaining_time": "4:10:17"} +{"current_steps": 2839, "total_steps": 5676, "loss": 0.635676920413971, "lr": 1.0826404663251446e-05, "epoch": 1.000352422907489, "percentage": 50.02, "elapsed_time": "4:10:23", "remaining_time": "4:10:13"} +{"current_steps": 2840, "total_steps": 5676, "loss": 0.6083015203475952, "lr": 1.0820598057055039e-05, "epoch": 1.0007048458149779, "percentage": 50.04, "elapsed_time": "4:10:28", "remaining_time": "4:10:07"} +{"current_steps": 2841, "total_steps": 5676, "loss": 0.5641704797744751, "lr": 1.0814791172291132e-05, "epoch": 1.001057268722467, "percentage": 
50.05, "elapsed_time": "4:10:34", "remaining_time": "4:10:02"} +{"current_steps": 2842, "total_steps": 5676, "loss": 0.7668559551239014, "lr": 1.0808984010930981e-05, "epoch": 1.001409691629956, "percentage": 50.07, "elapsed_time": "4:10:40", "remaining_time": "4:09:58"} +{"current_steps": 2843, "total_steps": 5676, "loss": 0.5205796957015991, "lr": 1.0803176574945933e-05, "epoch": 1.001762114537445, "percentage": 50.09, "elapsed_time": "4:10:46", "remaining_time": "4:09:53"} +{"current_steps": 2844, "total_steps": 5676, "loss": 0.6771252155303955, "lr": 1.0797368866307431e-05, "epoch": 1.0021145374449338, "percentage": 50.11, "elapsed_time": "4:10:51", "remaining_time": "4:09:47"} +{"current_steps": 2845, "total_steps": 5676, "loss": 0.6101677417755127, "lr": 1.0791560886987016e-05, "epoch": 1.002466960352423, "percentage": 50.12, "elapsed_time": "4:10:56", "remaining_time": "4:09:41"} +{"current_steps": 2846, "total_steps": 5676, "loss": 0.5651522874832153, "lr": 1.0785752638956315e-05, "epoch": 1.002819383259912, "percentage": 50.14, "elapsed_time": "4:11:00", "remaining_time": "4:09:36"} +{"current_steps": 2847, "total_steps": 5676, "loss": 0.6814571619033813, "lr": 1.0779944124187048e-05, "epoch": 1.0031718061674009, "percentage": 50.16, "elapsed_time": "4:11:05", "remaining_time": "4:09:30"} +{"current_steps": 2848, "total_steps": 5676, "loss": 0.6786171197891235, "lr": 1.0774135344651023e-05, "epoch": 1.0035242290748898, "percentage": 50.18, "elapsed_time": "4:11:09", "remaining_time": "4:09:23"} +{"current_steps": 2849, "total_steps": 5676, "loss": 0.5244907736778259, "lr": 1.0768326302320136e-05, "epoch": 1.003876651982379, "percentage": 50.19, "elapsed_time": "4:11:14", "remaining_time": "4:09:18"} +{"current_steps": 2850, "total_steps": 5676, "loss": 0.6368712186813354, "lr": 1.0762516999166383e-05, "epoch": 1.004229074889868, "percentage": 50.21, "elapsed_time": "4:11:18", "remaining_time": "4:09:12"} +{"current_steps": 2851, "total_steps": 5676, 
"loss": 0.6389411687850952, "lr": 1.0756707437161841e-05, "epoch": 1.0045814977973568, "percentage": 50.23, "elapsed_time": "4:11:24", "remaining_time": "4:09:06"} +{"current_steps": 2852, "total_steps": 5676, "loss": 0.6257550716400146, "lr": 1.0750897618278675e-05, "epoch": 1.0049339207048458, "percentage": 50.25, "elapsed_time": "4:11:27", "remaining_time": "4:08:59"} +{"current_steps": 2853, "total_steps": 5676, "loss": 0.49478042125701904, "lr": 1.0745087544489132e-05, "epoch": 1.0052863436123347, "percentage": 50.26, "elapsed_time": "4:11:32", "remaining_time": "4:08:54"} +{"current_steps": 2854, "total_steps": 5676, "loss": 0.6350952386856079, "lr": 1.0739277217765558e-05, "epoch": 1.0056387665198239, "percentage": 50.28, "elapsed_time": "4:11:38", "remaining_time": "4:08:49"} +{"current_steps": 2855, "total_steps": 5676, "loss": 0.6057480573654175, "lr": 1.0733466640080374e-05, "epoch": 1.0059911894273128, "percentage": 50.3, "elapsed_time": "4:11:42", "remaining_time": "4:08:42"} +{"current_steps": 2856, "total_steps": 5676, "loss": 0.5545427799224854, "lr": 1.0727655813406094e-05, "epoch": 1.0063436123348017, "percentage": 50.32, "elapsed_time": "4:11:47", "remaining_time": "4:08:37"} +{"current_steps": 2857, "total_steps": 5676, "loss": 0.55484938621521, "lr": 1.0721844739715311e-05, "epoch": 1.0066960352422907, "percentage": 50.33, "elapsed_time": "4:11:53", "remaining_time": "4:08:32"} +{"current_steps": 2858, "total_steps": 5676, "loss": 0.6889834403991699, "lr": 1.0716033420980703e-05, "epoch": 1.0070484581497798, "percentage": 50.35, "elapsed_time": "4:11:58", "remaining_time": "4:08:27"} +{"current_steps": 2859, "total_steps": 5676, "loss": 0.7259023189544678, "lr": 1.0710221859175031e-05, "epoch": 1.0074008810572688, "percentage": 50.37, "elapsed_time": "4:12:04", "remaining_time": "4:08:22"} +{"current_steps": 2860, "total_steps": 5676, "loss": 0.6200032234191895, "lr": 1.0704410056271144e-05, "epoch": 1.0077533039647577, "percentage": 50.39, 
"elapsed_time": "4:12:09", "remaining_time": "4:08:16"} +{"current_steps": 2861, "total_steps": 5676, "loss": 0.5357909202575684, "lr": 1.069859801424196e-05, "epoch": 1.0081057268722466, "percentage": 50.41, "elapsed_time": "4:12:16", "remaining_time": "4:08:13"} +{"current_steps": 2862, "total_steps": 5676, "loss": 0.8121966123580933, "lr": 1.0692785735060495e-05, "epoch": 1.0084581497797356, "percentage": 50.42, "elapsed_time": "4:12:21", "remaining_time": "4:08:07"} +{"current_steps": 2863, "total_steps": 5676, "loss": 0.5698819160461426, "lr": 1.0686973220699834e-05, "epoch": 1.0088105726872247, "percentage": 50.44, "elapsed_time": "4:12:25", "remaining_time": "4:08:01"} +{"current_steps": 2864, "total_steps": 5676, "loss": 0.6598206162452698, "lr": 1.0681160473133144e-05, "epoch": 1.0091629955947137, "percentage": 50.46, "elapsed_time": "4:12:30", "remaining_time": "4:07:55"} +{"current_steps": 2865, "total_steps": 5676, "loss": 0.7574363946914673, "lr": 1.0675347494333667e-05, "epoch": 1.0095154185022026, "percentage": 50.48, "elapsed_time": "4:12:36", "remaining_time": "4:07:50"} +{"current_steps": 2866, "total_steps": 5676, "loss": 0.6749663949012756, "lr": 1.0669534286274737e-05, "epoch": 1.0098678414096915, "percentage": 50.49, "elapsed_time": "4:12:41", "remaining_time": "4:07:44"} +{"current_steps": 2867, "total_steps": 5676, "loss": 0.5932409763336182, "lr": 1.0663720850929753e-05, "epoch": 1.0102202643171807, "percentage": 50.51, "elapsed_time": "4:12:46", "remaining_time": "4:07:39"} +{"current_steps": 2868, "total_steps": 5676, "loss": 0.7070773839950562, "lr": 1.0657907190272197e-05, "epoch": 1.0105726872246696, "percentage": 50.53, "elapsed_time": "4:12:52", "remaining_time": "4:07:35"} +{"current_steps": 2869, "total_steps": 5676, "loss": 0.531635582447052, "lr": 1.0652093306275621e-05, "epoch": 1.0109251101321586, "percentage": 50.55, "elapsed_time": "4:12:57", "remaining_time": "4:07:29"} +{"current_steps": 2870, "total_steps": 5676, "loss": 
0.5966447591781616, "lr": 1.0646279200913665e-05, "epoch": 1.0112775330396475, "percentage": 50.56, "elapsed_time": "4:13:03", "remaining_time": "4:07:24"} +{"current_steps": 2871, "total_steps": 5676, "loss": 0.6308450698852539, "lr": 1.0640464876160033e-05, "epoch": 1.0116299559471367, "percentage": 50.58, "elapsed_time": "4:13:09", "remaining_time": "4:07:20"} +{"current_steps": 2872, "total_steps": 5676, "loss": 0.6477035284042358, "lr": 1.0634650333988508e-05, "epoch": 1.0119823788546256, "percentage": 50.6, "elapsed_time": "4:13:13", "remaining_time": "4:07:14"} +{"current_steps": 2873, "total_steps": 5676, "loss": 0.5856079459190369, "lr": 1.0628835576372942e-05, "epoch": 1.0123348017621145, "percentage": 50.62, "elapsed_time": "4:13:20", "remaining_time": "4:07:09"} +{"current_steps": 2874, "total_steps": 5676, "loss": 0.733691930770874, "lr": 1.062302060528727e-05, "epoch": 1.0126872246696035, "percentage": 50.63, "elapsed_time": "4:13:25", "remaining_time": "4:07:04"} +{"current_steps": 2875, "total_steps": 5676, "loss": 0.6020156145095825, "lr": 1.0617205422705495e-05, "epoch": 1.0130396475770924, "percentage": 50.65, "elapsed_time": "4:13:31", "remaining_time": "4:07:00"} +{"current_steps": 2876, "total_steps": 5676, "loss": 0.4980982542037964, "lr": 1.0611390030601685e-05, "epoch": 1.0133920704845816, "percentage": 50.67, "elapsed_time": "4:13:37", "remaining_time": "4:06:55"} +{"current_steps": 2877, "total_steps": 5676, "loss": 0.6498349905014038, "lr": 1.0605574430949983e-05, "epoch": 1.0137444933920705, "percentage": 50.69, "elapsed_time": "4:13:43", "remaining_time": "4:06:50"} +{"current_steps": 2878, "total_steps": 5676, "loss": 0.6456383466720581, "lr": 1.0599758625724612e-05, "epoch": 1.0140969162995594, "percentage": 50.7, "elapsed_time": "4:13:50", "remaining_time": "4:06:47"} +{"current_steps": 2879, "total_steps": 5676, "loss": 0.6047386527061462, "lr": 1.059394261689985e-05, "epoch": 1.0144493392070484, "percentage": 50.72, 
"elapsed_time": "4:13:57", "remaining_time": "4:06:43"} +{"current_steps": 2880, "total_steps": 5676, "loss": 0.641674816608429, "lr": 1.0588126406450056e-05, "epoch": 1.0148017621145375, "percentage": 50.74, "elapsed_time": "4:14:02", "remaining_time": "4:06:37"} +{"current_steps": 2881, "total_steps": 5676, "loss": 0.6157702207565308, "lr": 1.0582309996349648e-05, "epoch": 1.0151541850220265, "percentage": 50.76, "elapsed_time": "4:14:06", "remaining_time": "4:06:31"} +{"current_steps": 2882, "total_steps": 5676, "loss": 0.6004809737205505, "lr": 1.057649338857312e-05, "epoch": 1.0155066079295154, "percentage": 50.78, "elapsed_time": "4:14:12", "remaining_time": "4:06:26"} +{"current_steps": 2883, "total_steps": 5676, "loss": 0.5534430742263794, "lr": 1.0570676585095028e-05, "epoch": 1.0158590308370044, "percentage": 50.79, "elapsed_time": "4:14:16", "remaining_time": "4:06:20"} +{"current_steps": 2884, "total_steps": 5676, "loss": 0.7781813144683838, "lr": 1.0564859587889997e-05, "epoch": 1.0162114537444933, "percentage": 50.81, "elapsed_time": "4:14:21", "remaining_time": "4:06:14"} +{"current_steps": 2885, "total_steps": 5676, "loss": 0.6949760913848877, "lr": 1.0559042398932713e-05, "epoch": 1.0165638766519824, "percentage": 50.83, "elapsed_time": "4:14:26", "remaining_time": "4:06:08"} +{"current_steps": 2886, "total_steps": 5676, "loss": 0.5718453526496887, "lr": 1.0553225020197932e-05, "epoch": 1.0169162995594714, "percentage": 50.85, "elapsed_time": "4:14:32", "remaining_time": "4:06:04"} +{"current_steps": 2887, "total_steps": 5676, "loss": 0.6689345836639404, "lr": 1.0547407453660471e-05, "epoch": 1.0172687224669603, "percentage": 50.86, "elapsed_time": "4:14:38", "remaining_time": "4:05:59"} +{"current_steps": 2888, "total_steps": 5676, "loss": 0.6615442037582397, "lr": 1.0541589701295222e-05, "epoch": 1.0176211453744493, "percentage": 50.88, "elapsed_time": "4:14:43", "remaining_time": "4:05:54"} +{"current_steps": 2889, "total_steps": 5676, "loss": 
0.6458337306976318, "lr": 1.0535771765077121e-05, "epoch": 1.0179735682819384, "percentage": 50.9, "elapsed_time": "4:14:48", "remaining_time": "4:05:49"} +{"current_steps": 2890, "total_steps": 5676, "loss": 0.5330519676208496, "lr": 1.052995364698118e-05, "epoch": 1.0183259911894273, "percentage": 50.92, "elapsed_time": "4:14:52", "remaining_time": "4:05:42"} +{"current_steps": 2891, "total_steps": 5676, "loss": 0.6219571232795715, "lr": 1.0524135348982467e-05, "epoch": 1.0186784140969163, "percentage": 50.93, "elapsed_time": "4:14:59", "remaining_time": "4:05:38"} +{"current_steps": 2892, "total_steps": 5676, "loss": 0.6731684803962708, "lr": 1.0518316873056118e-05, "epoch": 1.0190308370044052, "percentage": 50.95, "elapsed_time": "4:15:05", "remaining_time": "4:05:34"} +{"current_steps": 2893, "total_steps": 5676, "loss": 0.6126813888549805, "lr": 1.0512498221177319e-05, "epoch": 1.0193832599118944, "percentage": 50.97, "elapsed_time": "4:15:10", "remaining_time": "4:05:28"} +{"current_steps": 2894, "total_steps": 5676, "loss": 0.5796904563903809, "lr": 1.0506679395321325e-05, "epoch": 1.0197356828193833, "percentage": 50.99, "elapsed_time": "4:15:15", "remaining_time": "4:05:22"} +{"current_steps": 2895, "total_steps": 5676, "loss": 0.5765914916992188, "lr": 1.050086039746344e-05, "epoch": 1.0200881057268723, "percentage": 51.0, "elapsed_time": "4:15:20", "remaining_time": "4:05:16"} +{"current_steps": 2896, "total_steps": 5676, "loss": 0.4798969328403473, "lr": 1.0495041229579043e-05, "epoch": 1.0204405286343612, "percentage": 51.02, "elapsed_time": "4:15:25", "remaining_time": "4:05:11"} +{"current_steps": 2897, "total_steps": 5676, "loss": 0.673927366733551, "lr": 1.0489221893643553e-05, "epoch": 1.0207929515418501, "percentage": 51.04, "elapsed_time": "4:15:31", "remaining_time": "4:05:06"} +{"current_steps": 2898, "total_steps": 5676, "loss": 0.5681431293487549, "lr": 1.0483402391632453e-05, "epoch": 1.0211453744493393, "percentage": 51.06, 
"elapsed_time": "4:15:35", "remaining_time": "4:05:00"} +{"current_steps": 2899, "total_steps": 5676, "loss": 0.6156354546546936, "lr": 1.0477582725521287e-05, "epoch": 1.0214977973568282, "percentage": 51.07, "elapsed_time": "4:15:41", "remaining_time": "4:04:55"} +{"current_steps": 2900, "total_steps": 5676, "loss": 0.6569045782089233, "lr": 1.0471762897285652e-05, "epoch": 1.0218502202643172, "percentage": 51.09, "elapsed_time": "4:15:47", "remaining_time": "4:04:51"} +{"current_steps": 2901, "total_steps": 5676, "loss": 0.6125048995018005, "lr": 1.046594290890119e-05, "epoch": 1.022202643171806, "percentage": 51.11, "elapsed_time": "4:15:58", "remaining_time": "4:04:51"} +{"current_steps": 2902, "total_steps": 5676, "loss": 0.604046106338501, "lr": 1.0460122762343614e-05, "epoch": 1.0225550660792952, "percentage": 51.13, "elapsed_time": "4:16:03", "remaining_time": "4:04:45"} +{"current_steps": 2903, "total_steps": 5676, "loss": 0.4569816589355469, "lr": 1.0454302459588677e-05, "epoch": 1.0229074889867842, "percentage": 51.15, "elapsed_time": "4:16:08", "remaining_time": "4:04:40"} +{"current_steps": 2904, "total_steps": 5676, "loss": 0.5764607787132263, "lr": 1.0448482002612194e-05, "epoch": 1.0232599118942731, "percentage": 51.16, "elapsed_time": "4:16:13", "remaining_time": "4:04:34"} +{"current_steps": 2905, "total_steps": 5676, "loss": 0.5859626531600952, "lr": 1.044266139339003e-05, "epoch": 1.023612334801762, "percentage": 51.18, "elapsed_time": "4:16:18", "remaining_time": "4:04:29"} +{"current_steps": 2906, "total_steps": 5676, "loss": 0.7326341271400452, "lr": 1.04368406338981e-05, "epoch": 1.023964757709251, "percentage": 51.2, "elapsed_time": "4:16:23", "remaining_time": "4:04:23"} +{"current_steps": 2907, "total_steps": 5676, "loss": 0.6355161070823669, "lr": 1.0431019726112366e-05, "epoch": 1.0243171806167402, "percentage": 51.22, "elapsed_time": "4:16:28", "remaining_time": "4:04:17"} +{"current_steps": 2908, "total_steps": 5676, "loss": 
0.6990653872489929, "lr": 1.0425198672008851e-05, "epoch": 1.024669603524229, "percentage": 51.23, "elapsed_time": "4:16:34", "remaining_time": "4:04:13"} +{"current_steps": 2909, "total_steps": 5676, "loss": 0.631952166557312, "lr": 1.0419377473563621e-05, "epoch": 1.025022026431718, "percentage": 51.25, "elapsed_time": "4:16:39", "remaining_time": "4:04:07"} +{"current_steps": 2910, "total_steps": 5676, "loss": 0.4872596561908722, "lr": 1.041355613275279e-05, "epoch": 1.025374449339207, "percentage": 51.27, "elapsed_time": "4:16:44", "remaining_time": "4:04:02"} +{"current_steps": 2911, "total_steps": 5676, "loss": 0.5334043502807617, "lr": 1.0407734651552522e-05, "epoch": 1.0257268722466961, "percentage": 51.29, "elapsed_time": "4:16:48", "remaining_time": "4:03:56"} +{"current_steps": 2912, "total_steps": 5676, "loss": 0.5971134305000305, "lr": 1.0401913031939026e-05, "epoch": 1.026079295154185, "percentage": 51.3, "elapsed_time": "4:16:53", "remaining_time": "4:03:49"} +{"current_steps": 2913, "total_steps": 5676, "loss": 0.6527851819992065, "lr": 1.0396091275888567e-05, "epoch": 1.026431718061674, "percentage": 51.32, "elapsed_time": "4:16:59", "remaining_time": "4:03:45"} +{"current_steps": 2914, "total_steps": 5676, "loss": 0.4515818953514099, "lr": 1.0390269385377444e-05, "epoch": 1.026784140969163, "percentage": 51.34, "elapsed_time": "4:17:04", "remaining_time": "4:03:39"} +{"current_steps": 2915, "total_steps": 5676, "loss": 0.530797004699707, "lr": 1.0384447362382013e-05, "epoch": 1.027136563876652, "percentage": 51.36, "elapsed_time": "4:17:09", "remaining_time": "4:03:34"} +{"current_steps": 2916, "total_steps": 5676, "loss": 0.5477641224861145, "lr": 1.0378625208878666e-05, "epoch": 1.027488986784141, "percentage": 51.37, "elapsed_time": "4:17:14", "remaining_time": "4:03:28"} +{"current_steps": 2917, "total_steps": 5676, "loss": 0.6390479207038879, "lr": 1.0372802926843843e-05, "epoch": 1.02784140969163, "percentage": 51.39, "elapsed_time": 
"4:17:18", "remaining_time": "4:03:22"} +{"current_steps": 2918, "total_steps": 5676, "loss": 0.610755443572998, "lr": 1.0366980518254028e-05, "epoch": 1.028193832599119, "percentage": 51.41, "elapsed_time": "4:17:24", "remaining_time": "4:03:17"} +{"current_steps": 2919, "total_steps": 5676, "loss": 0.5427766442298889, "lr": 1.036115798508575e-05, "epoch": 1.0285462555066078, "percentage": 51.43, "elapsed_time": "4:17:30", "remaining_time": "4:03:13"} +{"current_steps": 2920, "total_steps": 5676, "loss": 0.621055006980896, "lr": 1.0355335329315573e-05, "epoch": 1.028898678414097, "percentage": 51.44, "elapsed_time": "4:17:35", "remaining_time": "4:03:07"} +{"current_steps": 2921, "total_steps": 5676, "loss": 0.6098253726959229, "lr": 1.0349512552920114e-05, "epoch": 1.029251101321586, "percentage": 51.46, "elapsed_time": "4:17:40", "remaining_time": "4:03:01"} +{"current_steps": 2922, "total_steps": 5676, "loss": 0.591664731502533, "lr": 1.0343689657876017e-05, "epoch": 1.0296035242290749, "percentage": 51.48, "elapsed_time": "4:17:45", "remaining_time": "4:02:56"} +{"current_steps": 2923, "total_steps": 5676, "loss": 0.6531485915184021, "lr": 1.033786664615998e-05, "epoch": 1.0299559471365638, "percentage": 51.5, "elapsed_time": "4:17:50", "remaining_time": "4:02:50"} +{"current_steps": 2924, "total_steps": 5676, "loss": 0.6933655738830566, "lr": 1.0332043519748727e-05, "epoch": 1.030308370044053, "percentage": 51.52, "elapsed_time": "4:17:57", "remaining_time": "4:02:46"} +{"current_steps": 2925, "total_steps": 5676, "loss": 0.6512705087661743, "lr": 1.0326220280619036e-05, "epoch": 1.030660792951542, "percentage": 51.53, "elapsed_time": "4:18:02", "remaining_time": "4:02:41"} +{"current_steps": 2926, "total_steps": 5676, "loss": 0.5671502947807312, "lr": 1.0320396930747712e-05, "epoch": 1.0310132158590308, "percentage": 51.55, "elapsed_time": "4:18:09", "remaining_time": "4:02:37"} +{"current_steps": 2927, "total_steps": 5676, "loss": 0.6795192360877991, "lr": 
1.0314573472111601e-05, "epoch": 1.0313656387665198, "percentage": 51.57, "elapsed_time": "4:18:13", "remaining_time": "4:02:31"} +{"current_steps": 2928, "total_steps": 5676, "loss": 0.6357578039169312, "lr": 1.0308749906687585e-05, "epoch": 1.0317180616740087, "percentage": 51.59, "elapsed_time": "4:18:18", "remaining_time": "4:02:25"} +{"current_steps": 2929, "total_steps": 5676, "loss": 0.7009944915771484, "lr": 1.0302926236452588e-05, "epoch": 1.0320704845814979, "percentage": 51.6, "elapsed_time": "4:18:23", "remaining_time": "4:02:20"} +{"current_steps": 2930, "total_steps": 5676, "loss": 0.4685679078102112, "lr": 1.0297102463383557e-05, "epoch": 1.0324229074889868, "percentage": 51.62, "elapsed_time": "4:18:30", "remaining_time": "4:02:16"} +{"current_steps": 2931, "total_steps": 5676, "loss": 0.6359078884124756, "lr": 1.0291278589457488e-05, "epoch": 1.0327753303964757, "percentage": 51.64, "elapsed_time": "4:18:38", "remaining_time": "4:02:13"} +{"current_steps": 2932, "total_steps": 5676, "loss": 0.6606266498565674, "lr": 1.0285454616651398e-05, "epoch": 1.0331277533039647, "percentage": 51.66, "elapsed_time": "4:18:43", "remaining_time": "4:02:07"} +{"current_steps": 2933, "total_steps": 5676, "loss": 0.5405932664871216, "lr": 1.0279630546942353e-05, "epoch": 1.0334801762114538, "percentage": 51.67, "elapsed_time": "4:18:46", "remaining_time": "4:02:01"} +{"current_steps": 2934, "total_steps": 5676, "loss": 0.8072758316993713, "lr": 1.0273806382307443e-05, "epoch": 1.0338325991189428, "percentage": 51.69, "elapsed_time": "4:18:52", "remaining_time": "4:01:56"} +{"current_steps": 2935, "total_steps": 5676, "loss": 0.6923058032989502, "lr": 1.0267982124723783e-05, "epoch": 1.0341850220264317, "percentage": 51.71, "elapsed_time": "4:18:57", "remaining_time": "4:01:50"} +{"current_steps": 2936, "total_steps": 5676, "loss": 0.5577275156974792, "lr": 1.0262157776168533e-05, "epoch": 1.0345374449339206, "percentage": 51.73, "elapsed_time": "4:19:03", 
"remaining_time": "4:01:45"} +{"current_steps": 2937, "total_steps": 5676, "loss": 0.6780786514282227, "lr": 1.0256333338618875e-05, "epoch": 1.0348898678414098, "percentage": 51.74, "elapsed_time": "4:19:09", "remaining_time": "4:01:41"} +{"current_steps": 2938, "total_steps": 5676, "loss": 0.6966040134429932, "lr": 1.0250508814052029e-05, "epoch": 1.0352422907488987, "percentage": 51.76, "elapsed_time": "4:19:14", "remaining_time": "4:01:35"} +{"current_steps": 2939, "total_steps": 5676, "loss": 0.5726339817047119, "lr": 1.0244684204445237e-05, "epoch": 1.0355947136563877, "percentage": 51.78, "elapsed_time": "4:19:18", "remaining_time": "4:01:29"} +{"current_steps": 2940, "total_steps": 5676, "loss": 0.64924156665802, "lr": 1.0238859511775768e-05, "epoch": 1.0359471365638766, "percentage": 51.8, "elapsed_time": "4:19:25", "remaining_time": "4:01:25"} +{"current_steps": 2941, "total_steps": 5676, "loss": 0.49121707677841187, "lr": 1.0233034738020933e-05, "epoch": 1.0362995594713655, "percentage": 51.81, "elapsed_time": "4:19:30", "remaining_time": "4:01:19"} +{"current_steps": 2942, "total_steps": 5676, "loss": 0.5505814552307129, "lr": 1.0227209885158053e-05, "epoch": 1.0366519823788547, "percentage": 51.83, "elapsed_time": "4:19:36", "remaining_time": "4:01:14"} +{"current_steps": 2943, "total_steps": 5676, "loss": 0.7429872751235962, "lr": 1.022138495516449e-05, "epoch": 1.0370044052863436, "percentage": 51.85, "elapsed_time": "4:19:41", "remaining_time": "4:01:09"} +{"current_steps": 2944, "total_steps": 5676, "loss": 0.6492434740066528, "lr": 1.0215559950017624e-05, "epoch": 1.0373568281938326, "percentage": 51.87, "elapsed_time": "4:19:45", "remaining_time": "4:01:03"} +{"current_steps": 2945, "total_steps": 5676, "loss": 0.5418736338615417, "lr": 1.0209734871694865e-05, "epoch": 1.0377092511013215, "percentage": 51.89, "elapsed_time": "4:19:51", "remaining_time": "4:00:58"} +{"current_steps": 2946, "total_steps": 5676, "loss": 0.6252620220184326, "lr": 
1.0203909722173644e-05, "epoch": 1.0380616740088107, "percentage": 51.9, "elapsed_time": "4:19:57", "remaining_time": "4:00:53"} +{"current_steps": 2947, "total_steps": 5676, "loss": 0.5124455690383911, "lr": 1.0198084503431416e-05, "epoch": 1.0384140969162996, "percentage": 51.92, "elapsed_time": "4:20:02", "remaining_time": "4:00:48"} +{"current_steps": 2948, "total_steps": 5676, "loss": 0.5729688405990601, "lr": 1.0192259217445663e-05, "epoch": 1.0387665198237885, "percentage": 51.94, "elapsed_time": "4:20:07", "remaining_time": "4:00:43"} +{"current_steps": 2949, "total_steps": 5676, "loss": 0.5891536474227905, "lr": 1.0186433866193893e-05, "epoch": 1.0391189427312775, "percentage": 51.96, "elapsed_time": "4:20:12", "remaining_time": "4:00:37"} +{"current_steps": 2950, "total_steps": 5676, "loss": 0.774397075176239, "lr": 1.0180608451653626e-05, "epoch": 1.0394713656387666, "percentage": 51.97, "elapsed_time": "4:20:17", "remaining_time": "4:00:31"} +{"current_steps": 2951, "total_steps": 5676, "loss": 0.5987098813056946, "lr": 1.0174782975802408e-05, "epoch": 1.0398237885462556, "percentage": 51.99, "elapsed_time": "4:20:22", "remaining_time": "4:00:26"} +{"current_steps": 2952, "total_steps": 5676, "loss": 0.5334598422050476, "lr": 1.016895744061781e-05, "epoch": 1.0401762114537445, "percentage": 52.01, "elapsed_time": "4:20:26", "remaining_time": "4:00:19"} +{"current_steps": 2953, "total_steps": 5676, "loss": 0.5946340560913086, "lr": 1.0163131848077421e-05, "epoch": 1.0405286343612334, "percentage": 52.03, "elapsed_time": "4:20:31", "remaining_time": "4:00:14"} +{"current_steps": 2954, "total_steps": 5676, "loss": 0.5780941247940063, "lr": 1.0157306200158847e-05, "epoch": 1.0408810572687224, "percentage": 52.04, "elapsed_time": "4:20:37", "remaining_time": "4:00:09"} +{"current_steps": 2955, "total_steps": 5676, "loss": 0.6348963975906372, "lr": 1.0151480498839712e-05, "epoch": 1.0412334801762115, "percentage": 52.06, "elapsed_time": "4:20:42", 
"remaining_time": "4:00:04"} +{"current_steps": 2956, "total_steps": 5676, "loss": 0.6868102550506592, "lr": 1.014565474609766e-05, "epoch": 1.0415859030837005, "percentage": 52.08, "elapsed_time": "4:20:47", "remaining_time": "3:59:57"} +{"current_steps": 2957, "total_steps": 5676, "loss": 0.6507548689842224, "lr": 1.0139828943910358e-05, "epoch": 1.0419383259911894, "percentage": 52.1, "elapsed_time": "4:20:52", "remaining_time": "3:59:52"} +{"current_steps": 2958, "total_steps": 5676, "loss": 0.6358312964439392, "lr": 1.0134003094255478e-05, "epoch": 1.0422907488986783, "percentage": 52.11, "elapsed_time": "4:20:57", "remaining_time": "3:59:47"} +{"current_steps": 2959, "total_steps": 5676, "loss": 0.7530224919319153, "lr": 1.0128177199110723e-05, "epoch": 1.0426431718061675, "percentage": 52.13, "elapsed_time": "4:21:01", "remaining_time": "3:59:41"} +{"current_steps": 2960, "total_steps": 5676, "loss": 0.545819878578186, "lr": 1.012235126045379e-05, "epoch": 1.0429955947136564, "percentage": 52.15, "elapsed_time": "4:21:05", "remaining_time": "3:59:34"} +{"current_steps": 2961, "total_steps": 5676, "loss": 0.6626788377761841, "lr": 1.011652528026242e-05, "epoch": 1.0433480176211454, "percentage": 52.17, "elapsed_time": "4:21:11", "remaining_time": "3:59:29"} +{"current_steps": 2962, "total_steps": 5676, "loss": 0.6407896280288696, "lr": 1.0110699260514336e-05, "epoch": 1.0437004405286343, "percentage": 52.18, "elapsed_time": "4:21:16", "remaining_time": "3:59:24"} +{"current_steps": 2963, "total_steps": 5676, "loss": 0.5633673667907715, "lr": 1.0104873203187307e-05, "epoch": 1.0440528634361232, "percentage": 52.2, "elapsed_time": "4:21:22", "remaining_time": "3:59:19"} +{"current_steps": 2964, "total_steps": 5676, "loss": 0.5356892943382263, "lr": 1.0099047110259081e-05, "epoch": 1.0444052863436124, "percentage": 52.22, "elapsed_time": "4:21:28", "remaining_time": "3:59:14"} +{"current_steps": 2965, "total_steps": 5676, "loss": 0.5527205467224121, "lr": 
1.0093220983707448e-05, "epoch": 1.0447577092511013, "percentage": 52.24, "elapsed_time": "4:21:34", "remaining_time": "3:59:09"} +{"current_steps": 2966, "total_steps": 5676, "loss": 0.6148320436477661, "lr": 1.008739482551019e-05, "epoch": 1.0451101321585903, "percentage": 52.26, "elapsed_time": "4:21:38", "remaining_time": "3:59:03"} +{"current_steps": 2967, "total_steps": 5676, "loss": 0.5713976621627808, "lr": 1.0081568637645111e-05, "epoch": 1.0454625550660792, "percentage": 52.27, "elapsed_time": "4:21:43", "remaining_time": "3:58:57"} +{"current_steps": 2968, "total_steps": 5676, "loss": 0.5836226940155029, "lr": 1.0075742422090015e-05, "epoch": 1.0458149779735684, "percentage": 52.29, "elapsed_time": "4:21:48", "remaining_time": "3:58:51"} +{"current_steps": 2969, "total_steps": 5676, "loss": 0.6452749371528625, "lr": 1.0069916180822727e-05, "epoch": 1.0461674008810573, "percentage": 52.31, "elapsed_time": "4:21:52", "remaining_time": "3:58:45"} +{"current_steps": 2970, "total_steps": 5676, "loss": 0.5468501448631287, "lr": 1.006408991582107e-05, "epoch": 1.0465198237885462, "percentage": 52.33, "elapsed_time": "4:21:59", "remaining_time": "3:58:41"} +{"current_steps": 2971, "total_steps": 5676, "loss": 0.5195704698562622, "lr": 1.0058263629062883e-05, "epoch": 1.0468722466960352, "percentage": 52.34, "elapsed_time": "4:22:04", "remaining_time": "3:58:36"} +{"current_steps": 2972, "total_steps": 5676, "loss": 0.5144641995429993, "lr": 1.0052437322526003e-05, "epoch": 1.0472246696035241, "percentage": 52.36, "elapsed_time": "4:22:10", "remaining_time": "3:58:31"} +{"current_steps": 2973, "total_steps": 5676, "loss": 0.7258927822113037, "lr": 1.004661099818829e-05, "epoch": 1.0475770925110133, "percentage": 52.38, "elapsed_time": "4:22:14", "remaining_time": "3:58:25"} +{"current_steps": 2974, "total_steps": 5676, "loss": 0.6108053922653198, "lr": 1.004078465802759e-05, "epoch": 1.0479295154185022, "percentage": 52.4, "elapsed_time": "4:22:18", 
"remaining_time": "3:58:18"} +{"current_steps": 2975, "total_steps": 5676, "loss": 0.612535834312439, "lr": 1.0034958304021766e-05, "epoch": 1.0482819383259911, "percentage": 52.41, "elapsed_time": "4:22:23", "remaining_time": "3:58:13"} +{"current_steps": 2976, "total_steps": 5676, "loss": 0.7272380590438843, "lr": 1.0029131938148686e-05, "epoch": 1.04863436123348, "percentage": 52.43, "elapsed_time": "4:22:28", "remaining_time": "3:58:08"} +{"current_steps": 2977, "total_steps": 5676, "loss": 0.4748264253139496, "lr": 1.0023305562386222e-05, "epoch": 1.0489867841409692, "percentage": 52.45, "elapsed_time": "4:22:34", "remaining_time": "3:58:03"} +{"current_steps": 2978, "total_steps": 5676, "loss": 0.6686758399009705, "lr": 1.0017479178712245e-05, "epoch": 1.0493392070484582, "percentage": 52.47, "elapsed_time": "4:22:39", "remaining_time": "3:57:57"} +{"current_steps": 2979, "total_steps": 5676, "loss": 0.5003838539123535, "lr": 1.0011652789104631e-05, "epoch": 1.0496916299559471, "percentage": 52.48, "elapsed_time": "4:22:44", "remaining_time": "3:57:51"} +{"current_steps": 2980, "total_steps": 5676, "loss": 0.6210055351257324, "lr": 1.0005826395541257e-05, "epoch": 1.050044052863436, "percentage": 52.5, "elapsed_time": "4:22:49", "remaining_time": "3:57:46"} +{"current_steps": 2981, "total_steps": 5676, "loss": 0.6160269975662231, "lr": 1e-05, "epoch": 1.0503964757709252, "percentage": 52.52, "elapsed_time": "4:22:54", "remaining_time": "3:57:40"} +{"current_steps": 2982, "total_steps": 5676, "loss": 0.6432052850723267, "lr": 9.994173604458748e-06, "epoch": 1.0507488986784141, "percentage": 52.54, "elapsed_time": "4:22:59", "remaining_time": "3:57:35"} +{"current_steps": 2983, "total_steps": 5676, "loss": 0.588628888130188, "lr": 9.988347210895372e-06, "epoch": 1.051101321585903, "percentage": 52.55, "elapsed_time": "4:23:04", "remaining_time": "3:57:29"} +{"current_steps": 2984, "total_steps": 5676, "loss": 0.6694320440292358, "lr": 9.982520821287758e-06, 
"epoch": 1.051453744493392, "percentage": 52.57, "elapsed_time": "4:23:09", "remaining_time": "3:57:24"} +{"current_steps": 2985, "total_steps": 5676, "loss": 0.8591301441192627, "lr": 9.976694437613778e-06, "epoch": 1.051806167400881, "percentage": 52.59, "elapsed_time": "4:23:14", "remaining_time": "3:57:18"} +{"current_steps": 2986, "total_steps": 5676, "loss": 0.6000436544418335, "lr": 9.970868061851315e-06, "epoch": 1.0521585903083701, "percentage": 52.61, "elapsed_time": "4:23:18", "remaining_time": "3:57:12"} +{"current_steps": 2987, "total_steps": 5676, "loss": 0.624568521976471, "lr": 9.965041695978239e-06, "epoch": 1.052511013215859, "percentage": 52.63, "elapsed_time": "4:23:24", "remaining_time": "3:57:07"} +{"current_steps": 2988, "total_steps": 5676, "loss": 0.6173535585403442, "lr": 9.959215341972414e-06, "epoch": 1.052863436123348, "percentage": 52.64, "elapsed_time": "4:23:30", "remaining_time": "3:57:03"} +{"current_steps": 2989, "total_steps": 5676, "loss": 0.5991729497909546, "lr": 9.953389001811716e-06, "epoch": 1.053215859030837, "percentage": 52.66, "elapsed_time": "4:23:36", "remaining_time": "3:56:58"} +{"current_steps": 2990, "total_steps": 5676, "loss": 0.570953905582428, "lr": 9.947562677473999e-06, "epoch": 1.053568281938326, "percentage": 52.68, "elapsed_time": "4:23:39", "remaining_time": "3:56:51"} +{"current_steps": 2991, "total_steps": 5676, "loss": 0.6079390048980713, "lr": 9.941736370937119e-06, "epoch": 1.053920704845815, "percentage": 52.7, "elapsed_time": "4:23:45", "remaining_time": "3:56:46"} +{"current_steps": 2992, "total_steps": 5676, "loss": 0.599539577960968, "lr": 9.935910084178934e-06, "epoch": 1.054273127753304, "percentage": 52.71, "elapsed_time": "4:23:50", "remaining_time": "3:56:40"} +{"current_steps": 2993, "total_steps": 5676, "loss": 0.6736180186271667, "lr": 9.930083819177273e-06, "epoch": 1.0546255506607929, "percentage": 52.73, "elapsed_time": "4:23:55", "remaining_time": "3:56:35"} +{"current_steps": 2994, 
"total_steps": 5676, "loss": 0.6953197717666626, "lr": 9.924257577909987e-06, "epoch": 1.054977973568282, "percentage": 52.75, "elapsed_time": "4:24:01", "remaining_time": "3:56:30"} +{"current_steps": 2995, "total_steps": 5676, "loss": 0.6670099496841431, "lr": 9.918431362354892e-06, "epoch": 1.055330396475771, "percentage": 52.77, "elapsed_time": "4:24:05", "remaining_time": "3:56:24"} +{"current_steps": 2996, "total_steps": 5676, "loss": 0.5829994678497314, "lr": 9.912605174489811e-06, "epoch": 1.05568281938326, "percentage": 52.78, "elapsed_time": "4:24:10", "remaining_time": "3:56:18"} +{"current_steps": 2997, "total_steps": 5676, "loss": 0.5926212072372437, "lr": 9.906779016292554e-06, "epoch": 1.0560352422907489, "percentage": 52.8, "elapsed_time": "4:24:15", "remaining_time": "3:56:13"} +{"current_steps": 2998, "total_steps": 5676, "loss": 0.6085237860679626, "lr": 9.900952889740922e-06, "epoch": 1.0563876651982378, "percentage": 52.82, "elapsed_time": "4:24:21", "remaining_time": "3:56:08"} +{"current_steps": 2999, "total_steps": 5676, "loss": 0.5348918437957764, "lr": 9.895126796812698e-06, "epoch": 1.056740088105727, "percentage": 52.84, "elapsed_time": "4:24:26", "remaining_time": "3:56:02"} +{"current_steps": 3000, "total_steps": 5676, "loss": 0.6325811743736267, "lr": 9.889300739485666e-06, "epoch": 1.0570925110132159, "percentage": 52.85, "elapsed_time": "4:24:31", "remaining_time": "3:55:57"} +{"current_steps": 3001, "total_steps": 5676, "loss": 0.6262463927268982, "lr": 9.883474719737582e-06, "epoch": 1.0574449339207048, "percentage": 52.87, "elapsed_time": "4:24:42", "remaining_time": "3:55:57"} +{"current_steps": 3002, "total_steps": 5676, "loss": 0.5863393545150757, "lr": 9.877648739546213e-06, "epoch": 1.0577973568281938, "percentage": 52.89, "elapsed_time": "4:24:49", "remaining_time": "3:55:52"} +{"current_steps": 3003, "total_steps": 5676, "loss": 0.6200219392776489, "lr": 9.871822800889284e-06, "epoch": 1.058149779735683, "percentage": 
52.91, "elapsed_time": "4:24:55", "remaining_time": "3:55:48"} +{"current_steps": 3004, "total_steps": 5676, "loss": 0.6994227170944214, "lr": 9.865996905744523e-06, "epoch": 1.0585022026431719, "percentage": 52.92, "elapsed_time": "4:25:01", "remaining_time": "3:55:43"} +{"current_steps": 3005, "total_steps": 5676, "loss": 0.6458406448364258, "lr": 9.860171056089646e-06, "epoch": 1.0588546255506608, "percentage": 52.94, "elapsed_time": "4:25:05", "remaining_time": "3:55:37"} +{"current_steps": 3006, "total_steps": 5676, "loss": 0.6814782619476318, "lr": 9.854345253902342e-06, "epoch": 1.0592070484581497, "percentage": 52.96, "elapsed_time": "4:25:10", "remaining_time": "3:55:31"} +{"current_steps": 3007, "total_steps": 5676, "loss": 0.521275520324707, "lr": 9.84851950116029e-06, "epoch": 1.0595594713656387, "percentage": 52.98, "elapsed_time": "4:25:15", "remaining_time": "3:55:26"} +{"current_steps": 3008, "total_steps": 5676, "loss": 0.5541207790374756, "lr": 9.84269379984116e-06, "epoch": 1.0599118942731278, "percentage": 53.0, "elapsed_time": "4:25:20", "remaining_time": "3:55:20"} +{"current_steps": 3009, "total_steps": 5676, "loss": 0.578704833984375, "lr": 9.836868151922579e-06, "epoch": 1.0602643171806168, "percentage": 53.01, "elapsed_time": "4:25:25", "remaining_time": "3:55:15"} +{"current_steps": 3010, "total_steps": 5676, "loss": 0.6280980706214905, "lr": 9.831042559382193e-06, "epoch": 1.0606167400881057, "percentage": 53.03, "elapsed_time": "4:25:31", "remaining_time": "3:55:10"} +{"current_steps": 3011, "total_steps": 5676, "loss": 0.6059408783912659, "lr": 9.825217024197595e-06, "epoch": 1.0609691629955946, "percentage": 53.05, "elapsed_time": "4:25:36", "remaining_time": "3:55:04"} +{"current_steps": 3012, "total_steps": 5676, "loss": 0.6375449299812317, "lr": 9.819391548346377e-06, "epoch": 1.0613215859030838, "percentage": 53.07, "elapsed_time": "4:25:40", "remaining_time": "3:54:58"} +{"current_steps": 3013, "total_steps": 5676, "loss": 
0.5959592461585999, "lr": 9.81356613380611e-06, "epoch": 1.0616740088105727, "percentage": 53.08, "elapsed_time": "4:25:44", "remaining_time": "3:54:52"} +{"current_steps": 3014, "total_steps": 5676, "loss": 0.7636409401893616, "lr": 9.807740782554337e-06, "epoch": 1.0620264317180617, "percentage": 53.1, "elapsed_time": "4:25:49", "remaining_time": "3:54:47"} +{"current_steps": 3015, "total_steps": 5676, "loss": 0.6136656999588013, "lr": 9.801915496568586e-06, "epoch": 1.0623788546255506, "percentage": 53.12, "elapsed_time": "4:25:54", "remaining_time": "3:54:41"} +{"current_steps": 3016, "total_steps": 5676, "loss": 0.4659839868545532, "lr": 9.796090277826361e-06, "epoch": 1.0627312775330395, "percentage": 53.14, "elapsed_time": "4:25:59", "remaining_time": "3:54:35"} +{"current_steps": 3017, "total_steps": 5676, "loss": 0.6053155660629272, "lr": 9.790265128305137e-06, "epoch": 1.0630837004405287, "percentage": 53.15, "elapsed_time": "4:26:04", "remaining_time": "3:54:30"} +{"current_steps": 3018, "total_steps": 5676, "loss": 0.6885203123092651, "lr": 9.78444004998238e-06, "epoch": 1.0634361233480176, "percentage": 53.17, "elapsed_time": "4:26:10", "remaining_time": "3:54:25"} +{"current_steps": 3019, "total_steps": 5676, "loss": 0.4985584616661072, "lr": 9.778615044835513e-06, "epoch": 1.0637885462555066, "percentage": 53.19, "elapsed_time": "4:26:16", "remaining_time": "3:54:20"} +{"current_steps": 3020, "total_steps": 5676, "loss": 0.5782307386398315, "lr": 9.772790114841948e-06, "epoch": 1.0641409691629955, "percentage": 53.21, "elapsed_time": "4:26:22", "remaining_time": "3:54:15"} +{"current_steps": 3021, "total_steps": 5676, "loss": 0.5819451212882996, "lr": 9.766965261979072e-06, "epoch": 1.0644933920704847, "percentage": 53.22, "elapsed_time": "4:26:27", "remaining_time": "3:54:10"} +{"current_steps": 3022, "total_steps": 5676, "loss": 0.7316779494285583, "lr": 9.761140488224232e-06, "epoch": 1.0648458149779736, "percentage": 53.24, "elapsed_time": 
"4:26:33", "remaining_time": "3:54:05"} +{"current_steps": 3023, "total_steps": 5676, "loss": 0.5986718535423279, "lr": 9.755315795554766e-06, "epoch": 1.0651982378854625, "percentage": 53.26, "elapsed_time": "4:26:38", "remaining_time": "3:54:00"} +{"current_steps": 3024, "total_steps": 5676, "loss": 0.5052427053451538, "lr": 9.749491185947977e-06, "epoch": 1.0655506607929515, "percentage": 53.28, "elapsed_time": "4:26:44", "remaining_time": "3:53:55"} +{"current_steps": 3025, "total_steps": 5676, "loss": 0.7370901107788086, "lr": 9.743666661381123e-06, "epoch": 1.0659030837004406, "percentage": 53.29, "elapsed_time": "4:26:49", "remaining_time": "3:53:49"} +{"current_steps": 3026, "total_steps": 5676, "loss": 0.6423007249832153, "lr": 9.73784222383147e-06, "epoch": 1.0662555066079296, "percentage": 53.31, "elapsed_time": "4:26:54", "remaining_time": "3:53:44"} +{"current_steps": 3027, "total_steps": 5676, "loss": 0.5679126977920532, "lr": 9.73201787527622e-06, "epoch": 1.0666079295154185, "percentage": 53.33, "elapsed_time": "4:26:59", "remaining_time": "3:53:38"} +{"current_steps": 3028, "total_steps": 5676, "loss": 0.5890183448791504, "lr": 9.72619361769256e-06, "epoch": 1.0669603524229074, "percentage": 53.35, "elapsed_time": "4:27:04", "remaining_time": "3:53:33"} +{"current_steps": 3029, "total_steps": 5676, "loss": 0.6772822141647339, "lr": 9.720369453057648e-06, "epoch": 1.0673127753303966, "percentage": 53.37, "elapsed_time": "4:27:09", "remaining_time": "3:53:27"} +{"current_steps": 3030, "total_steps": 5676, "loss": 0.8275488615036011, "lr": 9.714545383348602e-06, "epoch": 1.0676651982378855, "percentage": 53.38, "elapsed_time": "4:27:15", "remaining_time": "3:53:22"} +{"current_steps": 3031, "total_steps": 5676, "loss": 0.5369541645050049, "lr": 9.708721410542517e-06, "epoch": 1.0680176211453745, "percentage": 53.4, "elapsed_time": "4:27:21", "remaining_time": "3:53:18"} +{"current_steps": 3032, "total_steps": 5676, "loss": 0.7173746824264526, "lr": 
9.70289753661645e-06, "epoch": 1.0683700440528634, "percentage": 53.42, "elapsed_time": "4:27:27", "remaining_time": "3:53:14"} +{"current_steps": 3033, "total_steps": 5676, "loss": 0.597034215927124, "lr": 9.697073763547415e-06, "epoch": 1.0687224669603523, "percentage": 53.44, "elapsed_time": "4:27:34", "remaining_time": "3:53:10"} +{"current_steps": 3034, "total_steps": 5676, "loss": 0.6680281162261963, "lr": 9.691250093312419e-06, "epoch": 1.0690748898678415, "percentage": 53.45, "elapsed_time": "4:27:39", "remaining_time": "3:53:04"} +{"current_steps": 3035, "total_steps": 5676, "loss": 0.6155321002006531, "lr": 9.6854265278884e-06, "epoch": 1.0694273127753304, "percentage": 53.47, "elapsed_time": "4:27:44", "remaining_time": "3:52:58"} +{"current_steps": 3036, "total_steps": 5676, "loss": 0.5945199728012085, "lr": 9.67960306925229e-06, "epoch": 1.0697797356828194, "percentage": 53.49, "elapsed_time": "4:27:49", "remaining_time": "3:52:53"} +{"current_steps": 3037, "total_steps": 5676, "loss": 0.6492328643798828, "lr": 9.673779719380967e-06, "epoch": 1.0701321585903083, "percentage": 53.51, "elapsed_time": "4:27:55", "remaining_time": "3:52:48"} +{"current_steps": 3038, "total_steps": 5676, "loss": 0.6501325964927673, "lr": 9.667956480251273e-06, "epoch": 1.0704845814977975, "percentage": 53.52, "elapsed_time": "4:28:00", "remaining_time": "3:52:43"} +{"current_steps": 3039, "total_steps": 5676, "loss": 0.5956053733825684, "lr": 9.662133353840025e-06, "epoch": 1.0708370044052864, "percentage": 53.54, "elapsed_time": "4:28:05", "remaining_time": "3:52:37"} +{"current_steps": 3040, "total_steps": 5676, "loss": 0.5966510772705078, "lr": 9.656310342123988e-06, "epoch": 1.0711894273127753, "percentage": 53.56, "elapsed_time": "4:28:11", "remaining_time": "3:52:33"} +{"current_steps": 3041, "total_steps": 5676, "loss": 0.7096615433692932, "lr": 9.65048744707989e-06, "epoch": 1.0715418502202643, "percentage": 53.58, "elapsed_time": "4:28:16", "remaining_time": 
"3:52:27"} +{"current_steps": 3042, "total_steps": 5676, "loss": 0.6697839498519897, "lr": 9.644664670684429e-06, "epoch": 1.0718942731277532, "percentage": 53.59, "elapsed_time": "4:28:22", "remaining_time": "3:52:22"} +{"current_steps": 3043, "total_steps": 5676, "loss": 0.6288081407546997, "lr": 9.638842014914253e-06, "epoch": 1.0722466960352424, "percentage": 53.61, "elapsed_time": "4:28:29", "remaining_time": "3:52:19"} +{"current_steps": 3044, "total_steps": 5676, "loss": 0.5870436429977417, "lr": 9.633019481745973e-06, "epoch": 1.0725991189427313, "percentage": 53.63, "elapsed_time": "4:28:34", "remaining_time": "3:52:13"} +{"current_steps": 3045, "total_steps": 5676, "loss": 0.5540846586227417, "lr": 9.62719707315616e-06, "epoch": 1.0729515418502202, "percentage": 53.65, "elapsed_time": "4:28:40", "remaining_time": "3:52:09"} +{"current_steps": 3046, "total_steps": 5676, "loss": 0.6134544014930725, "lr": 9.621374791121335e-06, "epoch": 1.0733039647577092, "percentage": 53.66, "elapsed_time": "4:28:44", "remaining_time": "3:52:02"} +{"current_steps": 3047, "total_steps": 5676, "loss": 0.5537046194076538, "lr": 9.61555263761799e-06, "epoch": 1.0736563876651983, "percentage": 53.68, "elapsed_time": "4:28:48", "remaining_time": "3:51:55"} +{"current_steps": 3048, "total_steps": 5676, "loss": 0.6423748731613159, "lr": 9.60973061462256e-06, "epoch": 1.0740088105726873, "percentage": 53.7, "elapsed_time": "4:28:55", "remaining_time": "3:51:51"} +{"current_steps": 3049, "total_steps": 5676, "loss": 0.575737714767456, "lr": 9.603908724111438e-06, "epoch": 1.0743612334801762, "percentage": 53.72, "elapsed_time": "4:28:59", "remaining_time": "3:51:46"} +{"current_steps": 3050, "total_steps": 5676, "loss": 0.5326197147369385, "lr": 9.598086968060976e-06, "epoch": 1.0747136563876651, "percentage": 53.74, "elapsed_time": "4:29:04", "remaining_time": "3:51:40"} +{"current_steps": 3051, "total_steps": 5676, "loss": 0.6533973217010498, "lr": 9.592265348447481e-06, "epoch": 
1.075066079295154, "percentage": 53.75, "elapsed_time": "4:29:09", "remaining_time": "3:51:34"} +{"current_steps": 3052, "total_steps": 5676, "loss": 0.5536586046218872, "lr": 9.586443867247212e-06, "epoch": 1.0754185022026432, "percentage": 53.77, "elapsed_time": "4:29:14", "remaining_time": "3:51:29"} +{"current_steps": 3053, "total_steps": 5676, "loss": 0.6024892926216125, "lr": 9.580622526436382e-06, "epoch": 1.0757709251101322, "percentage": 53.79, "elapsed_time": "4:29:20", "remaining_time": "3:51:24"} +{"current_steps": 3054, "total_steps": 5676, "loss": 0.5070478320121765, "lr": 9.574801327991148e-06, "epoch": 1.076123348017621, "percentage": 53.81, "elapsed_time": "4:29:27", "remaining_time": "3:51:20"} +{"current_steps": 3055, "total_steps": 5676, "loss": 0.6518458127975464, "lr": 9.568980273887637e-06, "epoch": 1.07647577092511, "percentage": 53.82, "elapsed_time": "4:29:31", "remaining_time": "3:51:14"} +{"current_steps": 3056, "total_steps": 5676, "loss": 0.6120346784591675, "lr": 9.563159366101905e-06, "epoch": 1.0768281938325992, "percentage": 53.84, "elapsed_time": "4:29:36", "remaining_time": "3:51:08"} +{"current_steps": 3057, "total_steps": 5676, "loss": 0.6725353598594666, "lr": 9.557338606609973e-06, "epoch": 1.0771806167400881, "percentage": 53.86, "elapsed_time": "4:29:42", "remaining_time": "3:51:04"} +{"current_steps": 3058, "total_steps": 5676, "loss": 0.5311183333396912, "lr": 9.551517997387809e-06, "epoch": 1.077533039647577, "percentage": 53.88, "elapsed_time": "4:29:48", "remaining_time": "3:50:58"} +{"current_steps": 3059, "total_steps": 5676, "loss": 0.5728713274002075, "lr": 9.545697540411324e-06, "epoch": 1.077885462555066, "percentage": 53.89, "elapsed_time": "4:29:52", "remaining_time": "3:50:53"} +{"current_steps": 3060, "total_steps": 5676, "loss": 0.5527676343917847, "lr": 9.53987723765639e-06, "epoch": 1.0782378854625552, "percentage": 53.91, "elapsed_time": "4:29:57", "remaining_time": "3:50:47"} +{"current_steps": 3061, 
"total_steps": 5676, "loss": 0.7529809474945068, "lr": 9.534057091098813e-06, "epoch": 1.078590308370044, "percentage": 53.93, "elapsed_time": "4:30:03", "remaining_time": "3:50:42"} +{"current_steps": 3062, "total_steps": 5676, "loss": 0.5485205054283142, "lr": 9.528237102714352e-06, "epoch": 1.078942731277533, "percentage": 53.95, "elapsed_time": "4:30:07", "remaining_time": "3:50:35"} +{"current_steps": 3063, "total_steps": 5676, "loss": 0.785184383392334, "lr": 9.522417274478716e-06, "epoch": 1.079295154185022, "percentage": 53.96, "elapsed_time": "4:30:11", "remaining_time": "3:50:30"} +{"current_steps": 3064, "total_steps": 5676, "loss": 0.5645574331283569, "lr": 9.516597608367547e-06, "epoch": 1.079647577092511, "percentage": 53.98, "elapsed_time": "4:30:17", "remaining_time": "3:50:24"} +{"current_steps": 3065, "total_steps": 5676, "loss": 0.5878466367721558, "lr": 9.51077810635645e-06, "epoch": 1.08, "percentage": 54.0, "elapsed_time": "4:30:23", "remaining_time": "3:50:20"} +{"current_steps": 3066, "total_steps": 5676, "loss": 0.6610634922981262, "lr": 9.504958770420962e-06, "epoch": 1.080352422907489, "percentage": 54.02, "elapsed_time": "4:30:27", "remaining_time": "3:50:13"} +{"current_steps": 3067, "total_steps": 5676, "loss": 0.5928626656532288, "lr": 9.49913960253656e-06, "epoch": 1.080704845814978, "percentage": 54.03, "elapsed_time": "4:30:31", "remaining_time": "3:50:07"} +{"current_steps": 3068, "total_steps": 5676, "loss": 0.7038083672523499, "lr": 9.49332060467868e-06, "epoch": 1.0810572687224669, "percentage": 54.05, "elapsed_time": "4:30:37", "remaining_time": "3:50:03"} +{"current_steps": 3069, "total_steps": 5676, "loss": 0.5966217517852783, "lr": 9.487501778822685e-06, "epoch": 1.081409691629956, "percentage": 54.07, "elapsed_time": "4:30:42", "remaining_time": "3:49:57"} +{"current_steps": 3070, "total_steps": 5676, "loss": 0.6594187021255493, "lr": 9.481683126943884e-06, "epoch": 1.081762114537445, "percentage": 54.09, "elapsed_time": 
"4:30:47", "remaining_time": "3:49:51"} +{"current_steps": 3071, "total_steps": 5676, "loss": 0.450161874294281, "lr": 9.475864651017536e-06, "epoch": 1.082114537444934, "percentage": 54.11, "elapsed_time": "4:30:53", "remaining_time": "3:49:46"} +{"current_steps": 3072, "total_steps": 5676, "loss": 0.6459252834320068, "lr": 9.470046353018821e-06, "epoch": 1.0824669603524228, "percentage": 54.12, "elapsed_time": "4:30:57", "remaining_time": "3:49:40"} +{"current_steps": 3073, "total_steps": 5676, "loss": 0.6505793929100037, "lr": 9.464228234922882e-06, "epoch": 1.082819383259912, "percentage": 54.14, "elapsed_time": "4:31:03", "remaining_time": "3:49:35"} +{"current_steps": 3074, "total_steps": 5676, "loss": 0.6480910778045654, "lr": 9.458410298704781e-06, "epoch": 1.083171806167401, "percentage": 54.16, "elapsed_time": "4:31:07", "remaining_time": "3:49:30"} +{"current_steps": 3075, "total_steps": 5676, "loss": 0.6494983434677124, "lr": 9.452592546339527e-06, "epoch": 1.0835242290748899, "percentage": 54.18, "elapsed_time": "4:31:14", "remaining_time": "3:49:25"} +{"current_steps": 3076, "total_steps": 5676, "loss": 0.4710897207260132, "lr": 9.44677497980207e-06, "epoch": 1.0838766519823788, "percentage": 54.19, "elapsed_time": "4:31:18", "remaining_time": "3:49:19"} +{"current_steps": 3077, "total_steps": 5676, "loss": 0.599402904510498, "lr": 9.440957601067294e-06, "epoch": 1.0842290748898677, "percentage": 54.21, "elapsed_time": "4:31:24", "remaining_time": "3:49:14"} +{"current_steps": 3078, "total_steps": 5676, "loss": 0.665642499923706, "lr": 9.435140412110006e-06, "epoch": 1.084581497797357, "percentage": 54.23, "elapsed_time": "4:31:29", "remaining_time": "3:49:09"} +{"current_steps": 3079, "total_steps": 5676, "loss": 0.5861828923225403, "lr": 9.429323414904975e-06, "epoch": 1.0849339207048458, "percentage": 54.25, "elapsed_time": "4:31:34", "remaining_time": "3:49:03"} +{"current_steps": 3080, "total_steps": 5676, "loss": 0.6115351915359497, "lr": 
9.42350661142688e-06, "epoch": 1.0852863436123348, "percentage": 54.26, "elapsed_time": "4:31:40", "remaining_time": "3:48:59"} +{"current_steps": 3081, "total_steps": 5676, "loss": 0.6627066135406494, "lr": 9.417690003650353e-06, "epoch": 1.0856387665198237, "percentage": 54.28, "elapsed_time": "4:31:44", "remaining_time": "3:48:52"} +{"current_steps": 3082, "total_steps": 5676, "loss": 0.6155676245689392, "lr": 9.411873593549947e-06, "epoch": 1.0859911894273129, "percentage": 54.3, "elapsed_time": "4:31:50", "remaining_time": "3:48:48"} +{"current_steps": 3083, "total_steps": 5676, "loss": 0.5429089069366455, "lr": 9.406057383100151e-06, "epoch": 1.0863436123348018, "percentage": 54.32, "elapsed_time": "4:31:55", "remaining_time": "3:48:42"} +{"current_steps": 3084, "total_steps": 5676, "loss": 0.5416614413261414, "lr": 9.400241374275391e-06, "epoch": 1.0866960352422907, "percentage": 54.33, "elapsed_time": "4:32:02", "remaining_time": "3:48:38"} +{"current_steps": 3085, "total_steps": 5676, "loss": 0.6708710193634033, "lr": 9.394425569050018e-06, "epoch": 1.0870484581497797, "percentage": 54.35, "elapsed_time": "4:32:07", "remaining_time": "3:48:32"} +{"current_steps": 3086, "total_steps": 5676, "loss": 0.588347315788269, "lr": 9.388609969398318e-06, "epoch": 1.0874008810572686, "percentage": 54.37, "elapsed_time": "4:32:11", "remaining_time": "3:48:26"} +{"current_steps": 3087, "total_steps": 5676, "loss": 0.4999222755432129, "lr": 9.38279457729451e-06, "epoch": 1.0877533039647578, "percentage": 54.39, "elapsed_time": "4:32:16", "remaining_time": "3:48:21"} +{"current_steps": 3088, "total_steps": 5676, "loss": 0.5400034189224243, "lr": 9.37697939471273e-06, "epoch": 1.0881057268722467, "percentage": 54.4, "elapsed_time": "4:32:23", "remaining_time": "3:48:17"} +{"current_steps": 3089, "total_steps": 5676, "loss": 0.5583670139312744, "lr": 9.37116442362706e-06, "epoch": 1.0884581497797357, "percentage": 54.42, "elapsed_time": "4:32:28", "remaining_time": 
"3:48:11"} +{"current_steps": 3090, "total_steps": 5676, "loss": 0.6863820552825928, "lr": 9.365349666011497e-06, "epoch": 1.0888105726872246, "percentage": 54.44, "elapsed_time": "4:32:33", "remaining_time": "3:48:06"} +{"current_steps": 3091, "total_steps": 5676, "loss": 0.6422115564346313, "lr": 9.35953512383997e-06, "epoch": 1.0891629955947137, "percentage": 54.46, "elapsed_time": "4:32:39", "remaining_time": "3:48:01"} +{"current_steps": 3092, "total_steps": 5676, "loss": 0.7106888294219971, "lr": 9.353720799086337e-06, "epoch": 1.0895154185022027, "percentage": 54.47, "elapsed_time": "4:32:44", "remaining_time": "3:47:55"} +{"current_steps": 3093, "total_steps": 5676, "loss": 0.6070472002029419, "lr": 9.347906693724379e-06, "epoch": 1.0898678414096916, "percentage": 54.49, "elapsed_time": "4:32:49", "remaining_time": "3:47:50"} +{"current_steps": 3094, "total_steps": 5676, "loss": 0.577377200126648, "lr": 9.342092809727807e-06, "epoch": 1.0902202643171806, "percentage": 54.51, "elapsed_time": "4:32:55", "remaining_time": "3:47:45"} +{"current_steps": 3095, "total_steps": 5676, "loss": 0.6249948740005493, "lr": 9.336279149070252e-06, "epoch": 1.0905726872246695, "percentage": 54.53, "elapsed_time": "4:33:00", "remaining_time": "3:47:40"} +{"current_steps": 3096, "total_steps": 5676, "loss": 0.5515183210372925, "lr": 9.330465713725265e-06, "epoch": 1.0909251101321586, "percentage": 54.55, "elapsed_time": "4:33:06", "remaining_time": "3:47:35"} +{"current_steps": 3097, "total_steps": 5676, "loss": 0.6074613332748413, "lr": 9.324652505666336e-06, "epoch": 1.0912775330396476, "percentage": 54.56, "elapsed_time": "4:33:11", "remaining_time": "3:47:29"} +{"current_steps": 3098, "total_steps": 5676, "loss": 0.6520178318023682, "lr": 9.318839526866863e-06, "epoch": 1.0916299559471365, "percentage": 54.58, "elapsed_time": "4:33:17", "remaining_time": "3:47:24"} +{"current_steps": 3099, "total_steps": 5676, "loss": 0.45863813161849976, "lr": 9.31302677930017e-06, 
"epoch": 1.0919823788546255, "percentage": 54.6, "elapsed_time": "4:33:23", "remaining_time": "3:47:20"} +{"current_steps": 3100, "total_steps": 5676, "loss": 0.610805869102478, "lr": 9.307214264939508e-06, "epoch": 1.0923348017621146, "percentage": 54.62, "elapsed_time": "4:33:29", "remaining_time": "3:47:15"} +{"current_steps": 3101, "total_steps": 5676, "loss": 0.5954282283782959, "lr": 9.30140198575804e-06, "epoch": 1.0926872246696036, "percentage": 54.63, "elapsed_time": "4:33:37", "remaining_time": "3:47:13"} +{"current_steps": 3102, "total_steps": 5676, "loss": 0.6941400170326233, "lr": 9.29558994372886e-06, "epoch": 1.0930396475770925, "percentage": 54.65, "elapsed_time": "4:33:42", "remaining_time": "3:47:07"} +{"current_steps": 3103, "total_steps": 5676, "loss": 0.6723533868789673, "lr": 9.289778140824974e-06, "epoch": 1.0933920704845814, "percentage": 54.67, "elapsed_time": "4:33:48", "remaining_time": "3:47:02"} +{"current_steps": 3104, "total_steps": 5676, "loss": 0.49137037992477417, "lr": 9.2839665790193e-06, "epoch": 1.0937444933920706, "percentage": 54.69, "elapsed_time": "4:33:54", "remaining_time": "3:46:57"} +{"current_steps": 3105, "total_steps": 5676, "loss": 0.5827045440673828, "lr": 9.278155260284692e-06, "epoch": 1.0940969162995595, "percentage": 54.7, "elapsed_time": "4:33:59", "remaining_time": "3:46:52"} +{"current_steps": 3106, "total_steps": 5676, "loss": 0.6391462683677673, "lr": 9.272344186593909e-06, "epoch": 1.0944493392070485, "percentage": 54.72, "elapsed_time": "4:34:05", "remaining_time": "3:46:47"} +{"current_steps": 3107, "total_steps": 5676, "loss": 0.4994915723800659, "lr": 9.266533359919628e-06, "epoch": 1.0948017621145374, "percentage": 54.74, "elapsed_time": "4:34:11", "remaining_time": "3:46:42"} +{"current_steps": 3108, "total_steps": 5676, "loss": 0.6480728387832642, "lr": 9.260722782234445e-06, "epoch": 1.0951541850220265, "percentage": 54.76, "elapsed_time": "4:34:15", "remaining_time": "3:46:36"} +{"current_steps": 
3109, "total_steps": 5676, "loss": 0.5734057426452637, "lr": 9.25491245551087e-06, "epoch": 1.0955066079295155, "percentage": 54.77, "elapsed_time": "4:34:20", "remaining_time": "3:46:31"} +{"current_steps": 3110, "total_steps": 5676, "loss": 0.5650345087051392, "lr": 9.249102381721328e-06, "epoch": 1.0958590308370044, "percentage": 54.79, "elapsed_time": "4:34:26", "remaining_time": "3:46:25"} +{"current_steps": 3111, "total_steps": 5676, "loss": 0.6261975765228271, "lr": 9.243292562838164e-06, "epoch": 1.0962114537444934, "percentage": 54.81, "elapsed_time": "4:34:32", "remaining_time": "3:46:21"} +{"current_steps": 3112, "total_steps": 5676, "loss": 0.730735182762146, "lr": 9.237483000833619e-06, "epoch": 1.0965638766519823, "percentage": 54.83, "elapsed_time": "4:34:38", "remaining_time": "3:46:16"} +{"current_steps": 3113, "total_steps": 5676, "loss": 0.6198948621749878, "lr": 9.231673697679867e-06, "epoch": 1.0969162995594715, "percentage": 54.84, "elapsed_time": "4:34:43", "remaining_time": "3:46:11"} +{"current_steps": 3114, "total_steps": 5676, "loss": 0.5302865505218506, "lr": 9.225864655348982e-06, "epoch": 1.0972687224669604, "percentage": 54.86, "elapsed_time": "4:34:48", "remaining_time": "3:46:05"} +{"current_steps": 3115, "total_steps": 5676, "loss": 0.5995128154754639, "lr": 9.220055875812955e-06, "epoch": 1.0976211453744493, "percentage": 54.88, "elapsed_time": "4:34:54", "remaining_time": "3:46:01"} +{"current_steps": 3116, "total_steps": 5676, "loss": 0.3801479935646057, "lr": 9.214247361043687e-06, "epoch": 1.0979735682819383, "percentage": 54.9, "elapsed_time": "4:35:00", "remaining_time": "3:45:55"} +{"current_steps": 3117, "total_steps": 5676, "loss": 0.5617209076881409, "lr": 9.208439113012984e-06, "epoch": 1.0983259911894274, "percentage": 54.92, "elapsed_time": "4:35:05", "remaining_time": "3:45:50"} +{"current_steps": 3118, "total_steps": 5676, "loss": 0.5233842134475708, "lr": 9.202631133692572e-06, "epoch": 1.0986784140969164, 
"percentage": 54.93, "elapsed_time": "4:35:11", "remaining_time": "3:45:45"} +{"current_steps": 3119, "total_steps": 5676, "loss": 0.5300124883651733, "lr": 9.196823425054073e-06, "epoch": 1.0990308370044053, "percentage": 54.95, "elapsed_time": "4:35:17", "remaining_time": "3:45:41"} +{"current_steps": 3120, "total_steps": 5676, "loss": 0.686185359954834, "lr": 9.191015989069024e-06, "epoch": 1.0993832599118942, "percentage": 54.97, "elapsed_time": "4:35:22", "remaining_time": "3:45:36"} +{"current_steps": 3121, "total_steps": 5676, "loss": 0.6043056845664978, "lr": 9.18520882770887e-06, "epoch": 1.0997356828193832, "percentage": 54.99, "elapsed_time": "4:35:28", "remaining_time": "3:45:31"} +{"current_steps": 3122, "total_steps": 5676, "loss": 0.6299905776977539, "lr": 9.179401942944961e-06, "epoch": 1.1000881057268723, "percentage": 55.0, "elapsed_time": "4:35:32", "remaining_time": "3:45:24"} +{"current_steps": 3123, "total_steps": 5676, "loss": 0.57872474193573, "lr": 9.173595336748557e-06, "epoch": 1.1004405286343613, "percentage": 55.02, "elapsed_time": "4:35:37", "remaining_time": "3:45:19"} +{"current_steps": 3124, "total_steps": 5676, "loss": 0.5638746023178101, "lr": 9.167789011090818e-06, "epoch": 1.1007929515418502, "percentage": 55.04, "elapsed_time": "4:35:43", "remaining_time": "3:45:14"} +{"current_steps": 3125, "total_steps": 5676, "loss": 0.6150490045547485, "lr": 9.161982967942806e-06, "epoch": 1.1011453744493391, "percentage": 55.06, "elapsed_time": "4:35:48", "remaining_time": "3:45:09"} +{"current_steps": 3126, "total_steps": 5676, "loss": 0.547231912612915, "lr": 9.156177209275503e-06, "epoch": 1.1014977973568283, "percentage": 55.07, "elapsed_time": "4:35:54", "remaining_time": "3:45:04"} +{"current_steps": 3127, "total_steps": 5676, "loss": 0.6999325752258301, "lr": 9.150371737059773e-06, "epoch": 1.1018502202643172, "percentage": 55.09, "elapsed_time": "4:35:58", "remaining_time": "3:44:58"} +{"current_steps": 3128, "total_steps": 5676, 
"loss": 0.7175568342208862, "lr": 9.144566553266396e-06, "epoch": 1.1022026431718062, "percentage": 55.11, "elapsed_time": "4:36:03", "remaining_time": "3:44:52"} +{"current_steps": 3129, "total_steps": 5676, "loss": 0.7308273911476135, "lr": 9.138761659866054e-06, "epoch": 1.102555066079295, "percentage": 55.13, "elapsed_time": "4:36:08", "remaining_time": "3:44:47"} +{"current_steps": 3130, "total_steps": 5676, "loss": 0.5951930284500122, "lr": 9.132957058829323e-06, "epoch": 1.102907488986784, "percentage": 55.14, "elapsed_time": "4:36:14", "remaining_time": "3:44:41"} +{"current_steps": 3131, "total_steps": 5676, "loss": 0.5684988498687744, "lr": 9.127152752126688e-06, "epoch": 1.1032599118942732, "percentage": 55.16, "elapsed_time": "4:36:19", "remaining_time": "3:44:36"} +{"current_steps": 3132, "total_steps": 5676, "loss": 0.6490764617919922, "lr": 9.121348741728532e-06, "epoch": 1.1036123348017621, "percentage": 55.18, "elapsed_time": "4:36:25", "remaining_time": "3:44:31"} +{"current_steps": 3133, "total_steps": 5676, "loss": 0.7795257568359375, "lr": 9.115545029605129e-06, "epoch": 1.103964757709251, "percentage": 55.2, "elapsed_time": "4:36:30", "remaining_time": "3:44:26"} +{"current_steps": 3134, "total_steps": 5676, "loss": 0.5443774461746216, "lr": 9.10974161772667e-06, "epoch": 1.10431718061674, "percentage": 55.21, "elapsed_time": "4:36:34", "remaining_time": "3:44:20"} +{"current_steps": 3135, "total_steps": 5676, "loss": 0.48989373445510864, "lr": 9.103938508063223e-06, "epoch": 1.1046696035242292, "percentage": 55.23, "elapsed_time": "4:36:39", "remaining_time": "3:44:14"} +{"current_steps": 3136, "total_steps": 5676, "loss": 0.5628808736801147, "lr": 9.098135702584762e-06, "epoch": 1.105022026431718, "percentage": 55.25, "elapsed_time": "4:36:44", "remaining_time": "3:44:08"} +{"current_steps": 3137, "total_steps": 5676, "loss": 0.6549321413040161, "lr": 9.092333203261168e-06, "epoch": 1.105374449339207, "percentage": 55.27, "elapsed_time": 
"4:36:49", "remaining_time": "3:44:03"} +{"current_steps": 3138, "total_steps": 5676, "loss": 0.7353606224060059, "lr": 9.0865310120622e-06, "epoch": 1.105726872246696, "percentage": 55.29, "elapsed_time": "4:36:55", "remaining_time": "3:43:58"} +{"current_steps": 3139, "total_steps": 5676, "loss": 0.650668203830719, "lr": 9.080729130957528e-06, "epoch": 1.106079295154185, "percentage": 55.3, "elapsed_time": "4:37:02", "remaining_time": "3:43:54"} +{"current_steps": 3140, "total_steps": 5676, "loss": 0.5618860721588135, "lr": 9.07492756191671e-06, "epoch": 1.106431718061674, "percentage": 55.32, "elapsed_time": "4:37:09", "remaining_time": "3:43:50"} +{"current_steps": 3141, "total_steps": 5676, "loss": 0.5532773733139038, "lr": 9.069126306909187e-06, "epoch": 1.106784140969163, "percentage": 55.34, "elapsed_time": "4:37:14", "remaining_time": "3:43:45"} +{"current_steps": 3142, "total_steps": 5676, "loss": 0.6240289211273193, "lr": 9.06332536790432e-06, "epoch": 1.107136563876652, "percentage": 55.36, "elapsed_time": "4:37:19", "remaining_time": "3:43:39"} +{"current_steps": 3143, "total_steps": 5676, "loss": 0.5952814221382141, "lr": 9.057524746871335e-06, "epoch": 1.1074889867841409, "percentage": 55.37, "elapsed_time": "4:37:25", "remaining_time": "3:43:34"} +{"current_steps": 3144, "total_steps": 5676, "loss": 0.6011646389961243, "lr": 9.051724445779373e-06, "epoch": 1.10784140969163, "percentage": 55.39, "elapsed_time": "4:37:29", "remaining_time": "3:43:28"} +{"current_steps": 3145, "total_steps": 5676, "loss": 0.6964641213417053, "lr": 9.045924466597448e-06, "epoch": 1.108193832599119, "percentage": 55.41, "elapsed_time": "4:37:33", "remaining_time": "3:43:22"} +{"current_steps": 3146, "total_steps": 5676, "loss": 0.6821622848510742, "lr": 9.040124811294473e-06, "epoch": 1.108546255506608, "percentage": 55.43, "elapsed_time": "4:37:38", "remaining_time": "3:43:16"} +{"current_steps": 3147, "total_steps": 5676, "loss": 0.5045080184936523, "lr": 
9.034325481839253e-06, "epoch": 1.1088986784140968, "percentage": 55.44, "elapsed_time": "4:37:44", "remaining_time": "3:43:11"} +{"current_steps": 3148, "total_steps": 5676, "loss": 0.5709735155105591, "lr": 9.028526480200482e-06, "epoch": 1.109251101321586, "percentage": 55.46, "elapsed_time": "4:37:47", "remaining_time": "3:43:05"} +{"current_steps": 3149, "total_steps": 5676, "loss": 0.5882325172424316, "lr": 9.022727808346731e-06, "epoch": 1.109603524229075, "percentage": 55.48, "elapsed_time": "4:37:54", "remaining_time": "3:43:00"} +{"current_steps": 3150, "total_steps": 5676, "loss": 0.627426266670227, "lr": 9.016929468246482e-06, "epoch": 1.1099559471365639, "percentage": 55.5, "elapsed_time": "4:38:00", "remaining_time": "3:42:55"} +{"current_steps": 3151, "total_steps": 5676, "loss": 0.42419761419296265, "lr": 9.011131461868078e-06, "epoch": 1.1103083700440528, "percentage": 55.51, "elapsed_time": "4:38:06", "remaining_time": "3:42:51"} +{"current_steps": 3152, "total_steps": 5676, "loss": 0.5261023044586182, "lr": 9.005333791179775e-06, "epoch": 1.110660792951542, "percentage": 55.53, "elapsed_time": "4:38:12", "remaining_time": "3:42:46"} +{"current_steps": 3153, "total_steps": 5676, "loss": 0.6654448509216309, "lr": 8.999536458149692e-06, "epoch": 1.111013215859031, "percentage": 55.55, "elapsed_time": "4:38:17", "remaining_time": "3:42:41"} +{"current_steps": 3154, "total_steps": 5676, "loss": 0.5939514636993408, "lr": 8.993739464745843e-06, "epoch": 1.1113656387665198, "percentage": 55.57, "elapsed_time": "4:38:23", "remaining_time": "3:42:36"} +{"current_steps": 3155, "total_steps": 5676, "loss": 0.6381959319114685, "lr": 8.987942812936133e-06, "epoch": 1.1117180616740088, "percentage": 55.58, "elapsed_time": "4:38:28", "remaining_time": "3:42:30"} +{"current_steps": 3156, "total_steps": 5676, "loss": 0.5474847555160522, "lr": 8.982146504688343e-06, "epoch": 1.1120704845814977, "percentage": 55.6, "elapsed_time": "4:38:33", "remaining_time": 
"3:42:25"} +{"current_steps": 3157, "total_steps": 5676, "loss": 0.6306884288787842, "lr": 8.97635054197013e-06, "epoch": 1.1124229074889869, "percentage": 55.62, "elapsed_time": "4:38:37", "remaining_time": "3:42:19"} +{"current_steps": 3158, "total_steps": 5676, "loss": 0.5988807678222656, "lr": 8.97055492674906e-06, "epoch": 1.1127753303964758, "percentage": 55.64, "elapsed_time": "4:38:42", "remaining_time": "3:42:13"} +{"current_steps": 3159, "total_steps": 5676, "loss": 0.6316757202148438, "lr": 8.964759660992547e-06, "epoch": 1.1131277533039647, "percentage": 55.66, "elapsed_time": "4:38:48", "remaining_time": "3:42:09"} +{"current_steps": 3160, "total_steps": 5676, "loss": 0.6031370162963867, "lr": 8.958964746667917e-06, "epoch": 1.1134801762114537, "percentage": 55.67, "elapsed_time": "4:38:54", "remaining_time": "3:42:03"} +{"current_steps": 3161, "total_steps": 5676, "loss": 0.6334977149963379, "lr": 8.953170185742357e-06, "epoch": 1.1138325991189428, "percentage": 55.69, "elapsed_time": "4:39:00", "remaining_time": "3:41:59"} +{"current_steps": 3162, "total_steps": 5676, "loss": 0.49237731099128723, "lr": 8.947375980182937e-06, "epoch": 1.1141850220264318, "percentage": 55.71, "elapsed_time": "4:39:05", "remaining_time": "3:41:53"} +{"current_steps": 3163, "total_steps": 5676, "loss": 0.7349523305892944, "lr": 8.941582131956615e-06, "epoch": 1.1145374449339207, "percentage": 55.73, "elapsed_time": "4:39:11", "remaining_time": "3:41:48"} +{"current_steps": 3164, "total_steps": 5676, "loss": 0.5048422813415527, "lr": 8.935788643030218e-06, "epoch": 1.1148898678414096, "percentage": 55.74, "elapsed_time": "4:39:16", "remaining_time": "3:41:43"} +{"current_steps": 3165, "total_steps": 5676, "loss": 0.6217244267463684, "lr": 8.92999551537046e-06, "epoch": 1.1152422907488986, "percentage": 55.76, "elapsed_time": "4:39:22", "remaining_time": "3:41:38"} +{"current_steps": 3166, "total_steps": 5676, "loss": 0.4949147701263428, "lr": 8.924202750943926e-06, 
"epoch": 1.1155947136563877, "percentage": 55.78, "elapsed_time": "4:39:28", "remaining_time": "3:41:34"} +{"current_steps": 3167, "total_steps": 5676, "loss": 0.5975630283355713, "lr": 8.918410351717074e-06, "epoch": 1.1159471365638767, "percentage": 55.8, "elapsed_time": "4:39:34", "remaining_time": "3:41:29"} +{"current_steps": 3168, "total_steps": 5676, "loss": 0.7546026110649109, "lr": 8.91261831965625e-06, "epoch": 1.1162995594713656, "percentage": 55.81, "elapsed_time": "4:39:40", "remaining_time": "3:41:24"} +{"current_steps": 3169, "total_steps": 5676, "loss": 0.6238037347793579, "lr": 8.906826656727665e-06, "epoch": 1.1166519823788545, "percentage": 55.83, "elapsed_time": "4:39:44", "remaining_time": "3:41:18"} +{"current_steps": 3170, "total_steps": 5676, "loss": 0.617587685585022, "lr": 8.901035364897407e-06, "epoch": 1.1170044052863437, "percentage": 55.85, "elapsed_time": "4:39:50", "remaining_time": "3:41:13"} +{"current_steps": 3171, "total_steps": 5676, "loss": 0.4834432005882263, "lr": 8.895244446131445e-06, "epoch": 1.1173568281938326, "percentage": 55.87, "elapsed_time": "4:39:56", "remaining_time": "3:41:08"} +{"current_steps": 3172, "total_steps": 5676, "loss": 0.614972710609436, "lr": 8.889453902395608e-06, "epoch": 1.1177092511013216, "percentage": 55.88, "elapsed_time": "4:40:02", "remaining_time": "3:41:03"} +{"current_steps": 3173, "total_steps": 5676, "loss": 0.6468379497528076, "lr": 8.883663735655612e-06, "epoch": 1.1180616740088105, "percentage": 55.9, "elapsed_time": "4:40:07", "remaining_time": "3:40:58"} +{"current_steps": 3174, "total_steps": 5676, "loss": 0.6372466683387756, "lr": 8.877873947877042e-06, "epoch": 1.1184140969162994, "percentage": 55.92, "elapsed_time": "4:40:11", "remaining_time": "3:40:52"} +{"current_steps": 3175, "total_steps": 5676, "loss": 0.6295863389968872, "lr": 8.872084541025336e-06, "epoch": 1.1187665198237886, "percentage": 55.94, "elapsed_time": "4:40:15", "remaining_time": "3:40:46"} 
+{"current_steps": 3176, "total_steps": 5676, "loss": 0.6109524369239807, "lr": 8.866295517065831e-06, "epoch": 1.1191189427312775, "percentage": 55.95, "elapsed_time": "4:40:20", "remaining_time": "3:40:40"} +{"current_steps": 3177, "total_steps": 5676, "loss": 0.6724812388420105, "lr": 8.860506877963715e-06, "epoch": 1.1194713656387665, "percentage": 55.97, "elapsed_time": "4:40:25", "remaining_time": "3:40:34"} +{"current_steps": 3178, "total_steps": 5676, "loss": 0.6612162590026855, "lr": 8.854718625684049e-06, "epoch": 1.1198237885462554, "percentage": 55.99, "elapsed_time": "4:40:32", "remaining_time": "3:40:30"} +{"current_steps": 3179, "total_steps": 5676, "loss": 0.6209636926651001, "lr": 8.84893076219177e-06, "epoch": 1.1201762114537446, "percentage": 56.01, "elapsed_time": "4:40:36", "remaining_time": "3:40:24"} +{"current_steps": 3180, "total_steps": 5676, "loss": 0.8548281192779541, "lr": 8.843143289451673e-06, "epoch": 1.1205286343612335, "percentage": 56.03, "elapsed_time": "4:40:41", "remaining_time": "3:40:19"} +{"current_steps": 3181, "total_steps": 5676, "loss": 0.4621508717536926, "lr": 8.837356209428428e-06, "epoch": 1.1208810572687224, "percentage": 56.04, "elapsed_time": "4:40:45", "remaining_time": "3:40:12"} +{"current_steps": 3182, "total_steps": 5676, "loss": 0.5065817832946777, "lr": 8.831569524086568e-06, "epoch": 1.1212334801762114, "percentage": 56.06, "elapsed_time": "4:40:50", "remaining_time": "3:40:07"} +{"current_steps": 3183, "total_steps": 5676, "loss": 0.5467691421508789, "lr": 8.825783235390488e-06, "epoch": 1.1215859030837005, "percentage": 56.08, "elapsed_time": "4:40:55", "remaining_time": "3:40:01"} +{"current_steps": 3184, "total_steps": 5676, "loss": 0.4938517212867737, "lr": 8.81999734530446e-06, "epoch": 1.1219383259911895, "percentage": 56.1, "elapsed_time": "4:41:01", "remaining_time": "3:39:57"} +{"current_steps": 3185, "total_steps": 5676, "loss": 0.6125702857971191, "lr": 8.814211855792609e-06, "epoch": 
1.1222907488986784, "percentage": 56.11, "elapsed_time": "4:41:07", "remaining_time": "3:39:52"} +{"current_steps": 3186, "total_steps": 5676, "loss": 0.5272841453552246, "lr": 8.80842676881893e-06, "epoch": 1.1226431718061674, "percentage": 56.13, "elapsed_time": "4:41:13", "remaining_time": "3:39:47"} +{"current_steps": 3187, "total_steps": 5676, "loss": 0.5595715045928955, "lr": 8.802642086347278e-06, "epoch": 1.1229955947136563, "percentage": 56.15, "elapsed_time": "4:41:19", "remaining_time": "3:39:42"} +{"current_steps": 3188, "total_steps": 5676, "loss": 0.7178677916526794, "lr": 8.796857810341375e-06, "epoch": 1.1233480176211454, "percentage": 56.17, "elapsed_time": "4:41:24", "remaining_time": "3:39:36"} +{"current_steps": 3189, "total_steps": 5676, "loss": 0.6000991463661194, "lr": 8.791073942764806e-06, "epoch": 1.1237004405286344, "percentage": 56.18, "elapsed_time": "4:41:29", "remaining_time": "3:39:31"} +{"current_steps": 3190, "total_steps": 5676, "loss": 0.537361741065979, "lr": 8.785290485581008e-06, "epoch": 1.1240528634361233, "percentage": 56.2, "elapsed_time": "4:41:33", "remaining_time": "3:39:25"} +{"current_steps": 3191, "total_steps": 5676, "loss": 0.7135556936264038, "lr": 8.779507440753286e-06, "epoch": 1.1244052863436123, "percentage": 56.22, "elapsed_time": "4:41:39", "remaining_time": "3:39:20"} +{"current_steps": 3192, "total_steps": 5676, "loss": 0.501063346862793, "lr": 8.773724810244805e-06, "epoch": 1.1247577092511014, "percentage": 56.24, "elapsed_time": "4:41:43", "remaining_time": "3:39:14"} +{"current_steps": 3193, "total_steps": 5676, "loss": 0.6885302662849426, "lr": 8.767942596018587e-06, "epoch": 1.1251101321585903, "percentage": 56.25, "elapsed_time": "4:41:48", "remaining_time": "3:39:08"} +{"current_steps": 3194, "total_steps": 5676, "loss": 0.5902360081672668, "lr": 8.762160800037516e-06, "epoch": 1.1254625550660793, "percentage": 56.27, "elapsed_time": "4:41:55", "remaining_time": "3:39:04"} +{"current_steps": 3195, 
"total_steps": 5676, "loss": 0.6308953762054443, "lr": 8.75637942426433e-06, "epoch": 1.1258149779735682, "percentage": 56.29, "elapsed_time": "4:42:00", "remaining_time": "3:38:59"} +{"current_steps": 3196, "total_steps": 5676, "loss": 0.5710124969482422, "lr": 8.750598470661625e-06, "epoch": 1.1261674008810574, "percentage": 56.31, "elapsed_time": "4:42:06", "remaining_time": "3:38:54"} +{"current_steps": 3197, "total_steps": 5676, "loss": 0.6110632419586182, "lr": 8.744817941191862e-06, "epoch": 1.1265198237885463, "percentage": 56.32, "elapsed_time": "4:42:10", "remaining_time": "3:38:48"} +{"current_steps": 3198, "total_steps": 5676, "loss": 0.5274624824523926, "lr": 8.73903783781734e-06, "epoch": 1.1268722466960353, "percentage": 56.34, "elapsed_time": "4:42:15", "remaining_time": "3:38:42"} +{"current_steps": 3199, "total_steps": 5676, "loss": 0.6144713163375854, "lr": 8.733258162500228e-06, "epoch": 1.1272246696035242, "percentage": 56.36, "elapsed_time": "4:42:20", "remaining_time": "3:38:36"} +{"current_steps": 3200, "total_steps": 5676, "loss": 0.6404621005058289, "lr": 8.727478917202551e-06, "epoch": 1.1275770925110131, "percentage": 56.38, "elapsed_time": "4:42:24", "remaining_time": "3:38:30"} +{"current_steps": 3201, "total_steps": 5676, "loss": 0.5693025588989258, "lr": 8.721700103886177e-06, "epoch": 1.1279295154185023, "percentage": 56.4, "elapsed_time": "4:42:34", "remaining_time": "3:38:29"} +{"current_steps": 3202, "total_steps": 5676, "loss": 0.5631159543991089, "lr": 8.715921724512838e-06, "epoch": 1.1282819383259912, "percentage": 56.41, "elapsed_time": "4:42:40", "remaining_time": "3:38:24"} +{"current_steps": 3203, "total_steps": 5676, "loss": 0.648078441619873, "lr": 8.710143781044113e-06, "epoch": 1.1286343612334802, "percentage": 56.43, "elapsed_time": "4:42:46", "remaining_time": "3:38:19"} +{"current_steps": 3204, "total_steps": 5676, "loss": 0.6858379244804382, "lr": 8.704366275441426e-06, "epoch": 1.128986784140969, "percentage": 
56.45, "elapsed_time": "4:42:52", "remaining_time": "3:38:14"} +{"current_steps": 3205, "total_steps": 5676, "loss": 0.7244000434875488, "lr": 8.698589209666074e-06, "epoch": 1.1293392070484582, "percentage": 56.47, "elapsed_time": "4:42:55", "remaining_time": "3:38:08"} +{"current_steps": 3206, "total_steps": 5676, "loss": 0.5918365716934204, "lr": 8.692812585679182e-06, "epoch": 1.1296916299559472, "percentage": 56.48, "elapsed_time": "4:43:00", "remaining_time": "3:38:02"} +{"current_steps": 3207, "total_steps": 5676, "loss": 0.6893443465232849, "lr": 8.687036405441733e-06, "epoch": 1.1300440528634361, "percentage": 56.5, "elapsed_time": "4:43:05", "remaining_time": "3:37:56"} +{"current_steps": 3208, "total_steps": 5676, "loss": 0.729834794998169, "lr": 8.681260670914564e-06, "epoch": 1.130396475770925, "percentage": 56.52, "elapsed_time": "4:43:10", "remaining_time": "3:37:51"} +{"current_steps": 3209, "total_steps": 5676, "loss": 0.6525821685791016, "lr": 8.675485384058356e-06, "epoch": 1.130748898678414, "percentage": 56.54, "elapsed_time": "4:43:14", "remaining_time": "3:37:45"} +{"current_steps": 3210, "total_steps": 5676, "loss": 0.6799874305725098, "lr": 8.669710546833642e-06, "epoch": 1.1311013215859032, "percentage": 56.55, "elapsed_time": "4:43:18", "remaining_time": "3:37:39"} +{"current_steps": 3211, "total_steps": 5676, "loss": 0.5614932775497437, "lr": 8.6639361612008e-06, "epoch": 1.131453744493392, "percentage": 56.57, "elapsed_time": "4:43:24", "remaining_time": "3:37:34"} +{"current_steps": 3212, "total_steps": 5676, "loss": 0.5975101590156555, "lr": 8.658162229120045e-06, "epoch": 1.131806167400881, "percentage": 56.59, "elapsed_time": "4:43:31", "remaining_time": "3:37:29"} +{"current_steps": 3213, "total_steps": 5676, "loss": 0.5367887020111084, "lr": 8.652388752551458e-06, "epoch": 1.13215859030837, "percentage": 56.61, "elapsed_time": "4:43:36", "remaining_time": "3:37:24"} +{"current_steps": 3214, "total_steps": 5676, "loss": 
0.4451865553855896, "lr": 8.646615733454949e-06, "epoch": 1.1325110132158591, "percentage": 56.62, "elapsed_time": "4:43:43", "remaining_time": "3:37:20"} +{"current_steps": 3215, "total_steps": 5676, "loss": 0.6482576131820679, "lr": 8.64084317379028e-06, "epoch": 1.132863436123348, "percentage": 56.64, "elapsed_time": "4:43:48", "remaining_time": "3:37:14"} +{"current_steps": 3216, "total_steps": 5676, "loss": 0.5890318155288696, "lr": 8.635071075517053e-06, "epoch": 1.133215859030837, "percentage": 56.66, "elapsed_time": "4:43:53", "remaining_time": "3:37:09"} +{"current_steps": 3217, "total_steps": 5676, "loss": 0.554576575756073, "lr": 8.629299440594719e-06, "epoch": 1.133568281938326, "percentage": 56.68, "elapsed_time": "4:43:58", "remaining_time": "3:37:03"} +{"current_steps": 3218, "total_steps": 5676, "loss": 0.5987116694450378, "lr": 8.623528270982567e-06, "epoch": 1.1339207048458149, "percentage": 56.69, "elapsed_time": "4:44:03", "remaining_time": "3:36:58"} +{"current_steps": 3219, "total_steps": 5676, "loss": 0.49857625365257263, "lr": 8.617757568639731e-06, "epoch": 1.134273127753304, "percentage": 56.71, "elapsed_time": "4:44:09", "remaining_time": "3:36:53"} +{"current_steps": 3220, "total_steps": 5676, "loss": 0.6116641759872437, "lr": 8.61198733552518e-06, "epoch": 1.134625550660793, "percentage": 56.73, "elapsed_time": "4:44:14", "remaining_time": "3:36:48"} +{"current_steps": 3221, "total_steps": 5676, "loss": 0.4346674978733063, "lr": 8.606217573597738e-06, "epoch": 1.134977973568282, "percentage": 56.75, "elapsed_time": "4:44:20", "remaining_time": "3:36:43"} +{"current_steps": 3222, "total_steps": 5676, "loss": 0.6973283290863037, "lr": 8.600448284816046e-06, "epoch": 1.1353303964757708, "percentage": 56.77, "elapsed_time": "4:44:26", "remaining_time": "3:36:38"} +{"current_steps": 3223, "total_steps": 5676, "loss": 0.5457896590232849, "lr": 8.594679471138613e-06, "epoch": 1.13568281938326, "percentage": 56.78, "elapsed_time": "4:44:30", 
"remaining_time": "3:36:32"} +{"current_steps": 3224, "total_steps": 5676, "loss": 0.4520479440689087, "lr": 8.58891113452376e-06, "epoch": 1.136035242290749, "percentage": 56.8, "elapsed_time": "4:44:35", "remaining_time": "3:36:27"} +{"current_steps": 3225, "total_steps": 5676, "loss": 0.6169587969779968, "lr": 8.58314327692966e-06, "epoch": 1.1363876651982379, "percentage": 56.82, "elapsed_time": "4:44:41", "remaining_time": "3:36:22"} +{"current_steps": 3226, "total_steps": 5676, "loss": 0.6398670673370361, "lr": 8.577375900314327e-06, "epoch": 1.1367400881057268, "percentage": 56.84, "elapsed_time": "4:44:46", "remaining_time": "3:36:16"} +{"current_steps": 3227, "total_steps": 5676, "loss": 0.5772207975387573, "lr": 8.571609006635604e-06, "epoch": 1.1370925110132157, "percentage": 56.85, "elapsed_time": "4:44:51", "remaining_time": "3:36:10"} +{"current_steps": 3228, "total_steps": 5676, "loss": 0.5561503171920776, "lr": 8.565842597851165e-06, "epoch": 1.137444933920705, "percentage": 56.87, "elapsed_time": "4:44:57", "remaining_time": "3:36:05"} +{"current_steps": 3229, "total_steps": 5676, "loss": 0.4702373743057251, "lr": 8.560076675918537e-06, "epoch": 1.1377973568281938, "percentage": 56.89, "elapsed_time": "4:45:01", "remaining_time": "3:36:00"} +{"current_steps": 3230, "total_steps": 5676, "loss": 0.5967564582824707, "lr": 8.554311242795061e-06, "epoch": 1.1381497797356828, "percentage": 56.91, "elapsed_time": "4:45:05", "remaining_time": "3:35:53"} +{"current_steps": 3231, "total_steps": 5676, "loss": 0.4749453663825989, "lr": 8.548546300437928e-06, "epoch": 1.138502202643172, "percentage": 56.92, "elapsed_time": "4:45:12", "remaining_time": "3:35:49"} +{"current_steps": 3232, "total_steps": 5676, "loss": 0.6939869523048401, "lr": 8.542781850804155e-06, "epoch": 1.1388546255506609, "percentage": 56.94, "elapsed_time": "4:45:18", "remaining_time": "3:35:44"} +{"current_steps": 3233, "total_steps": 5676, "loss": 0.5618892908096313, "lr": 
8.537017895850593e-06, "epoch": 1.1392070484581498, "percentage": 56.96, "elapsed_time": "4:45:22", "remaining_time": "3:35:38"} +{"current_steps": 3234, "total_steps": 5676, "loss": 0.6627654433250427, "lr": 8.531254437533925e-06, "epoch": 1.1395594713656387, "percentage": 56.98, "elapsed_time": "4:45:28", "remaining_time": "3:35:33"} +{"current_steps": 3235, "total_steps": 5676, "loss": 0.6365151405334473, "lr": 8.525491477810671e-06, "epoch": 1.1399118942731277, "percentage": 56.99, "elapsed_time": "4:45:33", "remaining_time": "3:35:28"} +{"current_steps": 3236, "total_steps": 5676, "loss": 0.5207303762435913, "lr": 8.519729018637164e-06, "epoch": 1.1402643171806168, "percentage": 57.01, "elapsed_time": "4:45:38", "remaining_time": "3:35:22"} +{"current_steps": 3237, "total_steps": 5676, "loss": 0.7469059228897095, "lr": 8.513967061969594e-06, "epoch": 1.1406167400881058, "percentage": 57.03, "elapsed_time": "4:45:44", "remaining_time": "3:35:18"} +{"current_steps": 3238, "total_steps": 5676, "loss": 0.5778630971908569, "lr": 8.508205609763955e-06, "epoch": 1.1409691629955947, "percentage": 57.05, "elapsed_time": "4:45:49", "remaining_time": "3:35:12"} +{"current_steps": 3239, "total_steps": 5676, "loss": 0.5447480082511902, "lr": 8.502444663976089e-06, "epoch": 1.1413215859030836, "percentage": 57.06, "elapsed_time": "4:45:54", "remaining_time": "3:35:06"} +{"current_steps": 3240, "total_steps": 5676, "loss": 0.6002986431121826, "lr": 8.496684226561653e-06, "epoch": 1.1416740088105728, "percentage": 57.08, "elapsed_time": "4:46:00", "remaining_time": "3:35:01"} +{"current_steps": 3241, "total_steps": 5676, "loss": 0.7627072930335999, "lr": 8.490924299476133e-06, "epoch": 1.1420264317180617, "percentage": 57.1, "elapsed_time": "4:46:04", "remaining_time": "3:34:55"} +{"current_steps": 3242, "total_steps": 5676, "loss": 0.6406078338623047, "lr": 8.485164884674854e-06, "epoch": 1.1423788546255507, "percentage": 57.12, "elapsed_time": "4:46:10", "remaining_time": 
"3:34:51"} +{"current_steps": 3243, "total_steps": 5676, "loss": 0.47047436237335205, "lr": 8.479405984112949e-06, "epoch": 1.1427312775330396, "percentage": 57.14, "elapsed_time": "4:46:15", "remaining_time": "3:34:45"} +{"current_steps": 3244, "total_steps": 5676, "loss": 0.6702529191970825, "lr": 8.473647599745393e-06, "epoch": 1.1430837004405285, "percentage": 57.15, "elapsed_time": "4:46:20", "remaining_time": "3:34:40"} +{"current_steps": 3245, "total_steps": 5676, "loss": 0.6570258140563965, "lr": 8.467889733526977e-06, "epoch": 1.1434361233480177, "percentage": 57.17, "elapsed_time": "4:46:27", "remaining_time": "3:34:36"} +{"current_steps": 3246, "total_steps": 5676, "loss": 0.6248423457145691, "lr": 8.462132387412312e-06, "epoch": 1.1437885462555066, "percentage": 57.19, "elapsed_time": "4:46:32", "remaining_time": "3:34:30"} +{"current_steps": 3247, "total_steps": 5676, "loss": 0.7377427816390991, "lr": 8.456375563355842e-06, "epoch": 1.1441409691629956, "percentage": 57.21, "elapsed_time": "4:46:37", "remaining_time": "3:34:24"} +{"current_steps": 3248, "total_steps": 5676, "loss": 0.6469020843505859, "lr": 8.45061926331183e-06, "epoch": 1.1444933920704845, "percentage": 57.22, "elapsed_time": "4:46:41", "remaining_time": "3:34:18"} +{"current_steps": 3249, "total_steps": 5676, "loss": 0.6417430639266968, "lr": 8.444863489234356e-06, "epoch": 1.1448458149779737, "percentage": 57.24, "elapsed_time": "4:46:45", "remaining_time": "3:34:12"} +{"current_steps": 3250, "total_steps": 5676, "loss": 0.5447275638580322, "lr": 8.439108243077335e-06, "epoch": 1.1451982378854626, "percentage": 57.26, "elapsed_time": "4:46:49", "remaining_time": "3:34:06"} +{"current_steps": 3251, "total_steps": 5676, "loss": 0.6621315479278564, "lr": 8.433353526794484e-06, "epoch": 1.1455506607929515, "percentage": 57.28, "elapsed_time": "4:46:54", "remaining_time": "3:34:00"} +{"current_steps": 3252, "total_steps": 5676, "loss": 0.5660392045974731, "lr": 8.42759934233936e-06, 
"epoch": 1.1459030837004405, "percentage": 57.29, "elapsed_time": "4:46:58", "remaining_time": "3:33:54"} +{"current_steps": 3253, "total_steps": 5676, "loss": 0.43074172735214233, "lr": 8.42184569166532e-06, "epoch": 1.1462555066079294, "percentage": 57.31, "elapsed_time": "4:47:04", "remaining_time": "3:33:49"} +{"current_steps": 3254, "total_steps": 5676, "loss": 0.5863226056098938, "lr": 8.416092576725554e-06, "epoch": 1.1466079295154186, "percentage": 57.33, "elapsed_time": "4:47:08", "remaining_time": "3:33:43"} +{"current_steps": 3255, "total_steps": 5676, "loss": 0.6003422737121582, "lr": 8.410339999473067e-06, "epoch": 1.1469603524229075, "percentage": 57.35, "elapsed_time": "4:47:12", "remaining_time": "3:33:37"} +{"current_steps": 3256, "total_steps": 5676, "loss": 0.6109241247177124, "lr": 8.404587961860678e-06, "epoch": 1.1473127753303964, "percentage": 57.36, "elapsed_time": "4:47:19", "remaining_time": "3:33:32"} +{"current_steps": 3257, "total_steps": 5676, "loss": 0.5749140977859497, "lr": 8.398836465841021e-06, "epoch": 1.1476651982378854, "percentage": 57.38, "elapsed_time": "4:47:23", "remaining_time": "3:33:26"} +{"current_steps": 3258, "total_steps": 5676, "loss": 0.6920739412307739, "lr": 8.393085513366557e-06, "epoch": 1.1480176211453745, "percentage": 57.4, "elapsed_time": "4:47:29", "remaining_time": "3:33:21"} +{"current_steps": 3259, "total_steps": 5676, "loss": 0.6632573008537292, "lr": 8.38733510638955e-06, "epoch": 1.1483700440528635, "percentage": 57.42, "elapsed_time": "4:47:33", "remaining_time": "3:33:15"} +{"current_steps": 3260, "total_steps": 5676, "loss": 0.6396503448486328, "lr": 8.381585246862091e-06, "epoch": 1.1487224669603524, "percentage": 57.43, "elapsed_time": "4:47:38", "remaining_time": "3:33:10"} +{"current_steps": 3261, "total_steps": 5676, "loss": 0.5975937843322754, "lr": 8.375835936736072e-06, "epoch": 1.1490748898678413, "percentage": 57.45, "elapsed_time": "4:47:44", "remaining_time": "3:33:05"} 
+{"current_steps": 3262, "total_steps": 5676, "loss": 0.6297920346260071, "lr": 8.370087177963204e-06, "epoch": 1.1494273127753303, "percentage": 57.47, "elapsed_time": "4:47:48", "remaining_time": "3:32:59"} +{"current_steps": 3263, "total_steps": 5676, "loss": 0.7004375457763672, "lr": 8.364338972495016e-06, "epoch": 1.1497797356828194, "percentage": 57.49, "elapsed_time": "4:47:53", "remaining_time": "3:32:53"} +{"current_steps": 3264, "total_steps": 5676, "loss": 0.5850871801376343, "lr": 8.358591322282845e-06, "epoch": 1.1501321585903084, "percentage": 57.51, "elapsed_time": "4:47:58", "remaining_time": "3:32:48"} +{"current_steps": 3265, "total_steps": 5676, "loss": 0.493900865316391, "lr": 8.352844229277834e-06, "epoch": 1.1504845814977973, "percentage": 57.52, "elapsed_time": "4:48:02", "remaining_time": "3:32:42"} +{"current_steps": 3266, "total_steps": 5676, "loss": 0.573354959487915, "lr": 8.34709769543095e-06, "epoch": 1.1508370044052865, "percentage": 57.54, "elapsed_time": "4:48:07", "remaining_time": "3:32:36"} +{"current_steps": 3267, "total_steps": 5676, "loss": 0.7154442667961121, "lr": 8.341351722692951e-06, "epoch": 1.1511894273127754, "percentage": 57.56, "elapsed_time": "4:48:12", "remaining_time": "3:32:31"} +{"current_steps": 3268, "total_steps": 5676, "loss": 0.5429074764251709, "lr": 8.335606313014432e-06, "epoch": 1.1515418502202643, "percentage": 57.58, "elapsed_time": "4:48:18", "remaining_time": "3:32:26"} +{"current_steps": 3269, "total_steps": 5676, "loss": 0.6938891410827637, "lr": 8.329861468345768e-06, "epoch": 1.1518942731277533, "percentage": 57.59, "elapsed_time": "4:48:23", "remaining_time": "3:32:20"} +{"current_steps": 3270, "total_steps": 5676, "loss": 0.7114205360412598, "lr": 8.324117190637157e-06, "epoch": 1.1522466960352422, "percentage": 57.61, "elapsed_time": "4:48:27", "remaining_time": "3:32:14"} +{"current_steps": 3271, "total_steps": 5676, "loss": 0.5353071093559265, "lr": 8.318373481838605e-06, "epoch": 
1.1525991189427314, "percentage": 57.63, "elapsed_time": "4:48:31", "remaining_time": "3:32:08"} +{"current_steps": 3272, "total_steps": 5676, "loss": 0.7838516235351562, "lr": 8.312630343899921e-06, "epoch": 1.1529515418502203, "percentage": 57.65, "elapsed_time": "4:48:36", "remaining_time": "3:32:02"} +{"current_steps": 3273, "total_steps": 5676, "loss": 0.630479633808136, "lr": 8.306887778770724e-06, "epoch": 1.1533039647577092, "percentage": 57.66, "elapsed_time": "4:48:42", "remaining_time": "3:31:58"} +{"current_steps": 3274, "total_steps": 5676, "loss": 0.6568116545677185, "lr": 8.301145788400438e-06, "epoch": 1.1536563876651982, "percentage": 57.68, "elapsed_time": "4:48:47", "remaining_time": "3:31:52"} +{"current_steps": 3275, "total_steps": 5676, "loss": 0.5410804748535156, "lr": 8.295404374738278e-06, "epoch": 1.1540088105726873, "percentage": 57.7, "elapsed_time": "4:48:52", "remaining_time": "3:31:47"} +{"current_steps": 3276, "total_steps": 5676, "loss": 0.6699862480163574, "lr": 8.289663539733292e-06, "epoch": 1.1543612334801763, "percentage": 57.72, "elapsed_time": "4:48:58", "remaining_time": "3:31:41"} +{"current_steps": 3277, "total_steps": 5676, "loss": 0.6828576326370239, "lr": 8.283923285334304e-06, "epoch": 1.1547136563876652, "percentage": 57.73, "elapsed_time": "4:49:02", "remaining_time": "3:31:35"} +{"current_steps": 3278, "total_steps": 5676, "loss": 0.5569214820861816, "lr": 8.278183613489951e-06, "epoch": 1.1550660792951541, "percentage": 57.75, "elapsed_time": "4:49:07", "remaining_time": "3:31:30"} +{"current_steps": 3279, "total_steps": 5676, "loss": 0.6276477575302124, "lr": 8.27244452614868e-06, "epoch": 1.155418502202643, "percentage": 57.77, "elapsed_time": "4:49:13", "remaining_time": "3:31:25"} +{"current_steps": 3280, "total_steps": 5676, "loss": 0.5752792954444885, "lr": 8.266706025258727e-06, "epoch": 1.1557709251101322, "percentage": 57.79, "elapsed_time": "4:49:19", "remaining_time": "3:31:20"} +{"current_steps": 3281, 
"total_steps": 5676, "loss": 0.6149388551712036, "lr": 8.260968112768137e-06, "epoch": 1.1561233480176212, "percentage": 57.8, "elapsed_time": "4:49:24", "remaining_time": "3:31:15"} +{"current_steps": 3282, "total_steps": 5676, "loss": 0.6399196982383728, "lr": 8.255230790624755e-06, "epoch": 1.1564757709251101, "percentage": 57.82, "elapsed_time": "4:49:29", "remaining_time": "3:31:10"} +{"current_steps": 3283, "total_steps": 5676, "loss": 0.6927458047866821, "lr": 8.249494060776215e-06, "epoch": 1.156828193832599, "percentage": 57.84, "elapsed_time": "4:49:35", "remaining_time": "3:31:04"} +{"current_steps": 3284, "total_steps": 5676, "loss": 0.5843946933746338, "lr": 8.243757925169968e-06, "epoch": 1.1571806167400882, "percentage": 57.86, "elapsed_time": "4:49:41", "remaining_time": "3:31:00"} +{"current_steps": 3285, "total_steps": 5676, "loss": 0.6469332575798035, "lr": 8.238022385753248e-06, "epoch": 1.1575330396475771, "percentage": 57.88, "elapsed_time": "4:49:47", "remaining_time": "3:30:55"} +{"current_steps": 3286, "total_steps": 5676, "loss": 0.572630763053894, "lr": 8.23228744447309e-06, "epoch": 1.157885462555066, "percentage": 57.89, "elapsed_time": "4:49:53", "remaining_time": "3:30:50"} +{"current_steps": 3287, "total_steps": 5676, "loss": 0.6872239112854004, "lr": 8.226553103276335e-06, "epoch": 1.158237885462555, "percentage": 57.91, "elapsed_time": "4:49:58", "remaining_time": "3:30:45"} +{"current_steps": 3288, "total_steps": 5676, "loss": 0.5116995573043823, "lr": 8.220819364109607e-06, "epoch": 1.158590308370044, "percentage": 57.93, "elapsed_time": "4:50:04", "remaining_time": "3:30:40"} +{"current_steps": 3289, "total_steps": 5676, "loss": 0.6179347038269043, "lr": 8.215086228919336e-06, "epoch": 1.1589427312775331, "percentage": 57.95, "elapsed_time": "4:50:09", "remaining_time": "3:30:35"} +{"current_steps": 3290, "total_steps": 5676, "loss": 0.573688805103302, "lr": 8.209353699651745e-06, "epoch": 1.159295154185022, "percentage": 57.96, 
"elapsed_time": "4:50:14", "remaining_time": "3:30:29"} +{"current_steps": 3291, "total_steps": 5676, "loss": 0.6622583866119385, "lr": 8.20362177825284e-06, "epoch": 1.159647577092511, "percentage": 57.98, "elapsed_time": "4:50:20", "remaining_time": "3:30:24"} +{"current_steps": 3292, "total_steps": 5676, "loss": 0.4945096969604492, "lr": 8.197890466668441e-06, "epoch": 1.16, "percentage": 58.0, "elapsed_time": "4:50:26", "remaining_time": "3:30:19"} +{"current_steps": 3293, "total_steps": 5676, "loss": 0.5657082796096802, "lr": 8.19215976684414e-06, "epoch": 1.160352422907489, "percentage": 58.02, "elapsed_time": "4:50:30", "remaining_time": "3:30:13"} +{"current_steps": 3294, "total_steps": 5676, "loss": 0.5684623122215271, "lr": 8.186429680725339e-06, "epoch": 1.160704845814978, "percentage": 58.03, "elapsed_time": "4:50:36", "remaining_time": "3:30:08"} +{"current_steps": 3295, "total_steps": 5676, "loss": 0.567638635635376, "lr": 8.180700210257223e-06, "epoch": 1.161057268722467, "percentage": 58.05, "elapsed_time": "4:50:42", "remaining_time": "3:30:03"} +{"current_steps": 3296, "total_steps": 5676, "loss": 0.7182992696762085, "lr": 8.174971357384762e-06, "epoch": 1.1614096916299559, "percentage": 58.07, "elapsed_time": "4:50:46", "remaining_time": "3:29:57"} +{"current_steps": 3297, "total_steps": 5676, "loss": 0.7188737392425537, "lr": 8.169243124052731e-06, "epoch": 1.1617621145374448, "percentage": 58.09, "elapsed_time": "4:50:51", "remaining_time": "3:29:52"} +{"current_steps": 3298, "total_steps": 5676, "loss": 0.5532418489456177, "lr": 8.163515512205687e-06, "epoch": 1.162114537444934, "percentage": 58.1, "elapsed_time": "4:50:58", "remaining_time": "3:29:48"} +{"current_steps": 3299, "total_steps": 5676, "loss": 0.7167447209358215, "lr": 8.157788523787967e-06, "epoch": 1.162466960352423, "percentage": 58.12, "elapsed_time": "4:51:03", "remaining_time": "3:29:42"} +{"current_steps": 3300, "total_steps": 5676, "loss": 0.633411169052124, "lr": 
8.152062160743716e-06, "epoch": 1.1628193832599119, "percentage": 58.14, "elapsed_time": "4:51:08", "remaining_time": "3:29:37"} +{"current_steps": 3301, "total_steps": 5676, "loss": 0.6686321496963501, "lr": 8.146336425016849e-06, "epoch": 1.1631718061674008, "percentage": 58.16, "elapsed_time": "4:51:17", "remaining_time": "3:29:34"} +{"current_steps": 3302, "total_steps": 5676, "loss": 0.608701765537262, "lr": 8.140611318551078e-06, "epoch": 1.16352422907489, "percentage": 58.17, "elapsed_time": "4:51:21", "remaining_time": "3:29:28"} +{"current_steps": 3303, "total_steps": 5676, "loss": 0.5607466101646423, "lr": 8.1348868432899e-06, "epoch": 1.1638766519823789, "percentage": 58.19, "elapsed_time": "4:51:26", "remaining_time": "3:29:23"} +{"current_steps": 3304, "total_steps": 5676, "loss": 0.6397457122802734, "lr": 8.12916300117659e-06, "epoch": 1.1642290748898678, "percentage": 58.21, "elapsed_time": "4:51:31", "remaining_time": "3:29:17"} +{"current_steps": 3305, "total_steps": 5676, "loss": 0.6681507229804993, "lr": 8.123439794154223e-06, "epoch": 1.1645814977973568, "percentage": 58.23, "elapsed_time": "4:51:36", "remaining_time": "3:29:12"} +{"current_steps": 3306, "total_steps": 5676, "loss": 0.5549972057342529, "lr": 8.117717224165645e-06, "epoch": 1.1649339207048457, "percentage": 58.25, "elapsed_time": "4:51:41", "remaining_time": "3:29:06"} +{"current_steps": 3307, "total_steps": 5676, "loss": 0.7519058585166931, "lr": 8.111995293153486e-06, "epoch": 1.1652863436123349, "percentage": 58.26, "elapsed_time": "4:51:46", "remaining_time": "3:29:00"} +{"current_steps": 3308, "total_steps": 5676, "loss": 0.7100121378898621, "lr": 8.106274003060172e-06, "epoch": 1.1656387665198238, "percentage": 58.28, "elapsed_time": "4:51:52", "remaining_time": "3:28:56"} +{"current_steps": 3309, "total_steps": 5676, "loss": 0.6297321319580078, "lr": 8.100553355827897e-06, "epoch": 1.1659911894273127, "percentage": 58.3, "elapsed_time": "4:51:56", "remaining_time": 
"3:28:50"} +{"current_steps": 3310, "total_steps": 5676, "loss": 0.6875895857810974, "lr": 8.094833353398645e-06, "epoch": 1.1663436123348019, "percentage": 58.32, "elapsed_time": "4:52:01", "remaining_time": "3:28:44"} +{"current_steps": 3311, "total_steps": 5676, "loss": 0.5369099974632263, "lr": 8.08911399771418e-06, "epoch": 1.1666960352422908, "percentage": 58.33, "elapsed_time": "4:52:06", "remaining_time": "3:28:38"} +{"current_steps": 3312, "total_steps": 5676, "loss": 0.5598124265670776, "lr": 8.083395290716042e-06, "epoch": 1.1670484581497798, "percentage": 58.35, "elapsed_time": "4:52:10", "remaining_time": "3:28:32"} +{"current_steps": 3313, "total_steps": 5676, "loss": 0.6438342332839966, "lr": 8.077677234345557e-06, "epoch": 1.1674008810572687, "percentage": 58.37, "elapsed_time": "4:52:16", "remaining_time": "3:28:27"} +{"current_steps": 3314, "total_steps": 5676, "loss": 0.5558618307113647, "lr": 8.07195983054383e-06, "epoch": 1.1677533039647576, "percentage": 58.39, "elapsed_time": "4:52:21", "remaining_time": "3:28:22"} +{"current_steps": 3315, "total_steps": 5676, "loss": 0.5729602575302124, "lr": 8.06624308125173e-06, "epoch": 1.1681057268722468, "percentage": 58.4, "elapsed_time": "4:52:27", "remaining_time": "3:28:17"} +{"current_steps": 3316, "total_steps": 5676, "loss": 0.5094903707504272, "lr": 8.060526988409929e-06, "epoch": 1.1684581497797357, "percentage": 58.42, "elapsed_time": "4:52:32", "remaining_time": "3:28:11"} +{"current_steps": 3317, "total_steps": 5676, "loss": 0.6605818867683411, "lr": 8.054811553958853e-06, "epoch": 1.1688105726872247, "percentage": 58.44, "elapsed_time": "4:52:37", "remaining_time": "3:28:06"} +{"current_steps": 3318, "total_steps": 5676, "loss": 0.7929576635360718, "lr": 8.04909677983872e-06, "epoch": 1.1691629955947136, "percentage": 58.46, "elapsed_time": "4:52:41", "remaining_time": "3:28:00"} +{"current_steps": 3319, "total_steps": 5676, "loss": 0.5915192365646362, "lr": 8.043382667989514e-06, "epoch": 
1.1695154185022028, "percentage": 58.47, "elapsed_time": "4:52:46", "remaining_time": "3:27:54"} +{"current_steps": 3320, "total_steps": 5676, "loss": 0.5923853516578674, "lr": 8.037669220351e-06, "epoch": 1.1698678414096917, "percentage": 58.49, "elapsed_time": "4:52:51", "remaining_time": "3:27:49"} +{"current_steps": 3321, "total_steps": 5676, "loss": 0.7034223079681396, "lr": 8.031956438862718e-06, "epoch": 1.1702202643171806, "percentage": 58.51, "elapsed_time": "4:52:57", "remaining_time": "3:27:44"} +{"current_steps": 3322, "total_steps": 5676, "loss": 0.6093307733535767, "lr": 8.026244325463975e-06, "epoch": 1.1705726872246696, "percentage": 58.53, "elapsed_time": "4:53:03", "remaining_time": "3:27:39"} +{"current_steps": 3323, "total_steps": 5676, "loss": 0.5709424614906311, "lr": 8.020532882093862e-06, "epoch": 1.1709251101321585, "percentage": 58.54, "elapsed_time": "4:53:07", "remaining_time": "3:27:33"} +{"current_steps": 3324, "total_steps": 5676, "loss": 0.5242069959640503, "lr": 8.01482211069123e-06, "epoch": 1.1712775330396477, "percentage": 58.56, "elapsed_time": "4:53:12", "remaining_time": "3:27:28"} +{"current_steps": 3325, "total_steps": 5676, "loss": 0.5869580507278442, "lr": 8.009112013194707e-06, "epoch": 1.1716299559471366, "percentage": 58.58, "elapsed_time": "4:53:17", "remaining_time": "3:27:22"} +{"current_steps": 3326, "total_steps": 5676, "loss": 0.7281460762023926, "lr": 8.0034025915427e-06, "epoch": 1.1719823788546255, "percentage": 58.6, "elapsed_time": "4:53:22", "remaining_time": "3:27:17"} +{"current_steps": 3327, "total_steps": 5676, "loss": 0.6877723336219788, "lr": 7.997693847673378e-06, "epoch": 1.1723348017621145, "percentage": 58.62, "elapsed_time": "4:53:27", "remaining_time": "3:27:11"} +{"current_steps": 3328, "total_steps": 5676, "loss": 0.6045002937316895, "lr": 7.991985783524676e-06, "epoch": 1.1726872246696036, "percentage": 58.63, "elapsed_time": "4:53:32", "remaining_time": "3:27:06"} +{"current_steps": 3329, 
"total_steps": 5676, "loss": 0.5698690414428711, "lr": 7.986278401034315e-06, "epoch": 1.1730396475770926, "percentage": 58.65, "elapsed_time": "4:53:37", "remaining_time": "3:27:00"} +{"current_steps": 3330, "total_steps": 5676, "loss": 0.6802438497543335, "lr": 7.980571702139759e-06, "epoch": 1.1733920704845815, "percentage": 58.67, "elapsed_time": "4:53:42", "remaining_time": "3:26:54"} +{"current_steps": 3331, "total_steps": 5676, "loss": 0.5840654373168945, "lr": 7.974865688778271e-06, "epoch": 1.1737444933920704, "percentage": 58.69, "elapsed_time": "4:53:48", "remaining_time": "3:26:50"} +{"current_steps": 3332, "total_steps": 5676, "loss": 0.5203073024749756, "lr": 7.969160362886855e-06, "epoch": 1.1740969162995594, "percentage": 58.7, "elapsed_time": "4:53:53", "remaining_time": "3:26:44"} +{"current_steps": 3333, "total_steps": 5676, "loss": 0.4558306932449341, "lr": 7.963455726402292e-06, "epoch": 1.1744493392070485, "percentage": 58.72, "elapsed_time": "4:53:57", "remaining_time": "3:26:38"} +{"current_steps": 3334, "total_steps": 5676, "loss": 0.6200483441352844, "lr": 7.957751781261132e-06, "epoch": 1.1748017621145375, "percentage": 58.74, "elapsed_time": "4:54:04", "remaining_time": "3:26:34"} +{"current_steps": 3335, "total_steps": 5676, "loss": 0.559386670589447, "lr": 7.952048529399686e-06, "epoch": 1.1751541850220264, "percentage": 58.76, "elapsed_time": "4:54:11", "remaining_time": "3:26:30"} +{"current_steps": 3336, "total_steps": 5676, "loss": 0.5521356463432312, "lr": 7.946345972754026e-06, "epoch": 1.1755066079295153, "percentage": 58.77, "elapsed_time": "4:54:15", "remaining_time": "3:26:24"} +{"current_steps": 3337, "total_steps": 5676, "loss": 0.6235495805740356, "lr": 7.940644113260001e-06, "epoch": 1.1758590308370045, "percentage": 58.79, "elapsed_time": "4:54:20", "remaining_time": "3:26:18"} +{"current_steps": 3338, "total_steps": 5676, "loss": 0.5196648836135864, "lr": 7.934942952853203e-06, "epoch": 1.1762114537444934, "percentage": 
58.81, "elapsed_time": "4:54:26", "remaining_time": "3:26:13"} +{"current_steps": 3339, "total_steps": 5676, "loss": 0.5959422588348389, "lr": 7.929242493469013e-06, "epoch": 1.1765638766519824, "percentage": 58.83, "elapsed_time": "4:54:32", "remaining_time": "3:26:08"} +{"current_steps": 3340, "total_steps": 5676, "loss": 0.5400167107582092, "lr": 7.923542737042549e-06, "epoch": 1.1769162995594713, "percentage": 58.84, "elapsed_time": "4:54:37", "remaining_time": "3:26:03"} +{"current_steps": 3341, "total_steps": 5676, "loss": 0.688996434211731, "lr": 7.917843685508702e-06, "epoch": 1.1772687224669602, "percentage": 58.86, "elapsed_time": "4:54:42", "remaining_time": "3:25:58"} +{"current_steps": 3342, "total_steps": 5676, "loss": 0.623216450214386, "lr": 7.912145340802127e-06, "epoch": 1.1776211453744494, "percentage": 58.88, "elapsed_time": "4:54:47", "remaining_time": "3:25:52"} +{"current_steps": 3343, "total_steps": 5676, "loss": 0.587382435798645, "lr": 7.906447704857233e-06, "epoch": 1.1779735682819383, "percentage": 58.9, "elapsed_time": "4:54:53", "remaining_time": "3:25:48"} +{"current_steps": 3344, "total_steps": 5676, "loss": 0.6033053398132324, "lr": 7.900750779608187e-06, "epoch": 1.1783259911894273, "percentage": 58.91, "elapsed_time": "4:55:00", "remaining_time": "3:25:43"} +{"current_steps": 3345, "total_steps": 5676, "loss": 0.557671308517456, "lr": 7.895054566988924e-06, "epoch": 1.1786784140969162, "percentage": 58.93, "elapsed_time": "4:55:06", "remaining_time": "3:25:38"} +{"current_steps": 3346, "total_steps": 5676, "loss": 0.4550681710243225, "lr": 7.889359068933122e-06, "epoch": 1.1790308370044054, "percentage": 58.95, "elapsed_time": "4:55:09", "remaining_time": "3:25:32"} +{"current_steps": 3347, "total_steps": 5676, "loss": 0.6417531967163086, "lr": 7.883664287374235e-06, "epoch": 1.1793832599118943, "percentage": 58.97, "elapsed_time": "4:55:14", "remaining_time": "3:25:26"} +{"current_steps": 3348, "total_steps": 5676, "loss": 
0.703549861907959, "lr": 7.877970224245458e-06, "epoch": 1.1797356828193832, "percentage": 58.99, "elapsed_time": "4:55:19", "remaining_time": "3:25:20"} +{"current_steps": 3349, "total_steps": 5676, "loss": 0.7438976764678955, "lr": 7.87227688147975e-06, "epoch": 1.1800881057268722, "percentage": 59.0, "elapsed_time": "4:55:23", "remaining_time": "3:25:14"} +{"current_steps": 3350, "total_steps": 5676, "loss": 0.5563932657241821, "lr": 7.866584261009823e-06, "epoch": 1.1804405286343613, "percentage": 59.02, "elapsed_time": "4:55:27", "remaining_time": "3:25:08"} +{"current_steps": 3351, "total_steps": 5676, "loss": 0.6332740783691406, "lr": 7.860892364768145e-06, "epoch": 1.1807929515418503, "percentage": 59.04, "elapsed_time": "4:55:31", "remaining_time": "3:25:02"} +{"current_steps": 3352, "total_steps": 5676, "loss": 0.5207923650741577, "lr": 7.855201194686938e-06, "epoch": 1.1811453744493392, "percentage": 59.06, "elapsed_time": "4:55:37", "remaining_time": "3:24:57"} +{"current_steps": 3353, "total_steps": 5676, "loss": 0.5930209755897522, "lr": 7.849510752698179e-06, "epoch": 1.1814977973568281, "percentage": 59.07, "elapsed_time": "4:55:41", "remaining_time": "3:24:51"} +{"current_steps": 3354, "total_steps": 5676, "loss": 0.6207472085952759, "lr": 7.843821040733588e-06, "epoch": 1.1818502202643173, "percentage": 59.09, "elapsed_time": "4:55:46", "remaining_time": "3:24:46"} +{"current_steps": 3355, "total_steps": 5676, "loss": 0.5487867593765259, "lr": 7.838132060724657e-06, "epoch": 1.1822026431718062, "percentage": 59.11, "elapsed_time": "4:55:50", "remaining_time": "3:24:39"} +{"current_steps": 3356, "total_steps": 5676, "loss": 0.5457941889762878, "lr": 7.83244381460261e-06, "epoch": 1.1825550660792952, "percentage": 59.13, "elapsed_time": "4:55:56", "remaining_time": "3:24:35"} +{"current_steps": 3357, "total_steps": 5676, "loss": 0.5203769207000732, "lr": 7.826756304298428e-06, "epoch": 1.182907488986784, "percentage": 59.14, "elapsed_time": 
"4:56:01", "remaining_time": "3:24:29"} +{"current_steps": 3358, "total_steps": 5676, "loss": 0.7241770029067993, "lr": 7.821069531742848e-06, "epoch": 1.183259911894273, "percentage": 59.16, "elapsed_time": "4:56:06", "remaining_time": "3:24:23"} +{"current_steps": 3359, "total_steps": 5676, "loss": 0.5085904598236084, "lr": 7.815383498866351e-06, "epoch": 1.1836123348017622, "percentage": 59.18, "elapsed_time": "4:56:11", "remaining_time": "3:24:18"} +{"current_steps": 3360, "total_steps": 5676, "loss": 0.6219276785850525, "lr": 7.80969820759917e-06, "epoch": 1.1839647577092511, "percentage": 59.2, "elapsed_time": "4:56:16", "remaining_time": "3:24:12"} +{"current_steps": 3361, "total_steps": 5676, "loss": 0.5621576309204102, "lr": 7.804013659871286e-06, "epoch": 1.18431718061674, "percentage": 59.21, "elapsed_time": "4:56:21", "remaining_time": "3:24:07"} +{"current_steps": 3362, "total_steps": 5676, "loss": 0.6862529516220093, "lr": 7.798329857612415e-06, "epoch": 1.184669603524229, "percentage": 59.23, "elapsed_time": "4:56:25", "remaining_time": "3:24:01"} +{"current_steps": 3363, "total_steps": 5676, "loss": 0.5536706447601318, "lr": 7.792646802752045e-06, "epoch": 1.1850220264317182, "percentage": 59.25, "elapsed_time": "4:56:30", "remaining_time": "3:23:55"} +{"current_steps": 3364, "total_steps": 5676, "loss": 0.7158493995666504, "lr": 7.786964497219389e-06, "epoch": 1.185374449339207, "percentage": 59.27, "elapsed_time": "4:56:35", "remaining_time": "3:23:50"} +{"current_steps": 3365, "total_steps": 5676, "loss": 0.6510338187217712, "lr": 7.781282942943411e-06, "epoch": 1.185726872246696, "percentage": 59.28, "elapsed_time": "4:56:40", "remaining_time": "3:23:44"} +{"current_steps": 3366, "total_steps": 5676, "loss": 0.4999651312828064, "lr": 7.775602141852827e-06, "epoch": 1.186079295154185, "percentage": 59.3, "elapsed_time": "4:56:46", "remaining_time": "3:23:40"} +{"current_steps": 3367, "total_steps": 5676, "loss": 0.566371738910675, "lr": 
7.769922095876088e-06, "epoch": 1.186431718061674, "percentage": 59.32, "elapsed_time": "4:56:52", "remaining_time": "3:23:35"} +{"current_steps": 3368, "total_steps": 5676, "loss": 0.6424880623817444, "lr": 7.764242806941396e-06, "epoch": 1.186784140969163, "percentage": 59.34, "elapsed_time": "4:56:57", "remaining_time": "3:23:30"} +{"current_steps": 3369, "total_steps": 5676, "loss": 0.6731792688369751, "lr": 7.758564276976696e-06, "epoch": 1.187136563876652, "percentage": 59.36, "elapsed_time": "4:57:03", "remaining_time": "3:23:25"} +{"current_steps": 3370, "total_steps": 5676, "loss": 0.7350698113441467, "lr": 7.752886507909661e-06, "epoch": 1.187488986784141, "percentage": 59.37, "elapsed_time": "4:57:08", "remaining_time": "3:23:19"} +{"current_steps": 3371, "total_steps": 5676, "loss": 0.49212586879730225, "lr": 7.747209501667729e-06, "epoch": 1.1878414096916299, "percentage": 59.39, "elapsed_time": "4:57:14", "remaining_time": "3:23:14"} +{"current_steps": 3372, "total_steps": 5676, "loss": 0.46775591373443604, "lr": 7.741533260178058e-06, "epoch": 1.188193832599119, "percentage": 59.41, "elapsed_time": "4:57:19", "remaining_time": "3:23:09"} +{"current_steps": 3373, "total_steps": 5676, "loss": 0.7006367444992065, "lr": 7.73585778536756e-06, "epoch": 1.188546255506608, "percentage": 59.43, "elapsed_time": "4:57:24", "remaining_time": "3:23:03"} +{"current_steps": 3374, "total_steps": 5676, "loss": 0.6403789520263672, "lr": 7.730183079162882e-06, "epoch": 1.188898678414097, "percentage": 59.44, "elapsed_time": "4:57:29", "remaining_time": "3:22:58"} +{"current_steps": 3375, "total_steps": 5676, "loss": 0.5788881778717041, "lr": 7.724509143490409e-06, "epoch": 1.1892511013215858, "percentage": 59.46, "elapsed_time": "4:57:33", "remaining_time": "3:22:52"} +{"current_steps": 3376, "total_steps": 5676, "loss": 0.5216118693351746, "lr": 7.718835980276265e-06, "epoch": 1.1896035242290748, "percentage": 59.48, "elapsed_time": "4:57:39", "remaining_time": 
"3:22:47"} +{"current_steps": 3377, "total_steps": 5676, "loss": 0.5951248407363892, "lr": 7.713163591446318e-06, "epoch": 1.189955947136564, "percentage": 59.5, "elapsed_time": "4:57:43", "remaining_time": "3:22:41"} +{"current_steps": 3378, "total_steps": 5676, "loss": 0.4975050687789917, "lr": 7.707491978926157e-06, "epoch": 1.1903083700440529, "percentage": 59.51, "elapsed_time": "4:57:49", "remaining_time": "3:22:36"} +{"current_steps": 3379, "total_steps": 5676, "loss": 0.6019243001937866, "lr": 7.701821144641127e-06, "epoch": 1.1906607929515418, "percentage": 59.53, "elapsed_time": "4:57:55", "remaining_time": "3:22:31"} +{"current_steps": 3380, "total_steps": 5676, "loss": 0.6395450830459595, "lr": 7.696151090516292e-06, "epoch": 1.1910132158590307, "percentage": 59.55, "elapsed_time": "4:58:02", "remaining_time": "3:22:27"} +{"current_steps": 3381, "total_steps": 5676, "loss": 0.579787015914917, "lr": 7.690481818476468e-06, "epoch": 1.19136563876652, "percentage": 59.57, "elapsed_time": "4:58:06", "remaining_time": "3:22:21"} +{"current_steps": 3382, "total_steps": 5676, "loss": 0.5136005878448486, "lr": 7.684813330446191e-06, "epoch": 1.1917180616740088, "percentage": 59.58, "elapsed_time": "4:58:12", "remaining_time": "3:22:16"} +{"current_steps": 3383, "total_steps": 5676, "loss": 0.6639782190322876, "lr": 7.679145628349734e-06, "epoch": 1.1920704845814978, "percentage": 59.6, "elapsed_time": "4:58:17", "remaining_time": "3:22:11"} +{"current_steps": 3384, "total_steps": 5676, "loss": 0.5575984716415405, "lr": 7.673478714111111e-06, "epoch": 1.1924229074889867, "percentage": 59.62, "elapsed_time": "4:58:23", "remaining_time": "3:22:06"} +{"current_steps": 3385, "total_steps": 5676, "loss": 0.6456045508384705, "lr": 7.667812589654062e-06, "epoch": 1.1927753303964757, "percentage": 59.64, "elapsed_time": "4:58:27", "remaining_time": "3:22:00"} +{"current_steps": 3386, "total_steps": 5676, "loss": 0.6936196088790894, "lr": 7.662147256902055e-06, "epoch": 
1.1931277533039648, "percentage": 59.65, "elapsed_time": "4:58:32", "remaining_time": "3:21:54"} +{"current_steps": 3387, "total_steps": 5676, "loss": 0.5490384697914124, "lr": 7.656482717778299e-06, "epoch": 1.1934801762114537, "percentage": 59.67, "elapsed_time": "4:58:38", "remaining_time": "3:21:49"} +{"current_steps": 3388, "total_steps": 5676, "loss": 0.6973621845245361, "lr": 7.650818974205727e-06, "epoch": 1.1938325991189427, "percentage": 59.69, "elapsed_time": "4:58:43", "remaining_time": "3:21:44"} +{"current_steps": 3389, "total_steps": 5676, "loss": 0.7471047639846802, "lr": 7.645156028107005e-06, "epoch": 1.1941850220264318, "percentage": 59.71, "elapsed_time": "4:58:48", "remaining_time": "3:21:38"} +{"current_steps": 3390, "total_steps": 5676, "loss": 0.6205108165740967, "lr": 7.639493881404526e-06, "epoch": 1.1945374449339208, "percentage": 59.73, "elapsed_time": "4:58:53", "remaining_time": "3:21:33"} +{"current_steps": 3391, "total_steps": 5676, "loss": 0.747038722038269, "lr": 7.63383253602041e-06, "epoch": 1.1948898678414097, "percentage": 59.74, "elapsed_time": "4:58:59", "remaining_time": "3:21:28"} +{"current_steps": 3392, "total_steps": 5676, "loss": 0.5185794830322266, "lr": 7.628171993876514e-06, "epoch": 1.1952422907488987, "percentage": 59.76, "elapsed_time": "4:59:04", "remaining_time": "3:21:22"} +{"current_steps": 3393, "total_steps": 5676, "loss": 0.6059385538101196, "lr": 7.6225122568944124e-06, "epoch": 1.1955947136563876, "percentage": 59.78, "elapsed_time": "4:59:09", "remaining_time": "3:21:17"} +{"current_steps": 3394, "total_steps": 5676, "loss": 0.5154507160186768, "lr": 7.6168533269954045e-06, "epoch": 1.1959471365638767, "percentage": 59.8, "elapsed_time": "4:59:16", "remaining_time": "3:21:13"} +{"current_steps": 3395, "total_steps": 5676, "loss": 0.684306263923645, "lr": 7.611195206100529e-06, "epoch": 1.1962995594713657, "percentage": 59.81, "elapsed_time": "4:59:22", "remaining_time": "3:21:08"} +{"current_steps": 
3396, "total_steps": 5676, "loss": 0.5637205839157104, "lr": 7.605537896130537e-06, "epoch": 1.1966519823788546, "percentage": 59.83, "elapsed_time": "4:59:27", "remaining_time": "3:21:03"} +{"current_steps": 3397, "total_steps": 5676, "loss": 0.700809121131897, "lr": 7.599881399005913e-06, "epoch": 1.1970044052863436, "percentage": 59.85, "elapsed_time": "4:59:31", "remaining_time": "3:20:57"} +{"current_steps": 3398, "total_steps": 5676, "loss": 0.45139041543006897, "lr": 7.594225716646859e-06, "epoch": 1.1973568281938327, "percentage": 59.87, "elapsed_time": "4:59:36", "remaining_time": "3:20:51"} +{"current_steps": 3399, "total_steps": 5676, "loss": 0.6623016595840454, "lr": 7.588570850973301e-06, "epoch": 1.1977092511013216, "percentage": 59.88, "elapsed_time": "4:59:43", "remaining_time": "3:20:46"} +{"current_steps": 3400, "total_steps": 5676, "loss": 0.47430598735809326, "lr": 7.582916803904899e-06, "epoch": 1.1980616740088106, "percentage": 59.9, "elapsed_time": "4:59:48", "remaining_time": "3:20:41"} +{"current_steps": 3401, "total_steps": 5676, "loss": 0.7190637588500977, "lr": 7.57726357736101e-06, "epoch": 1.1984140969162995, "percentage": 59.92, "elapsed_time": "4:59:58", "remaining_time": "3:20:39"} +{"current_steps": 3402, "total_steps": 5676, "loss": 0.552079439163208, "lr": 7.571611173260747e-06, "epoch": 1.1987665198237885, "percentage": 59.94, "elapsed_time": "5:00:03", "remaining_time": "3:20:34"} +{"current_steps": 3403, "total_steps": 5676, "loss": 0.5499744415283203, "lr": 7.565959593522914e-06, "epoch": 1.1991189427312776, "percentage": 59.95, "elapsed_time": "5:00:09", "remaining_time": "3:20:29"} +{"current_steps": 3404, "total_steps": 5676, "loss": 0.6013774871826172, "lr": 7.560308840066046e-06, "epoch": 1.1994713656387666, "percentage": 59.97, "elapsed_time": "5:00:15", "remaining_time": "3:20:24"} +{"current_steps": 3405, "total_steps": 5676, "loss": 0.5489538908004761, "lr": 7.554658914808404e-06, "epoch": 1.1998237885462555, 
"percentage": 59.99, "elapsed_time": "5:00:21", "remaining_time": "3:20:19"} +{"current_steps": 3406, "total_steps": 5676, "loss": 0.6124382615089417, "lr": 7.549009819667956e-06, "epoch": 1.2001762114537444, "percentage": 60.01, "elapsed_time": "5:00:25", "remaining_time": "3:20:13"} +{"current_steps": 3407, "total_steps": 5676, "loss": 0.6895862817764282, "lr": 7.543361556562397e-06, "epoch": 1.2005286343612336, "percentage": 60.02, "elapsed_time": "5:00:30", "remaining_time": "3:20:07"} +{"current_steps": 3408, "total_steps": 5676, "loss": 0.6632197499275208, "lr": 7.537714127409139e-06, "epoch": 1.2008810572687225, "percentage": 60.04, "elapsed_time": "5:00:35", "remaining_time": "3:20:02"} +{"current_steps": 3409, "total_steps": 5676, "loss": 0.5940145254135132, "lr": 7.5320675341253e-06, "epoch": 1.2012334801762115, "percentage": 60.06, "elapsed_time": "5:00:40", "remaining_time": "3:19:57"} +{"current_steps": 3410, "total_steps": 5676, "loss": 0.646323561668396, "lr": 7.526421778627735e-06, "epoch": 1.2015859030837004, "percentage": 60.08, "elapsed_time": "5:00:47", "remaining_time": "3:19:52"} +{"current_steps": 3411, "total_steps": 5676, "loss": 0.6173659563064575, "lr": 7.520776862832993e-06, "epoch": 1.2019383259911893, "percentage": 60.1, "elapsed_time": "5:00:52", "remaining_time": "3:19:47"} +{"current_steps": 3412, "total_steps": 5676, "loss": 0.574191689491272, "lr": 7.515132788657347e-06, "epoch": 1.2022907488986785, "percentage": 60.11, "elapsed_time": "5:00:58", "remaining_time": "3:19:42"} +{"current_steps": 3413, "total_steps": 5676, "loss": 0.6243089437484741, "lr": 7.50948955801679e-06, "epoch": 1.2026431718061674, "percentage": 60.13, "elapsed_time": "5:01:02", "remaining_time": "3:19:36"} +{"current_steps": 3414, "total_steps": 5676, "loss": 0.692270040512085, "lr": 7.503847172827022e-06, "epoch": 1.2029955947136564, "percentage": 60.15, "elapsed_time": "5:01:07", "remaining_time": "3:19:30"} +{"current_steps": 3415, "total_steps": 5676, 
"loss": 0.5929970145225525, "lr": 7.498205635003451e-06, "epoch": 1.2033480176211453, "percentage": 60.17, "elapsed_time": "5:01:13", "remaining_time": "3:19:26"} +{"current_steps": 3416, "total_steps": 5676, "loss": 0.5479272603988647, "lr": 7.4925649464612126e-06, "epoch": 1.2037004405286345, "percentage": 60.18, "elapsed_time": "5:01:19", "remaining_time": "3:19:21"} +{"current_steps": 3417, "total_steps": 5676, "loss": 0.5923635363578796, "lr": 7.486925109115135e-06, "epoch": 1.2040528634361234, "percentage": 60.2, "elapsed_time": "5:01:25", "remaining_time": "3:19:16"} +{"current_steps": 3418, "total_steps": 5676, "loss": 0.6530192494392395, "lr": 7.48128612487978e-06, "epoch": 1.2044052863436123, "percentage": 60.22, "elapsed_time": "5:01:30", "remaining_time": "3:19:11"} +{"current_steps": 3419, "total_steps": 5676, "loss": 0.5104716420173645, "lr": 7.475647995669397e-06, "epoch": 1.2047577092511013, "percentage": 60.24, "elapsed_time": "5:01:34", "remaining_time": "3:19:05"} +{"current_steps": 3420, "total_steps": 5676, "loss": 0.6526790261268616, "lr": 7.470010723397958e-06, "epoch": 1.2051101321585902, "percentage": 60.25, "elapsed_time": "5:01:39", "remaining_time": "3:18:59"} +{"current_steps": 3421, "total_steps": 5676, "loss": 0.5985254645347595, "lr": 7.464374309979143e-06, "epoch": 1.2054625550660794, "percentage": 60.27, "elapsed_time": "5:01:45", "remaining_time": "3:18:54"} +{"current_steps": 3422, "total_steps": 5676, "loss": 0.6575271487236023, "lr": 7.458738757326336e-06, "epoch": 1.2058149779735683, "percentage": 60.29, "elapsed_time": "5:01:50", "remaining_time": "3:18:49"} +{"current_steps": 3423, "total_steps": 5676, "loss": 0.5906708836555481, "lr": 7.453104067352637e-06, "epoch": 1.2061674008810572, "percentage": 60.31, "elapsed_time": "5:01:56", "remaining_time": "3:18:43"} +{"current_steps": 3424, "total_steps": 5676, "loss": 0.7992517352104187, "lr": 7.4474702419708465e-06, "epoch": 1.2065198237885462, "percentage": 60.32, 
"elapsed_time": "5:02:01", "remaining_time": "3:18:38"} +{"current_steps": 3425, "total_steps": 5676, "loss": 0.5935543179512024, "lr": 7.4418372830934645e-06, "epoch": 1.2068722466960353, "percentage": 60.34, "elapsed_time": "5:02:07", "remaining_time": "3:18:33"} +{"current_steps": 3426, "total_steps": 5676, "loss": 0.7166613340377808, "lr": 7.436205192632719e-06, "epoch": 1.2072246696035243, "percentage": 60.36, "elapsed_time": "5:02:12", "remaining_time": "3:18:28"} +{"current_steps": 3427, "total_steps": 5676, "loss": 0.5254578590393066, "lr": 7.430573972500519e-06, "epoch": 1.2075770925110132, "percentage": 60.38, "elapsed_time": "5:02:17", "remaining_time": "3:18:23"} +{"current_steps": 3428, "total_steps": 5676, "loss": 0.6586379408836365, "lr": 7.42494362460849e-06, "epoch": 1.2079295154185021, "percentage": 60.39, "elapsed_time": "5:02:24", "remaining_time": "3:18:18"} +{"current_steps": 3429, "total_steps": 5676, "loss": 0.6960606575012207, "lr": 7.419314150867964e-06, "epoch": 1.208281938325991, "percentage": 60.41, "elapsed_time": "5:02:29", "remaining_time": "3:18:13"} +{"current_steps": 3430, "total_steps": 5676, "loss": 0.6107728481292725, "lr": 7.413685553189969e-06, "epoch": 1.2086343612334802, "percentage": 60.43, "elapsed_time": "5:02:36", "remaining_time": "3:18:08"} +{"current_steps": 3431, "total_steps": 5676, "loss": 0.6446499824523926, "lr": 7.408057833485241e-06, "epoch": 1.2089867841409692, "percentage": 60.45, "elapsed_time": "5:02:40", "remaining_time": "3:18:02"} +{"current_steps": 3432, "total_steps": 5676, "loss": 0.7070472240447998, "lr": 7.402430993664216e-06, "epoch": 1.209339207048458, "percentage": 60.47, "elapsed_time": "5:02:45", "remaining_time": "3:17:57"} +{"current_steps": 3433, "total_steps": 5676, "loss": 0.5919365882873535, "lr": 7.396805035637023e-06, "epoch": 1.2096916299559473, "percentage": 60.48, "elapsed_time": "5:02:49", "remaining_time": "3:17:51"} +{"current_steps": 3434, "total_steps": 5676, "loss": 
0.5975243449211121, "lr": 7.391179961313512e-06, "epoch": 1.2100440528634362, "percentage": 60.5, "elapsed_time": "5:02:53", "remaining_time": "3:17:45"} +{"current_steps": 3435, "total_steps": 5676, "loss": 0.5772840976715088, "lr": 7.385555772603212e-06, "epoch": 1.2103964757709251, "percentage": 60.52, "elapsed_time": "5:02:58", "remaining_time": "3:17:40"} +{"current_steps": 3436, "total_steps": 5676, "loss": 0.7335072755813599, "lr": 7.379932471415362e-06, "epoch": 1.210748898678414, "percentage": 60.54, "elapsed_time": "5:03:03", "remaining_time": "3:17:33"} +{"current_steps": 3437, "total_steps": 5676, "loss": 0.6214553713798523, "lr": 7.3743100596589e-06, "epoch": 1.211101321585903, "percentage": 60.55, "elapsed_time": "5:03:07", "remaining_time": "3:17:27"} +{"current_steps": 3438, "total_steps": 5676, "loss": 0.6515316963195801, "lr": 7.368688539242457e-06, "epoch": 1.2114537444933922, "percentage": 60.57, "elapsed_time": "5:03:12", "remaining_time": "3:17:22"} +{"current_steps": 3439, "total_steps": 5676, "loss": 0.6479551196098328, "lr": 7.3630679120743665e-06, "epoch": 1.211806167400881, "percentage": 60.59, "elapsed_time": "5:03:16", "remaining_time": "3:17:16"} +{"current_steps": 3440, "total_steps": 5676, "loss": 0.6195069551467896, "lr": 7.357448180062657e-06, "epoch": 1.21215859030837, "percentage": 60.61, "elapsed_time": "5:03:23", "remaining_time": "3:17:11"} +{"current_steps": 3441, "total_steps": 5676, "loss": 0.5939193964004517, "lr": 7.351829345115047e-06, "epoch": 1.212511013215859, "percentage": 60.62, "elapsed_time": "5:03:28", "remaining_time": "3:17:06"} +{"current_steps": 3442, "total_steps": 5676, "loss": 0.6346434354782104, "lr": 7.346211409138964e-06, "epoch": 1.2128634361233481, "percentage": 60.64, "elapsed_time": "5:03:33", "remaining_time": "3:17:01"} +{"current_steps": 3443, "total_steps": 5676, "loss": 0.5924171209335327, "lr": 7.340594374041516e-06, "epoch": 1.213215859030837, "percentage": 60.66, "elapsed_time": "5:03:38", 
"remaining_time": "3:16:55"} +{"current_steps": 3444, "total_steps": 5676, "loss": 0.48560285568237305, "lr": 7.334978241729514e-06, "epoch": 1.213568281938326, "percentage": 60.68, "elapsed_time": "5:03:45", "remaining_time": "3:16:51"} +{"current_steps": 3445, "total_steps": 5676, "loss": 0.643998384475708, "lr": 7.329363014109463e-06, "epoch": 1.213920704845815, "percentage": 60.69, "elapsed_time": "5:03:51", "remaining_time": "3:16:47"} +{"current_steps": 3446, "total_steps": 5676, "loss": 0.6041159629821777, "lr": 7.323748693087551e-06, "epoch": 1.2142731277533039, "percentage": 60.71, "elapsed_time": "5:03:56", "remaining_time": "3:16:41"} +{"current_steps": 3447, "total_steps": 5676, "loss": 0.7143498659133911, "lr": 7.318135280569674e-06, "epoch": 1.214625550660793, "percentage": 60.73, "elapsed_time": "5:04:01", "remaining_time": "3:16:35"} +{"current_steps": 3448, "total_steps": 5676, "loss": 0.5821564197540283, "lr": 7.312522778461409e-06, "epoch": 1.214977973568282, "percentage": 60.75, "elapsed_time": "5:04:08", "remaining_time": "3:16:31"} +{"current_steps": 3449, "total_steps": 5676, "loss": 0.5786745548248291, "lr": 7.3069111886680166e-06, "epoch": 1.215330396475771, "percentage": 60.76, "elapsed_time": "5:04:13", "remaining_time": "3:16:25"} +{"current_steps": 3450, "total_steps": 5676, "loss": 0.6740534901618958, "lr": 7.3013005130944666e-06, "epoch": 1.2156828193832598, "percentage": 60.78, "elapsed_time": "5:04:17", "remaining_time": "3:16:20"} +{"current_steps": 3451, "total_steps": 5676, "loss": 0.6353983879089355, "lr": 7.2956907536454045e-06, "epoch": 1.216035242290749, "percentage": 60.8, "elapsed_time": "5:04:23", "remaining_time": "3:16:15"} +{"current_steps": 3452, "total_steps": 5676, "loss": 0.6890027523040771, "lr": 7.290081912225172e-06, "epoch": 1.216387665198238, "percentage": 60.82, "elapsed_time": "5:04:27", "remaining_time": "3:16:09"} +{"current_steps": 3453, "total_steps": 5676, "loss": 0.6485118269920349, "lr": 
7.284473990737795e-06, "epoch": 1.2167400881057269, "percentage": 60.84, "elapsed_time": "5:04:31", "remaining_time": "3:16:03"} +{"current_steps": 3454, "total_steps": 5676, "loss": 0.5364162921905518, "lr": 7.2788669910869845e-06, "epoch": 1.2170925110132158, "percentage": 60.85, "elapsed_time": "5:04:37", "remaining_time": "3:15:57"} +{"current_steps": 3455, "total_steps": 5676, "loss": 0.6625754833221436, "lr": 7.27326091517615e-06, "epoch": 1.2174449339207047, "percentage": 60.87, "elapsed_time": "5:04:41", "remaining_time": "3:15:52"} +{"current_steps": 3456, "total_steps": 5676, "loss": 0.7090050578117371, "lr": 7.267655764908374e-06, "epoch": 1.217797356828194, "percentage": 60.89, "elapsed_time": "5:04:45", "remaining_time": "3:15:46"} +{"current_steps": 3457, "total_steps": 5676, "loss": 0.6556301116943359, "lr": 7.26205154218643e-06, "epoch": 1.2181497797356828, "percentage": 60.91, "elapsed_time": "5:04:50", "remaining_time": "3:15:40"} +{"current_steps": 3458, "total_steps": 5676, "loss": 0.7998625636100769, "lr": 7.2564482489127815e-06, "epoch": 1.2185022026431718, "percentage": 60.92, "elapsed_time": "5:04:54", "remaining_time": "3:15:34"} +{"current_steps": 3459, "total_steps": 5676, "loss": 0.6336952447891235, "lr": 7.250845886989568e-06, "epoch": 1.2188546255506607, "percentage": 60.94, "elapsed_time": "5:04:59", "remaining_time": "3:15:28"} +{"current_steps": 3460, "total_steps": 5676, "loss": 0.5072300434112549, "lr": 7.245244458318621e-06, "epoch": 1.2192070484581499, "percentage": 60.96, "elapsed_time": "5:05:04", "remaining_time": "3:15:23"} +{"current_steps": 3461, "total_steps": 5676, "loss": 0.6297830939292908, "lr": 7.23964396480145e-06, "epoch": 1.2195594713656388, "percentage": 60.98, "elapsed_time": "5:05:08", "remaining_time": "3:15:17"} +{"current_steps": 3462, "total_steps": 5676, "loss": 0.5560386180877686, "lr": 7.234044408339243e-06, "epoch": 1.2199118942731277, "percentage": 60.99, "elapsed_time": "5:05:15", "remaining_time": 
"3:15:12"} +{"current_steps": 3463, "total_steps": 5676, "loss": 0.5180274844169617, "lr": 7.228445790832885e-06, "epoch": 1.2202643171806167, "percentage": 61.01, "elapsed_time": "5:05:20", "remaining_time": "3:15:07"} +{"current_steps": 3464, "total_steps": 5676, "loss": 0.4870688319206238, "lr": 7.222848114182926e-06, "epoch": 1.2206167400881056, "percentage": 61.03, "elapsed_time": "5:05:25", "remaining_time": "3:15:01"} +{"current_steps": 3465, "total_steps": 5676, "loss": 0.46914681792259216, "lr": 7.217251380289602e-06, "epoch": 1.2209691629955948, "percentage": 61.05, "elapsed_time": "5:05:31", "remaining_time": "3:14:57"} +{"current_steps": 3466, "total_steps": 5676, "loss": 0.5980997085571289, "lr": 7.211655591052833e-06, "epoch": 1.2213215859030837, "percentage": 61.06, "elapsed_time": "5:05:36", "remaining_time": "3:14:51"} +{"current_steps": 3467, "total_steps": 5676, "loss": 0.5982732772827148, "lr": 7.206060748372212e-06, "epoch": 1.2216740088105726, "percentage": 61.08, "elapsed_time": "5:05:41", "remaining_time": "3:14:46"} +{"current_steps": 3468, "total_steps": 5676, "loss": 0.612629771232605, "lr": 7.200466854147019e-06, "epoch": 1.2220264317180616, "percentage": 61.1, "elapsed_time": "5:05:47", "remaining_time": "3:14:41"} +{"current_steps": 3469, "total_steps": 5676, "loss": 0.606558084487915, "lr": 7.194873910276205e-06, "epoch": 1.2223788546255507, "percentage": 61.12, "elapsed_time": "5:05:53", "remaining_time": "3:14:36"} +{"current_steps": 3470, "total_steps": 5676, "loss": 0.7133803367614746, "lr": 7.189281918658396e-06, "epoch": 1.2227312775330397, "percentage": 61.13, "elapsed_time": "5:05:59", "remaining_time": "3:14:31"} +{"current_steps": 3471, "total_steps": 5676, "loss": 0.5640908479690552, "lr": 7.183690881191908e-06, "epoch": 1.2230837004405286, "percentage": 61.15, "elapsed_time": "5:06:02", "remaining_time": "3:14:25"} +{"current_steps": 3472, "total_steps": 5676, "loss": 0.6376210451126099, "lr": 7.178100799774717e-06, 
"epoch": 1.2234361233480175, "percentage": 61.17, "elapsed_time": "5:06:08", "remaining_time": "3:14:20"} +{"current_steps": 3473, "total_steps": 5676, "loss": 0.6207184791564941, "lr": 7.172511676304481e-06, "epoch": 1.2237885462555067, "percentage": 61.19, "elapsed_time": "5:06:13", "remaining_time": "3:14:14"} +{"current_steps": 3474, "total_steps": 5676, "loss": 0.47848421335220337, "lr": 7.166923512678538e-06, "epoch": 1.2241409691629956, "percentage": 61.21, "elapsed_time": "5:06:17", "remaining_time": "3:14:08"} +{"current_steps": 3475, "total_steps": 5676, "loss": 0.6052829027175903, "lr": 7.161336310793894e-06, "epoch": 1.2244933920704846, "percentage": 61.22, "elapsed_time": "5:06:22", "remaining_time": "3:14:03"} +{"current_steps": 3476, "total_steps": 5676, "loss": 0.6050940155982971, "lr": 7.155750072547229e-06, "epoch": 1.2248458149779735, "percentage": 61.24, "elapsed_time": "5:06:28", "remaining_time": "3:13:58"} +{"current_steps": 3477, "total_steps": 5676, "loss": 0.6121659278869629, "lr": 7.150164799834902e-06, "epoch": 1.2251982378854627, "percentage": 61.26, "elapsed_time": "5:06:32", "remaining_time": "3:13:52"} +{"current_steps": 3478, "total_steps": 5676, "loss": 0.6886739730834961, "lr": 7.144580494552929e-06, "epoch": 1.2255506607929516, "percentage": 61.28, "elapsed_time": "5:06:36", "remaining_time": "3:13:46"} +{"current_steps": 3479, "total_steps": 5676, "loss": 0.5001103281974792, "lr": 7.13899715859702e-06, "epoch": 1.2259030837004405, "percentage": 61.29, "elapsed_time": "5:06:41", "remaining_time": "3:13:40"} +{"current_steps": 3480, "total_steps": 5676, "loss": 0.5948734283447266, "lr": 7.133414793862532e-06, "epoch": 1.2262555066079295, "percentage": 61.31, "elapsed_time": "5:06:47", "remaining_time": "3:13:36"} +{"current_steps": 3481, "total_steps": 5676, "loss": 0.6179298162460327, "lr": 7.127833402244515e-06, "epoch": 1.2266079295154184, "percentage": 61.33, "elapsed_time": "5:06:52", "remaining_time": "3:13:30"} 
+{"current_steps": 3482, "total_steps": 5676, "loss": 0.5543676614761353, "lr": 7.122252985637672e-06, "epoch": 1.2269603524229076, "percentage": 61.35, "elapsed_time": "5:06:57", "remaining_time": "3:13:24"} +{"current_steps": 3483, "total_steps": 5676, "loss": 0.6279658079147339, "lr": 7.116673545936379e-06, "epoch": 1.2273127753303965, "percentage": 61.36, "elapsed_time": "5:07:03", "remaining_time": "3:13:20"} +{"current_steps": 3484, "total_steps": 5676, "loss": 0.6692230701446533, "lr": 7.111095085034687e-06, "epoch": 1.2276651982378854, "percentage": 61.38, "elapsed_time": "5:07:09", "remaining_time": "3:13:15"} +{"current_steps": 3485, "total_steps": 5676, "loss": 0.6124502420425415, "lr": 7.1055176048263085e-06, "epoch": 1.2280176211453744, "percentage": 61.4, "elapsed_time": "5:07:15", "remaining_time": "3:13:10"} +{"current_steps": 3486, "total_steps": 5676, "loss": 0.6241810321807861, "lr": 7.09994110720462e-06, "epoch": 1.2283700440528635, "percentage": 61.42, "elapsed_time": "5:07:20", "remaining_time": "3:13:04"} +{"current_steps": 3487, "total_steps": 5676, "loss": 0.6556589603424072, "lr": 7.094365594062675e-06, "epoch": 1.2287224669603525, "percentage": 61.43, "elapsed_time": "5:07:26", "remaining_time": "3:13:00"} +{"current_steps": 3488, "total_steps": 5676, "loss": 0.480433851480484, "lr": 7.0887910672931815e-06, "epoch": 1.2290748898678414, "percentage": 61.45, "elapsed_time": "5:07:32", "remaining_time": "3:12:55"} +{"current_steps": 3489, "total_steps": 5676, "loss": 0.6198803782463074, "lr": 7.083217528788524e-06, "epoch": 1.2294273127753303, "percentage": 61.47, "elapsed_time": "5:07:37", "remaining_time": "3:12:49"} +{"current_steps": 3490, "total_steps": 5676, "loss": 0.6368751525878906, "lr": 7.077644980440741e-06, "epoch": 1.2297797356828193, "percentage": 61.49, "elapsed_time": "5:07:43", "remaining_time": "3:12:44"} +{"current_steps": 3491, "total_steps": 5676, "loss": 0.5992522239685059, "lr": 7.072073424141538e-06, "epoch": 
1.2301321585903084, "percentage": 61.5, "elapsed_time": "5:07:49", "remaining_time": "3:12:39"} +{"current_steps": 3492, "total_steps": 5676, "loss": 0.5917885303497314, "lr": 7.066502861782289e-06, "epoch": 1.2304845814977974, "percentage": 61.52, "elapsed_time": "5:07:55", "remaining_time": "3:12:34"} +{"current_steps": 3493, "total_steps": 5676, "loss": 0.615925669670105, "lr": 7.060933295254027e-06, "epoch": 1.2308370044052863, "percentage": 61.54, "elapsed_time": "5:07:59", "remaining_time": "3:12:29"} +{"current_steps": 3494, "total_steps": 5676, "loss": 0.4408820867538452, "lr": 7.055364726447437e-06, "epoch": 1.2311894273127753, "percentage": 61.56, "elapsed_time": "5:08:05", "remaining_time": "3:12:24"} +{"current_steps": 3495, "total_steps": 5676, "loss": 0.4918386936187744, "lr": 7.049797157252889e-06, "epoch": 1.2315418502202644, "percentage": 61.58, "elapsed_time": "5:08:10", "remaining_time": "3:12:19"} +{"current_steps": 3496, "total_steps": 5676, "loss": 0.6964970827102661, "lr": 7.0442305895603844e-06, "epoch": 1.2318942731277533, "percentage": 61.59, "elapsed_time": "5:08:15", "remaining_time": "3:12:13"} +{"current_steps": 3497, "total_steps": 5676, "loss": 0.5269606113433838, "lr": 7.038665025259615e-06, "epoch": 1.2322466960352423, "percentage": 61.61, "elapsed_time": "5:08:20", "remaining_time": "3:12:07"} +{"current_steps": 3498, "total_steps": 5676, "loss": 0.6146842241287231, "lr": 7.033100466239908e-06, "epoch": 1.2325991189427312, "percentage": 61.63, "elapsed_time": "5:08:25", "remaining_time": "3:12:02"} +{"current_steps": 3499, "total_steps": 5676, "loss": 0.7163739800453186, "lr": 7.027536914390257e-06, "epoch": 1.2329515418502202, "percentage": 61.65, "elapsed_time": "5:08:30", "remaining_time": "3:11:56"} +{"current_steps": 3500, "total_steps": 5676, "loss": 0.5851477980613708, "lr": 7.021974371599318e-06, "epoch": 1.2333039647577093, "percentage": 61.66, "elapsed_time": "5:08:35", "remaining_time": "3:11:51"} +{"current_steps": 
3501, "total_steps": 5676, "loss": 0.585768461227417, "lr": 7.0164128397554e-06, "epoch": 1.2336563876651983, "percentage": 61.68, "elapsed_time": "5:08:46", "remaining_time": "3:11:49"} +{"current_steps": 3502, "total_steps": 5676, "loss": 0.5467718839645386, "lr": 7.0108523207464706e-06, "epoch": 1.2340088105726872, "percentage": 61.7, "elapsed_time": "5:08:52", "remaining_time": "3:11:44"} +{"current_steps": 3503, "total_steps": 5676, "loss": 0.638299822807312, "lr": 7.0052928164601564e-06, "epoch": 1.2343612334801761, "percentage": 61.72, "elapsed_time": "5:08:58", "remaining_time": "3:11:39"} +{"current_steps": 3504, "total_steps": 5676, "loss": 0.6737650036811829, "lr": 6.9997343287837275e-06, "epoch": 1.2347136563876653, "percentage": 61.73, "elapsed_time": "5:09:03", "remaining_time": "3:11:34"} +{"current_steps": 3505, "total_steps": 5676, "loss": 0.6659837961196899, "lr": 6.9941768596041224e-06, "epoch": 1.2350660792951542, "percentage": 61.75, "elapsed_time": "5:09:09", "remaining_time": "3:11:29"} +{"current_steps": 3506, "total_steps": 5676, "loss": 0.6731020212173462, "lr": 6.988620410807932e-06, "epoch": 1.2354185022026432, "percentage": 61.77, "elapsed_time": "5:09:14", "remaining_time": "3:11:24"} +{"current_steps": 3507, "total_steps": 5676, "loss": 0.6236598491668701, "lr": 6.983064984281389e-06, "epoch": 1.235770925110132, "percentage": 61.79, "elapsed_time": "5:09:19", "remaining_time": "3:11:18"} +{"current_steps": 3508, "total_steps": 5676, "loss": 0.6233193874359131, "lr": 6.9775105819103985e-06, "epoch": 1.236123348017621, "percentage": 61.8, "elapsed_time": "5:09:24", "remaining_time": "3:11:12"} +{"current_steps": 3509, "total_steps": 5676, "loss": 0.5914918184280396, "lr": 6.971957205580497e-06, "epoch": 1.2364757709251102, "percentage": 61.82, "elapsed_time": "5:09:28", "remaining_time": "3:11:07"} +{"current_steps": 3510, "total_steps": 5676, "loss": 0.6576484441757202, "lr": 6.966404857176893e-06, "epoch": 1.2368281938325991, 
"percentage": 61.84, "elapsed_time": "5:09:33", "remaining_time": "3:11:01"} +{"current_steps": 3511, "total_steps": 5676, "loss": 0.5609208941459656, "lr": 6.960853538584431e-06, "epoch": 1.237180616740088, "percentage": 61.86, "elapsed_time": "5:09:38", "remaining_time": "3:10:55"} +{"current_steps": 3512, "total_steps": 5676, "loss": 0.6405455470085144, "lr": 6.955303251687609e-06, "epoch": 1.2375330396475772, "percentage": 61.87, "elapsed_time": "5:09:42", "remaining_time": "3:10:50"} +{"current_steps": 3513, "total_steps": 5676, "loss": 0.5621844530105591, "lr": 6.949753998370579e-06, "epoch": 1.2378854625550662, "percentage": 61.89, "elapsed_time": "5:09:49", "remaining_time": "3:10:45"} +{"current_steps": 3514, "total_steps": 5676, "loss": 0.5674207210540771, "lr": 6.944205780517138e-06, "epoch": 1.238237885462555, "percentage": 61.91, "elapsed_time": "5:09:55", "remaining_time": "3:10:40"} +{"current_steps": 3515, "total_steps": 5676, "loss": 0.6744752526283264, "lr": 6.938658600010734e-06, "epoch": 1.238590308370044, "percentage": 61.93, "elapsed_time": "5:10:01", "remaining_time": "3:10:35"} +{"current_steps": 3516, "total_steps": 5676, "loss": 0.537495493888855, "lr": 6.9331124587344655e-06, "epoch": 1.238942731277533, "percentage": 61.95, "elapsed_time": "5:10:05", "remaining_time": "3:10:29"} +{"current_steps": 3517, "total_steps": 5676, "loss": 0.8405104875564575, "lr": 6.92756735857107e-06, "epoch": 1.2392951541850221, "percentage": 61.96, "elapsed_time": "5:10:11", "remaining_time": "3:10:24"} +{"current_steps": 3518, "total_steps": 5676, "loss": 0.6751723885536194, "lr": 6.92202330140294e-06, "epoch": 1.239647577092511, "percentage": 61.98, "elapsed_time": "5:10:17", "remaining_time": "3:10:20"} +{"current_steps": 3519, "total_steps": 5676, "loss": 0.5763178467750549, "lr": 6.9164802891121105e-06, "epoch": 1.24, "percentage": 62.0, "elapsed_time": "5:10:22", "remaining_time": "3:10:14"} +{"current_steps": 3520, "total_steps": 5676, "loss": 
0.7713793516159058, "lr": 6.910938323580256e-06, "epoch": 1.240352422907489, "percentage": 62.02, "elapsed_time": "5:10:28", "remaining_time": "3:10:09"} +{"current_steps": 3521, "total_steps": 5676, "loss": 0.6354435682296753, "lr": 6.90539740668871e-06, "epoch": 1.240704845814978, "percentage": 62.03, "elapsed_time": "5:10:33", "remaining_time": "3:10:04"} +{"current_steps": 3522, "total_steps": 5676, "loss": 0.5121721625328064, "lr": 6.899857540318434e-06, "epoch": 1.241057268722467, "percentage": 62.05, "elapsed_time": "5:10:38", "remaining_time": "3:09:58"} +{"current_steps": 3523, "total_steps": 5676, "loss": 0.586428165435791, "lr": 6.894318726350042e-06, "epoch": 1.241409691629956, "percentage": 62.07, "elapsed_time": "5:10:43", "remaining_time": "3:09:53"} +{"current_steps": 3524, "total_steps": 5676, "loss": 0.4868311285972595, "lr": 6.888780966663792e-06, "epoch": 1.241762114537445, "percentage": 62.09, "elapsed_time": "5:10:49", "remaining_time": "3:09:48"} +{"current_steps": 3525, "total_steps": 5676, "loss": 0.7057775259017944, "lr": 6.883244263139578e-06, "epoch": 1.2421145374449338, "percentage": 62.1, "elapsed_time": "5:10:53", "remaining_time": "3:09:42"} +{"current_steps": 3526, "total_steps": 5676, "loss": 0.4993360638618469, "lr": 6.877708617656942e-06, "epoch": 1.242466960352423, "percentage": 62.12, "elapsed_time": "5:10:59", "remaining_time": "3:09:37"} +{"current_steps": 3527, "total_steps": 5676, "loss": 0.6096793413162231, "lr": 6.872174032095061e-06, "epoch": 1.242819383259912, "percentage": 62.14, "elapsed_time": "5:11:06", "remaining_time": "3:09:33"} +{"current_steps": 3528, "total_steps": 5676, "loss": 0.584385871887207, "lr": 6.866640508332751e-06, "epoch": 1.2431718061674009, "percentage": 62.16, "elapsed_time": "5:11:11", "remaining_time": "3:09:27"} +{"current_steps": 3529, "total_steps": 5676, "loss": 0.5857449173927307, "lr": 6.861108048248477e-06, "epoch": 1.2435242290748898, "percentage": 62.17, "elapsed_time": "5:11:15", 
"remaining_time": "3:09:21"} +{"current_steps": 3530, "total_steps": 5676, "loss": 0.4337875247001648, "lr": 6.855576653720333e-06, "epoch": 1.243876651982379, "percentage": 62.19, "elapsed_time": "5:11:19", "remaining_time": "3:09:15"} +{"current_steps": 3531, "total_steps": 5676, "loss": 0.6949163675308228, "lr": 6.850046326626058e-06, "epoch": 1.244229074889868, "percentage": 62.21, "elapsed_time": "5:11:24", "remaining_time": "3:09:10"} +{"current_steps": 3532, "total_steps": 5676, "loss": 0.5876098871231079, "lr": 6.844517068843025e-06, "epoch": 1.2445814977973568, "percentage": 62.23, "elapsed_time": "5:11:28", "remaining_time": "3:09:04"} +{"current_steps": 3533, "total_steps": 5676, "loss": 0.5460488796234131, "lr": 6.838988882248243e-06, "epoch": 1.2449339207048458, "percentage": 62.24, "elapsed_time": "5:11:33", "remaining_time": "3:08:58"} +{"current_steps": 3534, "total_steps": 5676, "loss": 0.6500875949859619, "lr": 6.833461768718365e-06, "epoch": 1.2452863436123347, "percentage": 62.26, "elapsed_time": "5:11:38", "remaining_time": "3:08:53"} +{"current_steps": 3535, "total_steps": 5676, "loss": 0.6504626274108887, "lr": 6.82793573012967e-06, "epoch": 1.2456387665198239, "percentage": 62.28, "elapsed_time": "5:11:42", "remaining_time": "3:08:47"} +{"current_steps": 3536, "total_steps": 5676, "loss": 0.6881722211837769, "lr": 6.822410768358072e-06, "epoch": 1.2459911894273128, "percentage": 62.3, "elapsed_time": "5:11:48", "remaining_time": "3:08:42"} +{"current_steps": 3537, "total_steps": 5676, "loss": 0.6747599840164185, "lr": 6.816886885279132e-06, "epoch": 1.2463436123348017, "percentage": 62.32, "elapsed_time": "5:11:53", "remaining_time": "3:08:37"} +{"current_steps": 3538, "total_steps": 5676, "loss": 0.5987570285797119, "lr": 6.811364082768028e-06, "epoch": 1.2466960352422907, "percentage": 62.33, "elapsed_time": "5:11:58", "remaining_time": "3:08:31"} +{"current_steps": 3539, "total_steps": 5676, "loss": 0.6614603996276855, "lr": 
6.8058423626995885e-06, "epoch": 1.2470484581497798, "percentage": 62.35, "elapsed_time": "5:12:04", "remaining_time": "3:08:26"} +{"current_steps": 3540, "total_steps": 5676, "loss": 0.542367696762085, "lr": 6.80032172694826e-06, "epoch": 1.2474008810572688, "percentage": 62.37, "elapsed_time": "5:12:11", "remaining_time": "3:08:22"} +{"current_steps": 3541, "total_steps": 5676, "loss": 0.6200593709945679, "lr": 6.7948021773881235e-06, "epoch": 1.2477533039647577, "percentage": 62.39, "elapsed_time": "5:12:15", "remaining_time": "3:08:16"} +{"current_steps": 3542, "total_steps": 5676, "loss": 0.6425306797027588, "lr": 6.789283715892905e-06, "epoch": 1.2481057268722466, "percentage": 62.4, "elapsed_time": "5:12:20", "remaining_time": "3:08:11"} +{"current_steps": 3543, "total_steps": 5676, "loss": 0.5277592539787292, "lr": 6.78376634433594e-06, "epoch": 1.2484581497797356, "percentage": 62.42, "elapsed_time": "5:12:27", "remaining_time": "3:08:06"} +{"current_steps": 3544, "total_steps": 5676, "loss": 0.6120523810386658, "lr": 6.778250064590206e-06, "epoch": 1.2488105726872247, "percentage": 62.44, "elapsed_time": "5:12:32", "remaining_time": "3:08:01"} +{"current_steps": 3545, "total_steps": 5676, "loss": 0.538428544998169, "lr": 6.772734878528313e-06, "epoch": 1.2491629955947137, "percentage": 62.46, "elapsed_time": "5:12:39", "remaining_time": "3:07:56"} +{"current_steps": 3546, "total_steps": 5676, "loss": 0.6439732909202576, "lr": 6.76722078802249e-06, "epoch": 1.2495154185022026, "percentage": 62.47, "elapsed_time": "5:12:43", "remaining_time": "3:07:50"} +{"current_steps": 3547, "total_steps": 5676, "loss": 0.5951697826385498, "lr": 6.761707794944605e-06, "epoch": 1.2498678414096915, "percentage": 62.49, "elapsed_time": "5:12:49", "remaining_time": "3:07:45"} +{"current_steps": 3548, "total_steps": 5676, "loss": 0.5548606514930725, "lr": 6.7561959011661456e-06, "epoch": 1.2502202643171807, "percentage": 62.51, "elapsed_time": "5:12:55", "remaining_time": 
"3:07:40"} +{"current_steps": 3549, "total_steps": 5676, "loss": 0.4768974781036377, "lr": 6.750685108558221e-06, "epoch": 1.2505726872246696, "percentage": 62.53, "elapsed_time": "5:13:00", "remaining_time": "3:07:35"} +{"current_steps": 3550, "total_steps": 5676, "loss": 0.6629552245140076, "lr": 6.745175418991585e-06, "epoch": 1.2509251101321586, "percentage": 62.54, "elapsed_time": "5:13:04", "remaining_time": "3:07:29"} +{"current_steps": 3551, "total_steps": 5676, "loss": 0.6550329923629761, "lr": 6.739666834336599e-06, "epoch": 1.2512775330396475, "percentage": 62.56, "elapsed_time": "5:13:09", "remaining_time": "3:07:24"} +{"current_steps": 3552, "total_steps": 5676, "loss": 0.37340015172958374, "lr": 6.734159356463254e-06, "epoch": 1.2516299559471364, "percentage": 62.58, "elapsed_time": "5:13:15", "remaining_time": "3:07:19"} +{"current_steps": 3553, "total_steps": 5676, "loss": 0.6343201398849487, "lr": 6.728652987241175e-06, "epoch": 1.2519823788546256, "percentage": 62.6, "elapsed_time": "5:13:20", "remaining_time": "3:07:13"} +{"current_steps": 3554, "total_steps": 5676, "loss": 0.7555221319198608, "lr": 6.723147728539596e-06, "epoch": 1.2523348017621145, "percentage": 62.61, "elapsed_time": "5:13:26", "remaining_time": "3:07:08"} +{"current_steps": 3555, "total_steps": 5676, "loss": 0.5944523215293884, "lr": 6.717643582227384e-06, "epoch": 1.2526872246696035, "percentage": 62.63, "elapsed_time": "5:13:30", "remaining_time": "3:07:02"} +{"current_steps": 3556, "total_steps": 5676, "loss": 0.5686212778091431, "lr": 6.71214055017303e-06, "epoch": 1.2530396475770926, "percentage": 62.65, "elapsed_time": "5:13:36", "remaining_time": "3:06:58"} +{"current_steps": 3557, "total_steps": 5676, "loss": 0.6401857137680054, "lr": 6.706638634244629e-06, "epoch": 1.2533920704845816, "percentage": 62.67, "elapsed_time": "5:13:43", "remaining_time": "3:06:53"} +{"current_steps": 3558, "total_steps": 5676, "loss": 0.4571516513824463, "lr": 6.701137836309926e-06, 
"epoch": 1.2537444933920705, "percentage": 62.68, "elapsed_time": "5:13:48", "remaining_time": "3:06:48"} +{"current_steps": 3559, "total_steps": 5676, "loss": 0.5857570171356201, "lr": 6.695638158236255e-06, "epoch": 1.2540969162995594, "percentage": 62.7, "elapsed_time": "5:13:53", "remaining_time": "3:06:42"} +{"current_steps": 3560, "total_steps": 5676, "loss": 0.6981472969055176, "lr": 6.690139601890601e-06, "epoch": 1.2544493392070484, "percentage": 62.72, "elapsed_time": "5:13:58", "remaining_time": "3:06:37"} +{"current_steps": 3561, "total_steps": 5676, "loss": 0.5120254755020142, "lr": 6.684642169139544e-06, "epoch": 1.2548017621145373, "percentage": 62.74, "elapsed_time": "5:14:04", "remaining_time": "3:06:32"} +{"current_steps": 3562, "total_steps": 5676, "loss": 0.6975923776626587, "lr": 6.67914586184929e-06, "epoch": 1.2551541850220265, "percentage": 62.76, "elapsed_time": "5:14:08", "remaining_time": "3:06:26"} +{"current_steps": 3563, "total_steps": 5676, "loss": 0.5825072526931763, "lr": 6.673650681885668e-06, "epoch": 1.2555066079295154, "percentage": 62.77, "elapsed_time": "5:14:13", "remaining_time": "3:06:20"} +{"current_steps": 3564, "total_steps": 5676, "loss": 0.5701749324798584, "lr": 6.668156631114124e-06, "epoch": 1.2558590308370043, "percentage": 62.79, "elapsed_time": "5:14:17", "remaining_time": "3:06:14"} +{"current_steps": 3565, "total_steps": 5676, "loss": 0.5230482220649719, "lr": 6.662663711399705e-06, "epoch": 1.2562114537444935, "percentage": 62.81, "elapsed_time": "5:14:21", "remaining_time": "3:06:08"} +{"current_steps": 3566, "total_steps": 5676, "loss": 0.6680361032485962, "lr": 6.657171924607102e-06, "epoch": 1.2565638766519824, "percentage": 62.83, "elapsed_time": "5:14:27", "remaining_time": "3:06:03"} +{"current_steps": 3567, "total_steps": 5676, "loss": 0.6745159029960632, "lr": 6.651681272600592e-06, "epoch": 1.2569162995594714, "percentage": 62.84, "elapsed_time": "5:14:32", "remaining_time": "3:05:58"} 
+{"current_steps": 3568, "total_steps": 5676, "loss": 0.587162971496582, "lr": 6.646191757244089e-06, "epoch": 1.2572687224669603, "percentage": 62.86, "elapsed_time": "5:14:39", "remaining_time": "3:05:54"} +{"current_steps": 3569, "total_steps": 5676, "loss": 0.6170785427093506, "lr": 6.640703380401111e-06, "epoch": 1.2576211453744492, "percentage": 62.88, "elapsed_time": "5:14:43", "remaining_time": "3:05:47"} +{"current_steps": 3570, "total_steps": 5676, "loss": 0.4955494999885559, "lr": 6.6352161439347875e-06, "epoch": 1.2579735682819384, "percentage": 62.9, "elapsed_time": "5:14:50", "remaining_time": "3:05:43"} +{"current_steps": 3571, "total_steps": 5676, "loss": 0.7183424234390259, "lr": 6.62973004970787e-06, "epoch": 1.2583259911894273, "percentage": 62.91, "elapsed_time": "5:14:56", "remaining_time": "3:05:38"} +{"current_steps": 3572, "total_steps": 5676, "loss": 0.6266030669212341, "lr": 6.624245099582713e-06, "epoch": 1.2586784140969163, "percentage": 62.93, "elapsed_time": "5:15:01", "remaining_time": "3:05:33"} +{"current_steps": 3573, "total_steps": 5676, "loss": 0.5234469175338745, "lr": 6.6187612954212845e-06, "epoch": 1.2590308370044052, "percentage": 62.95, "elapsed_time": "5:15:05", "remaining_time": "3:05:27"} +{"current_steps": 3574, "total_steps": 5676, "loss": 0.7066231966018677, "lr": 6.6132786390851725e-06, "epoch": 1.2593832599118944, "percentage": 62.97, "elapsed_time": "5:15:10", "remaining_time": "3:05:22"} +{"current_steps": 3575, "total_steps": 5676, "loss": 0.622086226940155, "lr": 6.60779713243556e-06, "epoch": 1.2597356828193833, "percentage": 62.98, "elapsed_time": "5:15:16", "remaining_time": "3:05:17"} +{"current_steps": 3576, "total_steps": 5676, "loss": 0.6607370376586914, "lr": 6.6023167773332554e-06, "epoch": 1.2600881057268722, "percentage": 63.0, "elapsed_time": "5:15:21", "remaining_time": "3:05:11"} +{"current_steps": 3577, "total_steps": 5676, "loss": 0.6846165657043457, "lr": 6.596837575638663e-06, "epoch": 
1.2604405286343612, "percentage": 63.02, "elapsed_time": "5:15:26", "remaining_time": "3:05:06"} +{"current_steps": 3578, "total_steps": 5676, "loss": 0.6329103708267212, "lr": 6.5913595292118024e-06, "epoch": 1.2607929515418501, "percentage": 63.04, "elapsed_time": "5:15:31", "remaining_time": "3:05:00"} +{"current_steps": 3579, "total_steps": 5676, "loss": 0.7942261695861816, "lr": 6.585882639912302e-06, "epoch": 1.2611453744493393, "percentage": 63.05, "elapsed_time": "5:15:38", "remaining_time": "3:04:56"} +{"current_steps": 3580, "total_steps": 5676, "loss": 0.5446548461914062, "lr": 6.580406909599393e-06, "epoch": 1.2614977973568282, "percentage": 63.07, "elapsed_time": "5:15:43", "remaining_time": "3:04:51"} +{"current_steps": 3581, "total_steps": 5676, "loss": 0.581193208694458, "lr": 6.574932340131917e-06, "epoch": 1.2618502202643171, "percentage": 63.09, "elapsed_time": "5:15:47", "remaining_time": "3:04:44"} +{"current_steps": 3582, "total_steps": 5676, "loss": 0.6099729537963867, "lr": 6.569458933368323e-06, "epoch": 1.2622026431718063, "percentage": 63.11, "elapsed_time": "5:15:52", "remaining_time": "3:04:39"} +{"current_steps": 3583, "total_steps": 5676, "loss": 0.45215970277786255, "lr": 6.563986691166655e-06, "epoch": 1.2625550660792952, "percentage": 63.13, "elapsed_time": "5:15:56", "remaining_time": "3:04:33"} +{"current_steps": 3584, "total_steps": 5676, "loss": 0.6674731969833374, "lr": 6.558515615384573e-06, "epoch": 1.2629074889867842, "percentage": 63.14, "elapsed_time": "5:16:02", "remaining_time": "3:04:28"} +{"current_steps": 3585, "total_steps": 5676, "loss": 0.4951098561286926, "lr": 6.553045707879338e-06, "epoch": 1.2632599118942731, "percentage": 63.16, "elapsed_time": "5:16:07", "remaining_time": "3:04:23"} +{"current_steps": 3586, "total_steps": 5676, "loss": 0.5853816270828247, "lr": 6.54757697050781e-06, "epoch": 1.263612334801762, "percentage": 63.18, "elapsed_time": "5:16:12", "remaining_time": "3:04:17"} +{"current_steps": 
3587, "total_steps": 5676, "loss": 0.5236951112747192, "lr": 6.5421094051264575e-06, "epoch": 1.263964757709251, "percentage": 63.2, "elapsed_time": "5:16:17", "remaining_time": "3:04:11"} +{"current_steps": 3588, "total_steps": 5676, "loss": 0.5717612504959106, "lr": 6.536643013591347e-06, "epoch": 1.2643171806167401, "percentage": 63.21, "elapsed_time": "5:16:22", "remaining_time": "3:04:06"} +{"current_steps": 3589, "total_steps": 5676, "loss": 0.6144098043441772, "lr": 6.531177797758155e-06, "epoch": 1.264669603524229, "percentage": 63.23, "elapsed_time": "5:16:28", "remaining_time": "3:04:01"} +{"current_steps": 3590, "total_steps": 5676, "loss": 0.5634705424308777, "lr": 6.525713759482144e-06, "epoch": 1.265022026431718, "percentage": 63.25, "elapsed_time": "5:16:32", "remaining_time": "3:03:55"} +{"current_steps": 3591, "total_steps": 5676, "loss": 0.582956075668335, "lr": 6.520250900618186e-06, "epoch": 1.2653744493392072, "percentage": 63.27, "elapsed_time": "5:16:38", "remaining_time": "3:03:51"} +{"current_steps": 3592, "total_steps": 5676, "loss": 0.7649297714233398, "lr": 6.514789223020754e-06, "epoch": 1.265726872246696, "percentage": 63.28, "elapsed_time": "5:16:44", "remaining_time": "3:03:46"} +{"current_steps": 3593, "total_steps": 5676, "loss": 0.6035098433494568, "lr": 6.509328728543918e-06, "epoch": 1.266079295154185, "percentage": 63.3, "elapsed_time": "5:16:51", "remaining_time": "3:03:41"} +{"current_steps": 3594, "total_steps": 5676, "loss": 0.6405705809593201, "lr": 6.503869419041344e-06, "epoch": 1.266431718061674, "percentage": 63.32, "elapsed_time": "5:16:56", "remaining_time": "3:03:36"} +{"current_steps": 3595, "total_steps": 5676, "loss": 0.674353301525116, "lr": 6.498411296366299e-06, "epoch": 1.266784140969163, "percentage": 63.34, "elapsed_time": "5:17:02", "remaining_time": "3:03:31"} +{"current_steps": 3596, "total_steps": 5676, "loss": 0.6018465757369995, "lr": 6.492954362371644e-06, "epoch": 1.2671365638766519, "percentage": 
63.35, "elapsed_time": "5:17:08", "remaining_time": "3:03:26"} +{"current_steps": 3597, "total_steps": 5676, "loss": 0.6491270065307617, "lr": 6.487498618909845e-06, "epoch": 1.267488986784141, "percentage": 63.37, "elapsed_time": "5:17:12", "remaining_time": "3:03:20"} +{"current_steps": 3598, "total_steps": 5676, "loss": 0.5126988887786865, "lr": 6.4820440678329474e-06, "epoch": 1.26784140969163, "percentage": 63.39, "elapsed_time": "5:17:18", "remaining_time": "3:03:15"} +{"current_steps": 3599, "total_steps": 5676, "loss": 0.5931694507598877, "lr": 6.476590710992605e-06, "epoch": 1.2681938325991189, "percentage": 63.41, "elapsed_time": "5:17:23", "remaining_time": "3:03:10"} +{"current_steps": 3600, "total_steps": 5676, "loss": 0.5455423593521118, "lr": 6.471138550240066e-06, "epoch": 1.268546255506608, "percentage": 63.42, "elapsed_time": "5:17:28", "remaining_time": "3:03:04"} +{"current_steps": 3601, "total_steps": 5676, "loss": 0.4870053231716156, "lr": 6.465687587426166e-06, "epoch": 1.268898678414097, "percentage": 63.44, "elapsed_time": "5:17:37", "remaining_time": "3:03:01"} +{"current_steps": 3602, "total_steps": 5676, "loss": 0.6434903144836426, "lr": 6.460237824401337e-06, "epoch": 1.269251101321586, "percentage": 63.46, "elapsed_time": "5:17:42", "remaining_time": "3:02:55"} +{"current_steps": 3603, "total_steps": 5676, "loss": 0.6256476640701294, "lr": 6.454789263015609e-06, "epoch": 1.2696035242290749, "percentage": 63.48, "elapsed_time": "5:17:48", "remaining_time": "3:02:50"} +{"current_steps": 3604, "total_steps": 5676, "loss": 0.6304135322570801, "lr": 6.449341905118589e-06, "epoch": 1.2699559471365638, "percentage": 63.5, "elapsed_time": "5:17:54", "remaining_time": "3:02:45"} +{"current_steps": 3605, "total_steps": 5676, "loss": 0.5315194725990295, "lr": 6.443895752559498e-06, "epoch": 1.2703083700440527, "percentage": 63.51, "elapsed_time": "5:17:59", "remaining_time": "3:02:40"} +{"current_steps": 3606, "total_steps": 5676, "loss": 
0.5232852697372437, "lr": 6.438450807187127e-06, "epoch": 1.2706607929515419, "percentage": 63.53, "elapsed_time": "5:18:05", "remaining_time": "3:02:35"} +{"current_steps": 3607, "total_steps": 5676, "loss": 0.4462543725967407, "lr": 6.433007070849863e-06, "epoch": 1.2710132158590308, "percentage": 63.55, "elapsed_time": "5:18:09", "remaining_time": "3:02:29"} +{"current_steps": 3608, "total_steps": 5676, "loss": 0.6347709894180298, "lr": 6.4275645453956945e-06, "epoch": 1.2713656387665198, "percentage": 63.57, "elapsed_time": "5:18:15", "remaining_time": "3:02:24"} +{"current_steps": 3609, "total_steps": 5676, "loss": 0.5277259349822998, "lr": 6.422123232672182e-06, "epoch": 1.271718061674009, "percentage": 63.58, "elapsed_time": "5:18:21", "remaining_time": "3:02:20"} +{"current_steps": 3610, "total_steps": 5676, "loss": 0.6297650933265686, "lr": 6.416683134526486e-06, "epoch": 1.2720704845814979, "percentage": 63.6, "elapsed_time": "5:18:27", "remaining_time": "3:02:15"} +{"current_steps": 3611, "total_steps": 5676, "loss": 0.503609836101532, "lr": 6.411244252805351e-06, "epoch": 1.2724229074889868, "percentage": 63.62, "elapsed_time": "5:18:32", "remaining_time": "3:02:09"} +{"current_steps": 3612, "total_steps": 5676, "loss": 0.6026735305786133, "lr": 6.405806589355099e-06, "epoch": 1.2727753303964757, "percentage": 63.64, "elapsed_time": "5:18:38", "remaining_time": "3:02:04"} +{"current_steps": 3613, "total_steps": 5676, "loss": 0.4918368458747864, "lr": 6.400370146021662e-06, "epoch": 1.2731277533039647, "percentage": 63.65, "elapsed_time": "5:18:44", "remaining_time": "3:02:00"} +{"current_steps": 3614, "total_steps": 5676, "loss": 0.6215550899505615, "lr": 6.394934924650532e-06, "epoch": 1.2734801762114538, "percentage": 63.67, "elapsed_time": "5:18:48", "remaining_time": "3:01:53"} +{"current_steps": 3615, "total_steps": 5676, "loss": 0.6979820728302002, "lr": 6.389500927086801e-06, "epoch": 1.2738325991189428, "percentage": 63.69, "elapsed_time": 
"5:18:52", "remaining_time": "3:01:48"} +{"current_steps": 3616, "total_steps": 5676, "loss": 0.5661836266517639, "lr": 6.384068155175143e-06, "epoch": 1.2741850220264317, "percentage": 63.71, "elapsed_time": "5:18:57", "remaining_time": "3:01:42"} +{"current_steps": 3617, "total_steps": 5676, "loss": 0.699792742729187, "lr": 6.378636610759812e-06, "epoch": 1.2745374449339206, "percentage": 63.72, "elapsed_time": "5:19:02", "remaining_time": "3:01:37"} +{"current_steps": 3618, "total_steps": 5676, "loss": 0.6418631076812744, "lr": 6.373206295684653e-06, "epoch": 1.2748898678414098, "percentage": 63.74, "elapsed_time": "5:19:07", "remaining_time": "3:01:31"} +{"current_steps": 3619, "total_steps": 5676, "loss": 0.4975489675998688, "lr": 6.3677772117930895e-06, "epoch": 1.2752422907488987, "percentage": 63.76, "elapsed_time": "5:19:12", "remaining_time": "3:01:26"} +{"current_steps": 3620, "total_steps": 5676, "loss": 0.5621567964553833, "lr": 6.362349360928117e-06, "epoch": 1.2755947136563877, "percentage": 63.78, "elapsed_time": "5:19:18", "remaining_time": "3:01:21"} +{"current_steps": 3621, "total_steps": 5676, "loss": 0.538573682308197, "lr": 6.356922744932335e-06, "epoch": 1.2759471365638766, "percentage": 63.79, "elapsed_time": "5:19:25", "remaining_time": "3:01:16"} +{"current_steps": 3622, "total_steps": 5676, "loss": 0.5726763010025024, "lr": 6.351497365647903e-06, "epoch": 1.2762995594713655, "percentage": 63.81, "elapsed_time": "5:19:30", "remaining_time": "3:01:11"} +{"current_steps": 3623, "total_steps": 5676, "loss": 0.5911343097686768, "lr": 6.346073224916565e-06, "epoch": 1.2766519823788547, "percentage": 63.83, "elapsed_time": "5:19:37", "remaining_time": "3:01:06"} +{"current_steps": 3624, "total_steps": 5676, "loss": 0.6181383728981018, "lr": 6.340650324579658e-06, "epoch": 1.2770044052863436, "percentage": 63.85, "elapsed_time": "5:19:41", "remaining_time": "3:01:01"} +{"current_steps": 3625, "total_steps": 5676, "loss": 0.5941140651702881, "lr": 
6.3352286664780785e-06, "epoch": 1.2773568281938326, "percentage": 63.87, "elapsed_time": "5:19:47", "remaining_time": "3:00:55"} +{"current_steps": 3626, "total_steps": 5676, "loss": 0.7604472637176514, "lr": 6.329808252452316e-06, "epoch": 1.2777092511013217, "percentage": 63.88, "elapsed_time": "5:19:52", "remaining_time": "3:00:50"} +{"current_steps": 3627, "total_steps": 5676, "loss": 0.6063867211341858, "lr": 6.324389084342435e-06, "epoch": 1.2780616740088107, "percentage": 63.9, "elapsed_time": "5:19:57", "remaining_time": "3:00:44"} +{"current_steps": 3628, "total_steps": 5676, "loss": 0.7202302813529968, "lr": 6.3189711639880644e-06, "epoch": 1.2784140969162996, "percentage": 63.92, "elapsed_time": "5:20:02", "remaining_time": "3:00:39"} +{"current_steps": 3629, "total_steps": 5676, "loss": 0.5934856534004211, "lr": 6.313554493228431e-06, "epoch": 1.2787665198237885, "percentage": 63.94, "elapsed_time": "5:20:07", "remaining_time": "3:00:34"} +{"current_steps": 3630, "total_steps": 5676, "loss": 0.6403088569641113, "lr": 6.3081390739023175e-06, "epoch": 1.2791189427312775, "percentage": 63.95, "elapsed_time": "5:20:12", "remaining_time": "3:00:29"} +{"current_steps": 3631, "total_steps": 5676, "loss": 0.6679831743240356, "lr": 6.302724907848096e-06, "epoch": 1.2794713656387664, "percentage": 63.97, "elapsed_time": "5:20:18", "remaining_time": "3:00:23"} +{"current_steps": 3632, "total_steps": 5676, "loss": 0.6914902329444885, "lr": 6.297311996903703e-06, "epoch": 1.2798237885462556, "percentage": 63.99, "elapsed_time": "5:20:23", "remaining_time": "3:00:18"} +{"current_steps": 3633, "total_steps": 5676, "loss": 0.5391600131988525, "lr": 6.2919003429066535e-06, "epoch": 1.2801762114537445, "percentage": 64.01, "elapsed_time": "5:20:30", "remaining_time": "3:00:13"} +{"current_steps": 3634, "total_steps": 5676, "loss": 0.5740962028503418, "lr": 6.286489947694041e-06, "epoch": 1.2805286343612334, "percentage": 64.02, "elapsed_time": "5:20:34", 
"remaining_time": "3:00:08"} +{"current_steps": 3635, "total_steps": 5676, "loss": 0.6497045159339905, "lr": 6.281080813102523e-06, "epoch": 1.2808810572687226, "percentage": 64.04, "elapsed_time": "5:20:39", "remaining_time": "3:00:02"} +{"current_steps": 3636, "total_steps": 5676, "loss": 0.5481048226356506, "lr": 6.275672940968326e-06, "epoch": 1.2812334801762115, "percentage": 64.06, "elapsed_time": "5:20:45", "remaining_time": "2:59:57"} +{"current_steps": 3637, "total_steps": 5676, "loss": 0.5412508249282837, "lr": 6.270266333127266e-06, "epoch": 1.2815859030837005, "percentage": 64.08, "elapsed_time": "5:20:50", "remaining_time": "2:59:52"} +{"current_steps": 3638, "total_steps": 5676, "loss": 0.5055446624755859, "lr": 6.264860991414709e-06, "epoch": 1.2819383259911894, "percentage": 64.09, "elapsed_time": "5:20:56", "remaining_time": "2:59:47"} +{"current_steps": 3639, "total_steps": 5676, "loss": 0.6073929071426392, "lr": 6.259456917665605e-06, "epoch": 1.2822907488986783, "percentage": 64.11, "elapsed_time": "5:21:02", "remaining_time": "2:59:42"} +{"current_steps": 3640, "total_steps": 5676, "loss": 0.5277928113937378, "lr": 6.254054113714467e-06, "epoch": 1.2826431718061673, "percentage": 64.13, "elapsed_time": "5:21:07", "remaining_time": "2:59:37"} +{"current_steps": 3641, "total_steps": 5676, "loss": 0.5106299519538879, "lr": 6.248652581395378e-06, "epoch": 1.2829955947136564, "percentage": 64.15, "elapsed_time": "5:21:13", "remaining_time": "2:59:32"} +{"current_steps": 3642, "total_steps": 5676, "loss": 0.485049843788147, "lr": 6.243252322541993e-06, "epoch": 1.2833480176211454, "percentage": 64.16, "elapsed_time": "5:21:19", "remaining_time": "2:59:27"} +{"current_steps": 3643, "total_steps": 5676, "loss": 0.5899066925048828, "lr": 6.237853338987532e-06, "epoch": 1.2837004405286343, "percentage": 64.18, "elapsed_time": "5:21:22", "remaining_time": "2:59:21"} +{"current_steps": 3644, "total_steps": 5676, "loss": 0.5761981010437012, "lr": 
6.2324556325647745e-06, "epoch": 1.2840528634361235, "percentage": 64.2, "elapsed_time": "5:21:28", "remaining_time": "2:59:15"} +{"current_steps": 3645, "total_steps": 5676, "loss": 0.6288208961486816, "lr": 6.227059205106085e-06, "epoch": 1.2844052863436124, "percentage": 64.22, "elapsed_time": "5:21:34", "remaining_time": "2:59:10"} +{"current_steps": 3646, "total_steps": 5676, "loss": 0.6122645139694214, "lr": 6.2216640584433726e-06, "epoch": 1.2847577092511013, "percentage": 64.24, "elapsed_time": "5:21:39", "remaining_time": "2:59:05"} +{"current_steps": 3647, "total_steps": 5676, "loss": 0.5838489532470703, "lr": 6.2162701944081295e-06, "epoch": 1.2851101321585903, "percentage": 64.25, "elapsed_time": "5:21:46", "remaining_time": "2:59:01"} +{"current_steps": 3648, "total_steps": 5676, "loss": 0.6020689606666565, "lr": 6.2108776148314005e-06, "epoch": 1.2854625550660792, "percentage": 64.27, "elapsed_time": "5:21:51", "remaining_time": "2:58:55"} +{"current_steps": 3649, "total_steps": 5676, "loss": 0.5852698683738708, "lr": 6.205486321543798e-06, "epoch": 1.2858149779735684, "percentage": 64.29, "elapsed_time": "5:21:56", "remaining_time": "2:58:50"} +{"current_steps": 3650, "total_steps": 5676, "loss": 0.560903012752533, "lr": 6.2000963163755015e-06, "epoch": 1.2861674008810573, "percentage": 64.31, "elapsed_time": "5:22:01", "remaining_time": "2:58:44"} +{"current_steps": 3651, "total_steps": 5676, "loss": 0.7750356197357178, "lr": 6.194707601156249e-06, "epoch": 1.2865198237885462, "percentage": 64.32, "elapsed_time": "5:22:06", "remaining_time": "2:58:39"} +{"current_steps": 3652, "total_steps": 5676, "loss": 0.5503605604171753, "lr": 6.189320177715338e-06, "epoch": 1.2868722466960352, "percentage": 64.34, "elapsed_time": "5:22:11", "remaining_time": "2:58:33"} +{"current_steps": 3653, "total_steps": 5676, "loss": 0.6910672187805176, "lr": 6.183934047881636e-06, "epoch": 1.2872246696035243, "percentage": 64.36, "elapsed_time": "5:22:16", 
"remaining_time": "2:58:28"} +{"current_steps": 3654, "total_steps": 5676, "loss": 0.7773069739341736, "lr": 6.1785492134835626e-06, "epoch": 1.2875770925110133, "percentage": 64.38, "elapsed_time": "5:22:20", "remaining_time": "2:58:22"} +{"current_steps": 3655, "total_steps": 5676, "loss": 0.6777454018592834, "lr": 6.173165676349103e-06, "epoch": 1.2879295154185022, "percentage": 64.39, "elapsed_time": "5:22:25", "remaining_time": "2:58:17"} +{"current_steps": 3656, "total_steps": 5676, "loss": 0.6103118658065796, "lr": 6.167783438305803e-06, "epoch": 1.2882819383259911, "percentage": 64.41, "elapsed_time": "5:22:30", "remaining_time": "2:58:11"} +{"current_steps": 3657, "total_steps": 5676, "loss": 0.593717634677887, "lr": 6.1624025011807595e-06, "epoch": 1.28863436123348, "percentage": 64.43, "elapsed_time": "5:22:36", "remaining_time": "2:58:06"} +{"current_steps": 3658, "total_steps": 5676, "loss": 0.5822824835777283, "lr": 6.1570228668006395e-06, "epoch": 1.2889867841409692, "percentage": 64.45, "elapsed_time": "5:22:42", "remaining_time": "2:58:01"} +{"current_steps": 3659, "total_steps": 5676, "loss": 0.5180603861808777, "lr": 6.151644536991656e-06, "epoch": 1.2893392070484582, "percentage": 64.46, "elapsed_time": "5:22:48", "remaining_time": "2:57:56"} +{"current_steps": 3660, "total_steps": 5676, "loss": 0.6283069849014282, "lr": 6.14626751357959e-06, "epoch": 1.289691629955947, "percentage": 64.48, "elapsed_time": "5:22:53", "remaining_time": "2:57:51"} +{"current_steps": 3661, "total_steps": 5676, "loss": 0.7590633630752563, "lr": 6.14089179838977e-06, "epoch": 1.290044052863436, "percentage": 64.5, "elapsed_time": "5:22:58", "remaining_time": "2:57:46"} +{"current_steps": 3662, "total_steps": 5676, "loss": 0.6044079661369324, "lr": 6.135517393247081e-06, "epoch": 1.2903964757709252, "percentage": 64.52, "elapsed_time": "5:23:05", "remaining_time": "2:57:41"} +{"current_steps": 3663, "total_steps": 5676, "loss": 0.603421688079834, "lr": 
6.130144299975973e-06, "epoch": 1.2907488986784141, "percentage": 64.53, "elapsed_time": "5:23:09", "remaining_time": "2:57:35"} +{"current_steps": 3664, "total_steps": 5676, "loss": 0.577094554901123, "lr": 6.1247725204004395e-06, "epoch": 1.291101321585903, "percentage": 64.55, "elapsed_time": "5:23:14", "remaining_time": "2:57:30"} +{"current_steps": 3665, "total_steps": 5676, "loss": 0.5752004981040955, "lr": 6.119402056344033e-06, "epoch": 1.291453744493392, "percentage": 64.57, "elapsed_time": "5:23:19", "remaining_time": "2:57:24"} +{"current_steps": 3666, "total_steps": 5676, "loss": 0.730962872505188, "lr": 6.114032909629863e-06, "epoch": 1.291806167400881, "percentage": 64.59, "elapsed_time": "5:23:26", "remaining_time": "2:57:20"} +{"current_steps": 3667, "total_steps": 5676, "loss": 0.5361749529838562, "lr": 6.108665082080578e-06, "epoch": 1.29215859030837, "percentage": 64.61, "elapsed_time": "5:23:31", "remaining_time": "2:57:14"} +{"current_steps": 3668, "total_steps": 5676, "loss": 0.4841603636741638, "lr": 6.103298575518401e-06, "epoch": 1.292511013215859, "percentage": 64.62, "elapsed_time": "5:23:37", "remaining_time": "2:57:10"} +{"current_steps": 3669, "total_steps": 5676, "loss": 0.6614999771118164, "lr": 6.097933391765087e-06, "epoch": 1.292863436123348, "percentage": 64.64, "elapsed_time": "5:23:42", "remaining_time": "2:57:04"} +{"current_steps": 3670, "total_steps": 5676, "loss": 0.6088405847549438, "lr": 6.092569532641947e-06, "epoch": 1.2932158590308371, "percentage": 64.66, "elapsed_time": "5:23:48", "remaining_time": "2:56:59"} +{"current_steps": 3671, "total_steps": 5676, "loss": 0.601859986782074, "lr": 6.087206999969848e-06, "epoch": 1.293568281938326, "percentage": 64.68, "elapsed_time": "5:23:52", "remaining_time": "2:56:53"} +{"current_steps": 3672, "total_steps": 5676, "loss": 0.5724194049835205, "lr": 6.081845795569204e-06, "epoch": 1.293920704845815, "percentage": 64.69, "elapsed_time": "5:23:58", "remaining_time": "2:56:48"} 
+{"current_steps": 3673, "total_steps": 5676, "loss": 0.7899144887924194, "lr": 6.07648592125997e-06, "epoch": 1.294273127753304, "percentage": 64.71, "elapsed_time": "5:24:03", "remaining_time": "2:56:43"} +{"current_steps": 3674, "total_steps": 5676, "loss": 0.5778594017028809, "lr": 6.071127378861667e-06, "epoch": 1.2946255506607929, "percentage": 64.73, "elapsed_time": "5:24:09", "remaining_time": "2:56:38"} +{"current_steps": 3675, "total_steps": 5676, "loss": 0.6357566118240356, "lr": 6.065770170193342e-06, "epoch": 1.2949779735682818, "percentage": 64.75, "elapsed_time": "5:24:15", "remaining_time": "2:56:33"} +{"current_steps": 3676, "total_steps": 5676, "loss": 0.511436939239502, "lr": 6.0604142970736115e-06, "epoch": 1.295330396475771, "percentage": 64.76, "elapsed_time": "5:24:20", "remaining_time": "2:56:27"} +{"current_steps": 3677, "total_steps": 5676, "loss": 0.6469998955726624, "lr": 6.0550597613206205e-06, "epoch": 1.29568281938326, "percentage": 64.78, "elapsed_time": "5:24:26", "remaining_time": "2:56:22"} +{"current_steps": 3678, "total_steps": 5676, "loss": 0.5724819898605347, "lr": 6.049706564752069e-06, "epoch": 1.2960352422907488, "percentage": 64.8, "elapsed_time": "5:24:30", "remaining_time": "2:56:17"} +{"current_steps": 3679, "total_steps": 5676, "loss": 0.6567148566246033, "lr": 6.044354709185203e-06, "epoch": 1.296387665198238, "percentage": 64.82, "elapsed_time": "5:24:36", "remaining_time": "2:56:12"} +{"current_steps": 3680, "total_steps": 5676, "loss": 0.6694033145904541, "lr": 6.039004196436807e-06, "epoch": 1.296740088105727, "percentage": 64.83, "elapsed_time": "5:24:41", "remaining_time": "2:56:06"} +{"current_steps": 3681, "total_steps": 5676, "loss": 0.5147275924682617, "lr": 6.033655028323215e-06, "epoch": 1.2970925110132159, "percentage": 64.85, "elapsed_time": "5:24:46", "remaining_time": "2:56:01"} +{"current_steps": 3682, "total_steps": 5676, "loss": 0.5881609320640564, "lr": 6.0283072066603075e-06, "epoch": 
1.2974449339207048, "percentage": 64.87, "elapsed_time": "5:24:51", "remaining_time": "2:55:55"} +{"current_steps": 3683, "total_steps": 5676, "loss": 0.625927209854126, "lr": 6.022960733263493e-06, "epoch": 1.2977973568281937, "percentage": 64.89, "elapsed_time": "5:24:57", "remaining_time": "2:55:50"} +{"current_steps": 3684, "total_steps": 5676, "loss": 0.693459153175354, "lr": 6.017615609947747e-06, "epoch": 1.2981497797356827, "percentage": 64.9, "elapsed_time": "5:25:02", "remaining_time": "2:55:45"} +{"current_steps": 3685, "total_steps": 5676, "loss": 0.5185744762420654, "lr": 6.0122718385275615e-06, "epoch": 1.2985022026431718, "percentage": 64.92, "elapsed_time": "5:25:07", "remaining_time": "2:55:39"} +{"current_steps": 3686, "total_steps": 5676, "loss": 0.5153995752334595, "lr": 6.006929420816982e-06, "epoch": 1.2988546255506608, "percentage": 64.94, "elapsed_time": "5:25:12", "remaining_time": "2:55:34"} +{"current_steps": 3687, "total_steps": 5676, "loss": 0.5844067931175232, "lr": 6.001588358629598e-06, "epoch": 1.2992070484581497, "percentage": 64.96, "elapsed_time": "5:25:18", "remaining_time": "2:55:29"} +{"current_steps": 3688, "total_steps": 5676, "loss": 0.6021767854690552, "lr": 5.996248653778529e-06, "epoch": 1.2995594713656389, "percentage": 64.98, "elapsed_time": "5:25:23", "remaining_time": "2:55:23"} +{"current_steps": 3689, "total_steps": 5676, "loss": 0.573150098323822, "lr": 5.990910308076443e-06, "epoch": 1.2999118942731278, "percentage": 64.99, "elapsed_time": "5:25:27", "remaining_time": "2:55:18"} +{"current_steps": 3690, "total_steps": 5676, "loss": 0.5125507116317749, "lr": 5.985573323335541e-06, "epoch": 1.3002643171806167, "percentage": 65.01, "elapsed_time": "5:25:32", "remaining_time": "2:55:12"} +{"current_steps": 3691, "total_steps": 5676, "loss": 0.541732668876648, "lr": 5.980237701367556e-06, "epoch": 1.3006167400881057, "percentage": 65.03, "elapsed_time": "5:25:38", "remaining_time": "2:55:07"} +{"current_steps": 3692, 
"total_steps": 5676, "loss": 0.66359543800354, "lr": 5.974903443983778e-06, "epoch": 1.3009691629955946, "percentage": 65.05, "elapsed_time": "5:25:42", "remaining_time": "2:55:01"} +{"current_steps": 3693, "total_steps": 5676, "loss": 0.6986300349235535, "lr": 5.969570552995014e-06, "epoch": 1.3013215859030838, "percentage": 65.06, "elapsed_time": "5:25:47", "remaining_time": "2:54:56"} +{"current_steps": 3694, "total_steps": 5676, "loss": 0.6829022169113159, "lr": 5.9642390302116125e-06, "epoch": 1.3016740088105727, "percentage": 65.08, "elapsed_time": "5:25:53", "remaining_time": "2:54:51"} +{"current_steps": 3695, "total_steps": 5676, "loss": 0.5710464715957642, "lr": 5.9589088774434655e-06, "epoch": 1.3020264317180616, "percentage": 65.1, "elapsed_time": "5:25:58", "remaining_time": "2:54:45"} +{"current_steps": 3696, "total_steps": 5676, "loss": 0.5604938268661499, "lr": 5.953580096499989e-06, "epoch": 1.3023788546255506, "percentage": 65.12, "elapsed_time": "5:26:03", "remaining_time": "2:54:40"} +{"current_steps": 3697, "total_steps": 5676, "loss": 0.678723931312561, "lr": 5.948252689190141e-06, "epoch": 1.3027312775330397, "percentage": 65.13, "elapsed_time": "5:26:09", "remaining_time": "2:54:35"} +{"current_steps": 3698, "total_steps": 5676, "loss": 0.6652591228485107, "lr": 5.9429266573224145e-06, "epoch": 1.3030837004405287, "percentage": 65.15, "elapsed_time": "5:26:14", "remaining_time": "2:54:30"} +{"current_steps": 3699, "total_steps": 5676, "loss": 0.6141147017478943, "lr": 5.937602002704819e-06, "epoch": 1.3034361233480176, "percentage": 65.17, "elapsed_time": "5:26:19", "remaining_time": "2:54:24"} +{"current_steps": 3700, "total_steps": 5676, "loss": 0.5260860919952393, "lr": 5.932278727144924e-06, "epoch": 1.3037885462555066, "percentage": 65.19, "elapsed_time": "5:26:23", "remaining_time": "2:54:18"} +{"current_steps": 3701, "total_steps": 5676, "loss": 0.464357852935791, "lr": 5.926956832449806e-06, "epoch": 1.3041409691629955, "percentage": 
65.2, "elapsed_time": "5:26:33", "remaining_time": "2:54:15"} +{"current_steps": 3702, "total_steps": 5676, "loss": 0.6513686180114746, "lr": 5.921636320426085e-06, "epoch": 1.3044933920704846, "percentage": 65.22, "elapsed_time": "5:26:37", "remaining_time": "2:54:10"} +{"current_steps": 3703, "total_steps": 5676, "loss": 0.44547855854034424, "lr": 5.91631719287991e-06, "epoch": 1.3048458149779736, "percentage": 65.24, "elapsed_time": "5:26:42", "remaining_time": "2:54:04"} +{"current_steps": 3704, "total_steps": 5676, "loss": 0.714026153087616, "lr": 5.910999451616959e-06, "epoch": 1.3051982378854625, "percentage": 65.26, "elapsed_time": "5:26:47", "remaining_time": "2:53:58"} +{"current_steps": 3705, "total_steps": 5676, "loss": 0.48294252157211304, "lr": 5.90568309844244e-06, "epoch": 1.3055506607929517, "percentage": 65.27, "elapsed_time": "5:26:53", "remaining_time": "2:53:53"} +{"current_steps": 3706, "total_steps": 5676, "loss": 0.587759256362915, "lr": 5.900368135161093e-06, "epoch": 1.3059030837004406, "percentage": 65.29, "elapsed_time": "5:26:58", "remaining_time": "2:53:48"} +{"current_steps": 3707, "total_steps": 5676, "loss": 0.6251810789108276, "lr": 5.895054563577172e-06, "epoch": 1.3062555066079296, "percentage": 65.31, "elapsed_time": "5:27:04", "remaining_time": "2:53:43"} +{"current_steps": 3708, "total_steps": 5676, "loss": 0.6488438844680786, "lr": 5.889742385494481e-06, "epoch": 1.3066079295154185, "percentage": 65.33, "elapsed_time": "5:27:09", "remaining_time": "2:53:38"} +{"current_steps": 3709, "total_steps": 5676, "loss": 0.6682882308959961, "lr": 5.8844316027163315e-06, "epoch": 1.3069603524229074, "percentage": 65.35, "elapsed_time": "5:27:15", "remaining_time": "2:53:33"} +{"current_steps": 3710, "total_steps": 5676, "loss": 0.6537875533103943, "lr": 5.879122217045573e-06, "epoch": 1.3073127753303964, "percentage": 65.36, "elapsed_time": "5:27:21", "remaining_time": "2:53:28"} +{"current_steps": 3711, "total_steps": 5676, "loss": 
0.6813541650772095, "lr": 5.873814230284576e-06, "epoch": 1.3076651982378855, "percentage": 65.38, "elapsed_time": "5:27:25", "remaining_time": "2:53:22"} +{"current_steps": 3712, "total_steps": 5676, "loss": 0.6962395906448364, "lr": 5.868507644235233e-06, "epoch": 1.3080176211453745, "percentage": 65.4, "elapsed_time": "5:27:31", "remaining_time": "2:53:17"} +{"current_steps": 3713, "total_steps": 5676, "loss": 0.6872841119766235, "lr": 5.863202460698972e-06, "epoch": 1.3083700440528634, "percentage": 65.42, "elapsed_time": "5:27:37", "remaining_time": "2:53:12"} +{"current_steps": 3714, "total_steps": 5676, "loss": 0.7200508117675781, "lr": 5.857898681476732e-06, "epoch": 1.3087224669603525, "percentage": 65.43, "elapsed_time": "5:27:41", "remaining_time": "2:53:06"} +{"current_steps": 3715, "total_steps": 5676, "loss": 0.6100003719329834, "lr": 5.852596308368982e-06, "epoch": 1.3090748898678415, "percentage": 65.45, "elapsed_time": "5:27:45", "remaining_time": "2:53:00"} +{"current_steps": 3716, "total_steps": 5676, "loss": 0.7347345352172852, "lr": 5.847295343175714e-06, "epoch": 1.3094273127753304, "percentage": 65.47, "elapsed_time": "5:27:50", "remaining_time": "2:52:55"} +{"current_steps": 3717, "total_steps": 5676, "loss": 0.6955733895301819, "lr": 5.841995787696438e-06, "epoch": 1.3097797356828194, "percentage": 65.49, "elapsed_time": "5:27:55", "remaining_time": "2:52:49"} +{"current_steps": 3718, "total_steps": 5676, "loss": 0.5266987085342407, "lr": 5.836697643730193e-06, "epoch": 1.3101321585903083, "percentage": 65.5, "elapsed_time": "5:28:01", "remaining_time": "2:52:44"} +{"current_steps": 3719, "total_steps": 5676, "loss": 0.5978814363479614, "lr": 5.83140091307553e-06, "epoch": 1.3104845814977972, "percentage": 65.52, "elapsed_time": "5:28:08", "remaining_time": "2:52:40"} +{"current_steps": 3720, "total_steps": 5676, "loss": 0.608231782913208, "lr": 5.826105597530526e-06, "epoch": 1.3108370044052864, "percentage": 65.54, "elapsed_time": 
"5:28:13", "remaining_time": "2:52:34"} +{"current_steps": 3721, "total_steps": 5676, "loss": 0.5834963321685791, "lr": 5.820811698892775e-06, "epoch": 1.3111894273127753, "percentage": 65.56, "elapsed_time": "5:28:19", "remaining_time": "2:52:29"} +{"current_steps": 3722, "total_steps": 5676, "loss": 0.6675208806991577, "lr": 5.8155192189593915e-06, "epoch": 1.3115418502202643, "percentage": 65.57, "elapsed_time": "5:28:24", "remaining_time": "2:52:24"} +{"current_steps": 3723, "total_steps": 5676, "loss": 0.655093789100647, "lr": 5.810228159527003e-06, "epoch": 1.3118942731277534, "percentage": 65.59, "elapsed_time": "5:28:30", "remaining_time": "2:52:19"} +{"current_steps": 3724, "total_steps": 5676, "loss": 0.5658842921257019, "lr": 5.804938522391768e-06, "epoch": 1.3122466960352424, "percentage": 65.61, "elapsed_time": "5:28:34", "remaining_time": "2:52:13"} +{"current_steps": 3725, "total_steps": 5676, "loss": 0.4502618610858917, "lr": 5.799650309349348e-06, "epoch": 1.3125991189427313, "percentage": 65.63, "elapsed_time": "5:28:39", "remaining_time": "2:52:08"} +{"current_steps": 3726, "total_steps": 5676, "loss": 0.6165845394134521, "lr": 5.79436352219493e-06, "epoch": 1.3129515418502202, "percentage": 65.64, "elapsed_time": "5:28:44", "remaining_time": "2:52:03"} +{"current_steps": 3727, "total_steps": 5676, "loss": 0.6315968036651611, "lr": 5.7890781627232115e-06, "epoch": 1.3133039647577092, "percentage": 65.66, "elapsed_time": "5:28:51", "remaining_time": "2:51:58"} +{"current_steps": 3728, "total_steps": 5676, "loss": 0.58831787109375, "lr": 5.783794232728408e-06, "epoch": 1.313656387665198, "percentage": 65.68, "elapsed_time": "5:28:57", "remaining_time": "2:51:53"} +{"current_steps": 3729, "total_steps": 5676, "loss": 0.5056396722793579, "lr": 5.778511734004248e-06, "epoch": 1.3140088105726873, "percentage": 65.7, "elapsed_time": "5:29:02", "remaining_time": "2:51:48"} +{"current_steps": 3730, "total_steps": 5676, "loss": 0.5469251871109009, "lr": 
5.773230668343978e-06, "epoch": 1.3143612334801762, "percentage": 65.72, "elapsed_time": "5:29:08", "remaining_time": "2:51:43"} +{"current_steps": 3731, "total_steps": 5676, "loss": 0.7011934518814087, "lr": 5.76795103754035e-06, "epoch": 1.3147136563876651, "percentage": 65.73, "elapsed_time": "5:29:13", "remaining_time": "2:51:37"} +{"current_steps": 3732, "total_steps": 5676, "loss": 0.7080543041229248, "lr": 5.762672843385643e-06, "epoch": 1.3150660792951543, "percentage": 65.75, "elapsed_time": "5:29:19", "remaining_time": "2:51:32"} +{"current_steps": 3733, "total_steps": 5676, "loss": 0.5180330276489258, "lr": 5.757396087671634e-06, "epoch": 1.3154185022026432, "percentage": 65.77, "elapsed_time": "5:29:26", "remaining_time": "2:51:28"} +{"current_steps": 3734, "total_steps": 5676, "loss": 0.5282220840454102, "lr": 5.75212077218962e-06, "epoch": 1.3157709251101322, "percentage": 65.79, "elapsed_time": "5:29:31", "remaining_time": "2:51:22"} +{"current_steps": 3735, "total_steps": 5676, "loss": 0.7174440026283264, "lr": 5.746846898730403e-06, "epoch": 1.316123348017621, "percentage": 65.8, "elapsed_time": "5:29:37", "remaining_time": "2:51:17"} +{"current_steps": 3736, "total_steps": 5676, "loss": 0.537194013595581, "lr": 5.7415744690843025e-06, "epoch": 1.31647577092511, "percentage": 65.82, "elapsed_time": "5:29:43", "remaining_time": "2:51:12"} +{"current_steps": 3737, "total_steps": 5676, "loss": 0.7514588832855225, "lr": 5.7363034850411415e-06, "epoch": 1.3168281938325992, "percentage": 65.84, "elapsed_time": "5:29:48", "remaining_time": "2:51:07"} +{"current_steps": 3738, "total_steps": 5676, "loss": 0.601151704788208, "lr": 5.731033948390252e-06, "epoch": 1.3171806167400881, "percentage": 65.86, "elapsed_time": "5:29:55", "remaining_time": "2:51:03"} +{"current_steps": 3739, "total_steps": 5676, "loss": 0.6046192646026611, "lr": 5.7257658609204865e-06, "epoch": 1.317533039647577, "percentage": 65.87, "elapsed_time": "5:30:00", "remaining_time": 
"2:50:57"} +{"current_steps": 3740, "total_steps": 5676, "loss": 0.5003835558891296, "lr": 5.720499224420196e-06, "epoch": 1.317885462555066, "percentage": 65.89, "elapsed_time": "5:30:05", "remaining_time": "2:50:52"} +{"current_steps": 3741, "total_steps": 5676, "loss": 0.6251966953277588, "lr": 5.715234040677229e-06, "epoch": 1.3182378854625552, "percentage": 65.91, "elapsed_time": "5:30:11", "remaining_time": "2:50:47"} +{"current_steps": 3742, "total_steps": 5676, "loss": 0.6681240797042847, "lr": 5.709970311478961e-06, "epoch": 1.318590308370044, "percentage": 65.93, "elapsed_time": "5:30:15", "remaining_time": "2:50:41"} +{"current_steps": 3743, "total_steps": 5676, "loss": 0.582561194896698, "lr": 5.704708038612261e-06, "epoch": 1.318942731277533, "percentage": 65.94, "elapsed_time": "5:30:21", "remaining_time": "2:50:36"} +{"current_steps": 3744, "total_steps": 5676, "loss": 0.5616302490234375, "lr": 5.699447223863508e-06, "epoch": 1.319295154185022, "percentage": 65.96, "elapsed_time": "5:30:27", "remaining_time": "2:50:31"} +{"current_steps": 3745, "total_steps": 5676, "loss": 0.6131408214569092, "lr": 5.6941878690185835e-06, "epoch": 1.319647577092511, "percentage": 65.98, "elapsed_time": "5:30:32", "remaining_time": "2:50:26"} +{"current_steps": 3746, "total_steps": 5676, "loss": 0.5772547721862793, "lr": 5.688929975862873e-06, "epoch": 1.32, "percentage": 66.0, "elapsed_time": "5:30:36", "remaining_time": "2:50:19"} +{"current_steps": 3747, "total_steps": 5676, "loss": 0.5927203893661499, "lr": 5.683673546181274e-06, "epoch": 1.320352422907489, "percentage": 66.01, "elapsed_time": "5:30:41", "remaining_time": "2:50:14"} +{"current_steps": 3748, "total_steps": 5676, "loss": 0.6001334190368652, "lr": 5.67841858175818e-06, "epoch": 1.320704845814978, "percentage": 66.03, "elapsed_time": "5:30:46", "remaining_time": "2:50:09"} +{"current_steps": 3749, "total_steps": 5676, "loss": 0.4598100781440735, "lr": 5.673165084377479e-06, "epoch": 1.321057268722467, 
"percentage": 66.05, "elapsed_time": "5:30:51", "remaining_time": "2:50:03"} +{"current_steps": 3750, "total_steps": 5676, "loss": 0.6455222368240356, "lr": 5.667913055822578e-06, "epoch": 1.321409691629956, "percentage": 66.07, "elapsed_time": "5:30:55", "remaining_time": "2:49:58"} +{"current_steps": 3751, "total_steps": 5676, "loss": 0.6327164173126221, "lr": 5.662662497876375e-06, "epoch": 1.321762114537445, "percentage": 66.09, "elapsed_time": "5:31:01", "remaining_time": "2:49:52"} +{"current_steps": 3752, "total_steps": 5676, "loss": 0.6699539422988892, "lr": 5.657413412321271e-06, "epoch": 1.322114537444934, "percentage": 66.1, "elapsed_time": "5:31:07", "remaining_time": "2:49:47"} +{"current_steps": 3753, "total_steps": 5676, "loss": 0.7507830858230591, "lr": 5.6521658009391676e-06, "epoch": 1.3224669603524228, "percentage": 66.12, "elapsed_time": "5:31:13", "remaining_time": "2:49:42"} +{"current_steps": 3754, "total_steps": 5676, "loss": 0.5164662003517151, "lr": 5.646919665511461e-06, "epoch": 1.3228193832599118, "percentage": 66.14, "elapsed_time": "5:31:19", "remaining_time": "2:49:38"} +{"current_steps": 3755, "total_steps": 5676, "loss": 0.7059702277183533, "lr": 5.641675007819058e-06, "epoch": 1.323171806167401, "percentage": 66.16, "elapsed_time": "5:31:23", "remaining_time": "2:49:32"} +{"current_steps": 3756, "total_steps": 5676, "loss": 0.6535515189170837, "lr": 5.636431829642359e-06, "epoch": 1.3235242290748899, "percentage": 66.17, "elapsed_time": "5:31:28", "remaining_time": "2:49:26"} +{"current_steps": 3757, "total_steps": 5676, "loss": 0.5912176370620728, "lr": 5.631190132761247e-06, "epoch": 1.3238766519823788, "percentage": 66.19, "elapsed_time": "5:31:33", "remaining_time": "2:49:21"} +{"current_steps": 3758, "total_steps": 5676, "loss": 0.6527940034866333, "lr": 5.625949918955126e-06, "epoch": 1.324229074889868, "percentage": 66.21, "elapsed_time": "5:31:40", "remaining_time": "2:49:16"} +{"current_steps": 3759, "total_steps": 5676, 
"loss": 0.7236875295639038, "lr": 5.620711190002879e-06, "epoch": 1.324581497797357, "percentage": 66.23, "elapsed_time": "5:31:45", "remaining_time": "2:49:11"} +{"current_steps": 3760, "total_steps": 5676, "loss": 0.6823146343231201, "lr": 5.6154739476829e-06, "epoch": 1.3249339207048458, "percentage": 66.24, "elapsed_time": "5:31:50", "remaining_time": "2:49:05"} +{"current_steps": 3761, "total_steps": 5676, "loss": 0.5795537233352661, "lr": 5.610238193773061e-06, "epoch": 1.3252863436123348, "percentage": 66.26, "elapsed_time": "5:31:54", "remaining_time": "2:49:00"} +{"current_steps": 3762, "total_steps": 5676, "loss": 0.5530939102172852, "lr": 5.605003930050738e-06, "epoch": 1.3256387665198237, "percentage": 66.28, "elapsed_time": "5:31:59", "remaining_time": "2:48:54"} +{"current_steps": 3763, "total_steps": 5676, "loss": 0.5362278819084167, "lr": 5.599771158292806e-06, "epoch": 1.3259911894273126, "percentage": 66.3, "elapsed_time": "5:32:06", "remaining_time": "2:48:49"} +{"current_steps": 3764, "total_steps": 5676, "loss": 0.6136768460273743, "lr": 5.5945398802756315e-06, "epoch": 1.3263436123348018, "percentage": 66.31, "elapsed_time": "5:32:12", "remaining_time": "2:48:45"} +{"current_steps": 3765, "total_steps": 5676, "loss": 0.5979033708572388, "lr": 5.589310097775055e-06, "epoch": 1.3266960352422907, "percentage": 66.33, "elapsed_time": "5:32:17", "remaining_time": "2:48:39"} +{"current_steps": 3766, "total_steps": 5676, "loss": 0.6750006675720215, "lr": 5.584081812566439e-06, "epoch": 1.3270484581497797, "percentage": 66.35, "elapsed_time": "5:32:21", "remaining_time": "2:48:33"} +{"current_steps": 3767, "total_steps": 5676, "loss": 0.6004951000213623, "lr": 5.578855026424619e-06, "epoch": 1.3274008810572688, "percentage": 66.37, "elapsed_time": "5:32:27", "remaining_time": "2:48:28"} +{"current_steps": 3768, "total_steps": 5676, "loss": 0.570702075958252, "lr": 5.573629741123926e-06, "epoch": 1.3277533039647578, "percentage": 66.38, "elapsed_time": 
"5:32:33", "remaining_time": "2:48:23"} +{"current_steps": 3769, "total_steps": 5676, "loss": 0.506945788860321, "lr": 5.5684059584381826e-06, "epoch": 1.3281057268722467, "percentage": 66.4, "elapsed_time": "5:32:39", "remaining_time": "2:48:18"} +{"current_steps": 3770, "total_steps": 5676, "loss": 0.5935436487197876, "lr": 5.563183680140696e-06, "epoch": 1.3284581497797356, "percentage": 66.42, "elapsed_time": "5:32:45", "remaining_time": "2:48:14"} +{"current_steps": 3771, "total_steps": 5676, "loss": 0.641446590423584, "lr": 5.5579629080042755e-06, "epoch": 1.3288105726872246, "percentage": 66.44, "elapsed_time": "5:32:52", "remaining_time": "2:48:09"} +{"current_steps": 3772, "total_steps": 5676, "loss": 0.5816437005996704, "lr": 5.552743643801209e-06, "epoch": 1.3291629955947137, "percentage": 66.46, "elapsed_time": "5:32:57", "remaining_time": "2:48:04"} +{"current_steps": 3773, "total_steps": 5676, "loss": 0.666487991809845, "lr": 5.547525889303265e-06, "epoch": 1.3295154185022027, "percentage": 66.47, "elapsed_time": "5:33:02", "remaining_time": "2:47:58"} +{"current_steps": 3774, "total_steps": 5676, "loss": 0.7961397767066956, "lr": 5.542309646281718e-06, "epoch": 1.3298678414096916, "percentage": 66.49, "elapsed_time": "5:33:06", "remaining_time": "2:47:52"} +{"current_steps": 3775, "total_steps": 5676, "loss": 0.4736033082008362, "lr": 5.53709491650732e-06, "epoch": 1.3302202643171805, "percentage": 66.51, "elapsed_time": "5:33:11", "remaining_time": "2:47:47"} +{"current_steps": 3776, "total_steps": 5676, "loss": 0.542208194732666, "lr": 5.531881701750304e-06, "epoch": 1.3305726872246697, "percentage": 66.53, "elapsed_time": "5:33:17", "remaining_time": "2:47:42"} +{"current_steps": 3777, "total_steps": 5676, "loss": 0.6306429505348206, "lr": 5.526670003780399e-06, "epoch": 1.3309251101321586, "percentage": 66.54, "elapsed_time": "5:33:24", "remaining_time": "2:47:37"} +{"current_steps": 3778, "total_steps": 5676, "loss": 0.531991720199585, "lr": 
5.521459824366808e-06, "epoch": 1.3312775330396476, "percentage": 66.56, "elapsed_time": "5:33:28", "remaining_time": "2:47:32"} +{"current_steps": 3779, "total_steps": 5676, "loss": 0.688262939453125, "lr": 5.516251165278235e-06, "epoch": 1.3316299559471365, "percentage": 66.58, "elapsed_time": "5:33:33", "remaining_time": "2:47:26"} +{"current_steps": 3780, "total_steps": 5676, "loss": 0.7555293440818787, "lr": 5.511044028282853e-06, "epoch": 1.3319823788546254, "percentage": 66.6, "elapsed_time": "5:33:39", "remaining_time": "2:47:21"} +{"current_steps": 3781, "total_steps": 5676, "loss": 0.7518796324729919, "lr": 5.505838415148317e-06, "epoch": 1.3323348017621146, "percentage": 66.61, "elapsed_time": "5:33:44", "remaining_time": "2:47:16"} +{"current_steps": 3782, "total_steps": 5676, "loss": 0.5161253809928894, "lr": 5.500634327641777e-06, "epoch": 1.3326872246696035, "percentage": 66.63, "elapsed_time": "5:33:50", "remaining_time": "2:47:11"} +{"current_steps": 3783, "total_steps": 5676, "loss": 0.5617681741714478, "lr": 5.4954317675298586e-06, "epoch": 1.3330396475770925, "percentage": 66.65, "elapsed_time": "5:33:55", "remaining_time": "2:47:05"} +{"current_steps": 3784, "total_steps": 5676, "loss": 0.5707885026931763, "lr": 5.4902307365786676e-06, "epoch": 1.3333920704845814, "percentage": 66.67, "elapsed_time": "5:34:01", "remaining_time": "2:47:00"} +{"current_steps": 3785, "total_steps": 5676, "loss": 0.5842025876045227, "lr": 5.485031236553792e-06, "epoch": 1.3337444933920706, "percentage": 66.68, "elapsed_time": "5:34:06", "remaining_time": "2:46:55"} +{"current_steps": 3786, "total_steps": 5676, "loss": 0.7103949785232544, "lr": 5.479833269220296e-06, "epoch": 1.3340969162995595, "percentage": 66.7, "elapsed_time": "5:34:09", "remaining_time": "2:46:48"} +{"current_steps": 3787, "total_steps": 5676, "loss": 0.7704740762710571, "lr": 5.474636836342737e-06, "epoch": 1.3344493392070484, "percentage": 66.72, "elapsed_time": "5:34:15", "remaining_time": 
"2:46:43"} +{"current_steps": 3788, "total_steps": 5676, "loss": 0.6402652263641357, "lr": 5.469441939685137e-06, "epoch": 1.3348017621145374, "percentage": 66.74, "elapsed_time": "5:34:20", "remaining_time": "2:46:38"} +{"current_steps": 3789, "total_steps": 5676, "loss": 0.8214348554611206, "lr": 5.464248581011002e-06, "epoch": 1.3351541850220263, "percentage": 66.75, "elapsed_time": "5:34:24", "remaining_time": "2:46:32"} +{"current_steps": 3790, "total_steps": 5676, "loss": 0.6372429132461548, "lr": 5.459056762083318e-06, "epoch": 1.3355066079295155, "percentage": 66.77, "elapsed_time": "5:34:30", "remaining_time": "2:46:27"} +{"current_steps": 3791, "total_steps": 5676, "loss": 0.5418422222137451, "lr": 5.453866484664543e-06, "epoch": 1.3358590308370044, "percentage": 66.79, "elapsed_time": "5:34:35", "remaining_time": "2:46:21"} +{"current_steps": 3792, "total_steps": 5676, "loss": 0.6574567556381226, "lr": 5.448677750516613e-06, "epoch": 1.3362114537444933, "percentage": 66.81, "elapsed_time": "5:34:40", "remaining_time": "2:46:16"} +{"current_steps": 3793, "total_steps": 5676, "loss": 0.5174030661582947, "lr": 5.443490561400948e-06, "epoch": 1.3365638766519825, "percentage": 66.83, "elapsed_time": "5:34:45", "remaining_time": "2:46:11"} +{"current_steps": 3794, "total_steps": 5676, "loss": 0.595477819442749, "lr": 5.4383049190784275e-06, "epoch": 1.3369162995594714, "percentage": 66.84, "elapsed_time": "5:34:51", "remaining_time": "2:46:06"} +{"current_steps": 3795, "total_steps": 5676, "loss": 0.6177364587783813, "lr": 5.4331208253094255e-06, "epoch": 1.3372687224669604, "percentage": 66.86, "elapsed_time": "5:34:56", "remaining_time": "2:46:00"} +{"current_steps": 3796, "total_steps": 5676, "loss": 0.6106897592544556, "lr": 5.4279382818537774e-06, "epoch": 1.3376211453744493, "percentage": 66.88, "elapsed_time": "5:35:01", "remaining_time": "2:45:55"} +{"current_steps": 3797, "total_steps": 5676, "loss": 0.46700483560562134, "lr": 5.422757290470795e-06, 
"epoch": 1.3379735682819383, "percentage": 66.9, "elapsed_time": "5:35:06", "remaining_time": "2:45:50"} +{"current_steps": 3798, "total_steps": 5676, "loss": 0.5408231019973755, "lr": 5.417577852919262e-06, "epoch": 1.3383259911894272, "percentage": 66.91, "elapsed_time": "5:35:12", "remaining_time": "2:45:44"} +{"current_steps": 3799, "total_steps": 5676, "loss": 0.6430809497833252, "lr": 5.412399970957439e-06, "epoch": 1.3386784140969163, "percentage": 66.93, "elapsed_time": "5:35:16", "remaining_time": "2:45:39"} +{"current_steps": 3800, "total_steps": 5676, "loss": 0.6817858219146729, "lr": 5.4072236463430535e-06, "epoch": 1.3390308370044053, "percentage": 66.95, "elapsed_time": "5:35:21", "remaining_time": "2:45:33"} +{"current_steps": 3801, "total_steps": 5676, "loss": 0.5492604970932007, "lr": 5.402048880833308e-06, "epoch": 1.3393832599118942, "percentage": 66.97, "elapsed_time": "5:35:32", "remaining_time": "2:45:31"} +{"current_steps": 3802, "total_steps": 5676, "loss": 0.6148543357849121, "lr": 5.39687567618487e-06, "epoch": 1.3397356828193834, "percentage": 66.98, "elapsed_time": "5:35:37", "remaining_time": "2:45:25"} +{"current_steps": 3803, "total_steps": 5676, "loss": 0.5921820402145386, "lr": 5.391704034153894e-06, "epoch": 1.3400881057268723, "percentage": 67.0, "elapsed_time": "5:35:44", "remaining_time": "2:45:21"} +{"current_steps": 3804, "total_steps": 5676, "loss": 0.49728113412857056, "lr": 5.386533956495974e-06, "epoch": 1.3404405286343613, "percentage": 67.02, "elapsed_time": "5:35:49", "remaining_time": "2:45:15"} +{"current_steps": 3805, "total_steps": 5676, "loss": 0.5944808125495911, "lr": 5.381365444966205e-06, "epoch": 1.3407929515418502, "percentage": 67.04, "elapsed_time": "5:35:54", "remaining_time": "2:45:10"} +{"current_steps": 3806, "total_steps": 5676, "loss": 0.5197580456733704, "lr": 5.376198501319128e-06, "epoch": 1.3411453744493391, "percentage": 67.05, "elapsed_time": "5:36:00", "remaining_time": "2:45:05"} 
+{"current_steps": 3807, "total_steps": 5676, "loss": 0.6229256391525269, "lr": 5.3710331273087625e-06, "epoch": 1.341497797356828, "percentage": 67.07, "elapsed_time": "5:36:07", "remaining_time": "2:45:01"} +{"current_steps": 3808, "total_steps": 5676, "loss": 0.5305753946304321, "lr": 5.365869324688591e-06, "epoch": 1.3418502202643172, "percentage": 67.09, "elapsed_time": "5:36:11", "remaining_time": "2:44:54"} +{"current_steps": 3809, "total_steps": 5676, "loss": 0.4002259373664856, "lr": 5.360707095211566e-06, "epoch": 1.3422026431718062, "percentage": 67.11, "elapsed_time": "5:36:17", "remaining_time": "2:44:49"} +{"current_steps": 3810, "total_steps": 5676, "loss": 0.5211426615715027, "lr": 5.3555464406300965e-06, "epoch": 1.342555066079295, "percentage": 67.12, "elapsed_time": "5:36:22", "remaining_time": "2:44:44"} +{"current_steps": 3811, "total_steps": 5676, "loss": 0.5998013019561768, "lr": 5.350387362696077e-06, "epoch": 1.3429074889867842, "percentage": 67.14, "elapsed_time": "5:36:27", "remaining_time": "2:44:39"} +{"current_steps": 3812, "total_steps": 5676, "loss": 0.5330953598022461, "lr": 5.345229863160839e-06, "epoch": 1.3432599118942732, "percentage": 67.16, "elapsed_time": "5:36:33", "remaining_time": "2:44:34"} +{"current_steps": 3813, "total_steps": 5676, "loss": 0.6999118328094482, "lr": 5.340073943775206e-06, "epoch": 1.3436123348017621, "percentage": 67.18, "elapsed_time": "5:36:39", "remaining_time": "2:44:29"} +{"current_steps": 3814, "total_steps": 5676, "loss": 0.6286367177963257, "lr": 5.334919606289446e-06, "epoch": 1.343964757709251, "percentage": 67.2, "elapsed_time": "5:36:45", "remaining_time": "2:44:24"} +{"current_steps": 3815, "total_steps": 5676, "loss": 0.5793008804321289, "lr": 5.329766852453296e-06, "epoch": 1.34431718061674, "percentage": 67.21, "elapsed_time": "5:36:50", "remaining_time": "2:44:19"} +{"current_steps": 3816, "total_steps": 5676, "loss": 0.5811383128166199, "lr": 5.324615684015957e-06, "epoch": 
1.3446696035242292, "percentage": 67.23, "elapsed_time": "5:36:56", "remaining_time": "2:44:13"} +{"current_steps": 3817, "total_steps": 5676, "loss": 0.7389675378799438, "lr": 5.319466102726087e-06, "epoch": 1.345022026431718, "percentage": 67.25, "elapsed_time": "5:37:00", "remaining_time": "2:44:08"} +{"current_steps": 3818, "total_steps": 5676, "loss": 0.6105868220329285, "lr": 5.314318110331815e-06, "epoch": 1.345374449339207, "percentage": 67.27, "elapsed_time": "5:37:05", "remaining_time": "2:44:02"} +{"current_steps": 3819, "total_steps": 5676, "loss": 0.5979465842247009, "lr": 5.3091717085807235e-06, "epoch": 1.345726872246696, "percentage": 67.28, "elapsed_time": "5:37:09", "remaining_time": "2:43:56"} +{"current_steps": 3820, "total_steps": 5676, "loss": 0.6722681522369385, "lr": 5.304026899219846e-06, "epoch": 1.3460792951541851, "percentage": 67.3, "elapsed_time": "5:37:16", "remaining_time": "2:43:52"} +{"current_steps": 3821, "total_steps": 5676, "loss": 0.4687497913837433, "lr": 5.298883683995697e-06, "epoch": 1.346431718061674, "percentage": 67.32, "elapsed_time": "5:37:22", "remaining_time": "2:43:47"} +{"current_steps": 3822, "total_steps": 5676, "loss": 0.563692569732666, "lr": 5.29374206465423e-06, "epoch": 1.346784140969163, "percentage": 67.34, "elapsed_time": "5:37:26", "remaining_time": "2:43:41"} +{"current_steps": 3823, "total_steps": 5676, "loss": 0.604897141456604, "lr": 5.2886020429408716e-06, "epoch": 1.347136563876652, "percentage": 67.35, "elapsed_time": "5:37:33", "remaining_time": "2:43:36"} +{"current_steps": 3824, "total_steps": 5676, "loss": 0.6270164251327515, "lr": 5.283463620600493e-06, "epoch": 1.3474889867841409, "percentage": 67.37, "elapsed_time": "5:37:37", "remaining_time": "2:43:31"} +{"current_steps": 3825, "total_steps": 5676, "loss": 0.6487830877304077, "lr": 5.278326799377428e-06, "epoch": 1.34784140969163, "percentage": 67.39, "elapsed_time": "5:37:42", "remaining_time": "2:43:25"} +{"current_steps": 3826, 
"total_steps": 5676, "loss": 0.5816935896873474, "lr": 5.273191581015474e-06, "epoch": 1.348193832599119, "percentage": 67.41, "elapsed_time": "5:37:47", "remaining_time": "2:43:20"} +{"current_steps": 3827, "total_steps": 5676, "loss": 0.6281115412712097, "lr": 5.26805796725788e-06, "epoch": 1.348546255506608, "percentage": 67.42, "elapsed_time": "5:37:52", "remaining_time": "2:43:14"} +{"current_steps": 3828, "total_steps": 5676, "loss": 0.5031973123550415, "lr": 5.2629259598473335e-06, "epoch": 1.348898678414097, "percentage": 67.44, "elapsed_time": "5:37:57", "remaining_time": "2:43:08"} +{"current_steps": 3829, "total_steps": 5676, "loss": 0.6220165491104126, "lr": 5.257795560526005e-06, "epoch": 1.349251101321586, "percentage": 67.46, "elapsed_time": "5:38:02", "remaining_time": "2:43:03"} +{"current_steps": 3830, "total_steps": 5676, "loss": 0.6451058387756348, "lr": 5.2526667710354995e-06, "epoch": 1.349603524229075, "percentage": 67.48, "elapsed_time": "5:38:07", "remaining_time": "2:42:58"} +{"current_steps": 3831, "total_steps": 5676, "loss": 0.7524863481521606, "lr": 5.247539593116884e-06, "epoch": 1.3499559471365639, "percentage": 67.49, "elapsed_time": "5:38:12", "remaining_time": "2:42:52"} +{"current_steps": 3832, "total_steps": 5676, "loss": 0.6270921230316162, "lr": 5.242414028510674e-06, "epoch": 1.3503083700440528, "percentage": 67.51, "elapsed_time": "5:38:18", "remaining_time": "2:42:47"} +{"current_steps": 3833, "total_steps": 5676, "loss": 0.6196550130844116, "lr": 5.237290078956836e-06, "epoch": 1.3506607929515417, "percentage": 67.53, "elapsed_time": "5:38:22", "remaining_time": "2:42:42"} +{"current_steps": 3834, "total_steps": 5676, "loss": 0.8512230515480042, "lr": 5.232167746194798e-06, "epoch": 1.351013215859031, "percentage": 67.55, "elapsed_time": "5:38:27", "remaining_time": "2:42:36"} +{"current_steps": 3835, "total_steps": 5676, "loss": 0.5196807980537415, "lr": 5.227047031963435e-06, "epoch": 1.3513656387665198, "percentage": 
67.57, "elapsed_time": "5:38:31", "remaining_time": "2:42:30"} +{"current_steps": 3836, "total_steps": 5676, "loss": 0.6713111400604248, "lr": 5.2219279380010565e-06, "epoch": 1.3517180616740088, "percentage": 67.58, "elapsed_time": "5:38:36", "remaining_time": "2:42:24"} +{"current_steps": 3837, "total_steps": 5676, "loss": 0.7150874137878418, "lr": 5.216810466045448e-06, "epoch": 1.352070484581498, "percentage": 67.6, "elapsed_time": "5:38:40", "remaining_time": "2:42:19"} +{"current_steps": 3838, "total_steps": 5676, "loss": 0.5812375545501709, "lr": 5.211694617833827e-06, "epoch": 1.3524229074889869, "percentage": 67.62, "elapsed_time": "5:38:46", "remaining_time": "2:42:14"} +{"current_steps": 3839, "total_steps": 5676, "loss": 0.5842182040214539, "lr": 5.2065803951028675e-06, "epoch": 1.3527753303964758, "percentage": 67.64, "elapsed_time": "5:38:52", "remaining_time": "2:42:09"} +{"current_steps": 3840, "total_steps": 5676, "loss": 0.5432665348052979, "lr": 5.201467799588685e-06, "epoch": 1.3531277533039647, "percentage": 67.65, "elapsed_time": "5:38:56", "remaining_time": "2:42:03"} +{"current_steps": 3841, "total_steps": 5676, "loss": 0.551771879196167, "lr": 5.196356833026845e-06, "epoch": 1.3534801762114537, "percentage": 67.67, "elapsed_time": "5:39:02", "remaining_time": "2:41:58"} +{"current_steps": 3842, "total_steps": 5676, "loss": 0.6961710453033447, "lr": 5.19124749715237e-06, "epoch": 1.3538325991189426, "percentage": 67.69, "elapsed_time": "5:39:08", "remaining_time": "2:41:53"} +{"current_steps": 3843, "total_steps": 5676, "loss": 0.7105714678764343, "lr": 5.18613979369972e-06, "epoch": 1.3541850220264318, "percentage": 67.71, "elapsed_time": "5:39:13", "remaining_time": "2:41:48"} +{"current_steps": 3844, "total_steps": 5676, "loss": 0.7100229263305664, "lr": 5.181033724402789e-06, "epoch": 1.3545374449339207, "percentage": 67.72, "elapsed_time": "5:39:19", "remaining_time": "2:41:42"} +{"current_steps": 3845, "total_steps": 5676, "loss": 
0.651812732219696, "lr": 5.175929290994941e-06, "epoch": 1.3548898678414096, "percentage": 67.74, "elapsed_time": "5:39:24", "remaining_time": "2:41:37"} +{"current_steps": 3846, "total_steps": 5676, "loss": 0.5194147825241089, "lr": 5.170826495208967e-06, "epoch": 1.3552422907488988, "percentage": 67.76, "elapsed_time": "5:39:29", "remaining_time": "2:41:31"} +{"current_steps": 3847, "total_steps": 5676, "loss": 0.5939956307411194, "lr": 5.16572533877711e-06, "epoch": 1.3555947136563877, "percentage": 67.78, "elapsed_time": "5:39:34", "remaining_time": "2:41:26"} +{"current_steps": 3848, "total_steps": 5676, "loss": 0.6434104442596436, "lr": 5.160625823431051e-06, "epoch": 1.3559471365638767, "percentage": 67.79, "elapsed_time": "5:39:39", "remaining_time": "2:41:21"} +{"current_steps": 3849, "total_steps": 5676, "loss": 0.5256108045578003, "lr": 5.155527950901914e-06, "epoch": 1.3562995594713656, "percentage": 67.81, "elapsed_time": "5:39:45", "remaining_time": "2:41:16"} +{"current_steps": 3850, "total_steps": 5676, "loss": 0.5632717609405518, "lr": 5.150431722920277e-06, "epoch": 1.3566519823788545, "percentage": 67.83, "elapsed_time": "5:39:50", "remaining_time": "2:41:11"} +{"current_steps": 3851, "total_steps": 5676, "loss": 0.5964382886886597, "lr": 5.145337141216149e-06, "epoch": 1.3570044052863435, "percentage": 67.85, "elapsed_time": "5:39:56", "remaining_time": "2:41:05"} +{"current_steps": 3852, "total_steps": 5676, "loss": 0.7268366813659668, "lr": 5.140244207518971e-06, "epoch": 1.3573568281938326, "percentage": 67.86, "elapsed_time": "5:40:01", "remaining_time": "2:41:00"} +{"current_steps": 3853, "total_steps": 5676, "loss": 0.7376477122306824, "lr": 5.135152923557647e-06, "epoch": 1.3577092511013216, "percentage": 67.88, "elapsed_time": "5:40:06", "remaining_time": "2:40:55"} +{"current_steps": 3854, "total_steps": 5676, "loss": 0.50569748878479, "lr": 5.130063291060505e-06, "epoch": 1.3580616740088105, "percentage": 67.9, "elapsed_time": 
"5:40:10", "remaining_time": "2:40:49"} +{"current_steps": 3855, "total_steps": 5676, "loss": 0.5639374256134033, "lr": 5.12497531175532e-06, "epoch": 1.3584140969162997, "percentage": 67.92, "elapsed_time": "5:40:16", "remaining_time": "2:40:44"} +{"current_steps": 3856, "total_steps": 5676, "loss": 0.5051915645599365, "lr": 5.1198889873692994e-06, "epoch": 1.3587665198237886, "percentage": 67.94, "elapsed_time": "5:40:22", "remaining_time": "2:40:39"} +{"current_steps": 3857, "total_steps": 5676, "loss": 0.4718795120716095, "lr": 5.114804319629088e-06, "epoch": 1.3591189427312775, "percentage": 67.95, "elapsed_time": "5:40:27", "remaining_time": "2:40:33"} +{"current_steps": 3858, "total_steps": 5676, "loss": 0.5684067606925964, "lr": 5.109721310260781e-06, "epoch": 1.3594713656387665, "percentage": 67.97, "elapsed_time": "5:40:32", "remaining_time": "2:40:28"} +{"current_steps": 3859, "total_steps": 5676, "loss": 0.5757609605789185, "lr": 5.104639960989903e-06, "epoch": 1.3598237885462554, "percentage": 67.99, "elapsed_time": "5:40:39", "remaining_time": "2:40:23"} +{"current_steps": 3860, "total_steps": 5676, "loss": 0.5971167087554932, "lr": 5.099560273541401e-06, "epoch": 1.3601762114537446, "percentage": 68.01, "elapsed_time": "5:40:45", "remaining_time": "2:40:18"} +{"current_steps": 3861, "total_steps": 5676, "loss": 0.6959896683692932, "lr": 5.094482249639683e-06, "epoch": 1.3605286343612335, "percentage": 68.02, "elapsed_time": "5:40:51", "remaining_time": "2:40:14"} +{"current_steps": 3862, "total_steps": 5676, "loss": 0.6954548358917236, "lr": 5.089405891008574e-06, "epoch": 1.3608810572687224, "percentage": 68.04, "elapsed_time": "5:40:56", "remaining_time": "2:40:08"} +{"current_steps": 3863, "total_steps": 5676, "loss": 0.5659986138343811, "lr": 5.084331199371343e-06, "epoch": 1.3612334801762114, "percentage": 68.06, "elapsed_time": "5:41:02", "remaining_time": "2:40:03"} +{"current_steps": 3864, "total_steps": 5676, "loss": 0.5582559108734131, 
"lr": 5.079258176450687e-06, "epoch": 1.3615859030837005, "percentage": 68.08, "elapsed_time": "5:41:07", "remaining_time": "2:39:57"} +{"current_steps": 3865, "total_steps": 5676, "loss": 0.5337075591087341, "lr": 5.0741868239687395e-06, "epoch": 1.3619383259911895, "percentage": 68.09, "elapsed_time": "5:41:13", "remaining_time": "2:39:53"} +{"current_steps": 3866, "total_steps": 5676, "loss": 0.621441125869751, "lr": 5.069117143647075e-06, "epoch": 1.3622907488986784, "percentage": 68.11, "elapsed_time": "5:41:19", "remaining_time": "2:39:48"} +{"current_steps": 3867, "total_steps": 5676, "loss": 0.5476670861244202, "lr": 5.064049137206677e-06, "epoch": 1.3626431718061673, "percentage": 68.13, "elapsed_time": "5:41:24", "remaining_time": "2:39:42"} +{"current_steps": 3868, "total_steps": 5676, "loss": 0.5357356071472168, "lr": 5.058982806367989e-06, "epoch": 1.3629955947136563, "percentage": 68.15, "elapsed_time": "5:41:30", "remaining_time": "2:39:37"} +{"current_steps": 3869, "total_steps": 5676, "loss": 0.5722761750221252, "lr": 5.053918152850868e-06, "epoch": 1.3633480176211454, "percentage": 68.16, "elapsed_time": "5:41:35", "remaining_time": "2:39:32"} +{"current_steps": 3870, "total_steps": 5676, "loss": 0.7271207571029663, "lr": 5.048855178374606e-06, "epoch": 1.3637004405286344, "percentage": 68.18, "elapsed_time": "5:41:41", "remaining_time": "2:39:27"} +{"current_steps": 3871, "total_steps": 5676, "loss": 0.6213557720184326, "lr": 5.043793884657926e-06, "epoch": 1.3640528634361233, "percentage": 68.2, "elapsed_time": "5:41:47", "remaining_time": "2:39:22"} +{"current_steps": 3872, "total_steps": 5676, "loss": 0.6509476900100708, "lr": 5.03873427341898e-06, "epoch": 1.3644052863436125, "percentage": 68.22, "elapsed_time": "5:41:52", "remaining_time": "2:39:16"} +{"current_steps": 3873, "total_steps": 5676, "loss": 0.5321642756462097, "lr": 5.0336763463753425e-06, "epoch": 1.3647577092511014, "percentage": 68.23, "elapsed_time": "5:41:58", 
"remaining_time": "2:39:11"} +{"current_steps": 3874, "total_steps": 5676, "loss": 0.7237476110458374, "lr": 5.028620105244035e-06, "epoch": 1.3651101321585903, "percentage": 68.25, "elapsed_time": "5:42:03", "remaining_time": "2:39:06"} +{"current_steps": 3875, "total_steps": 5676, "loss": 0.6653447151184082, "lr": 5.0235655517414805e-06, "epoch": 1.3654625550660793, "percentage": 68.27, "elapsed_time": "5:42:09", "remaining_time": "2:39:01"} +{"current_steps": 3876, "total_steps": 5676, "loss": 0.6188938617706299, "lr": 5.018512687583552e-06, "epoch": 1.3658149779735682, "percentage": 68.29, "elapsed_time": "5:42:14", "remaining_time": "2:38:56"} +{"current_steps": 3877, "total_steps": 5676, "loss": 0.6341606378555298, "lr": 5.013461514485536e-06, "epoch": 1.3661674008810571, "percentage": 68.31, "elapsed_time": "5:42:21", "remaining_time": "2:38:51"} +{"current_steps": 3878, "total_steps": 5676, "loss": 0.6148994565010071, "lr": 5.00841203416215e-06, "epoch": 1.3665198237885463, "percentage": 68.32, "elapsed_time": "5:42:26", "remaining_time": "2:38:46"} +{"current_steps": 3879, "total_steps": 5676, "loss": 0.6292222142219543, "lr": 5.003364248327533e-06, "epoch": 1.3668722466960352, "percentage": 68.34, "elapsed_time": "5:42:31", "remaining_time": "2:38:40"} +{"current_steps": 3880, "total_steps": 5676, "loss": 0.6648836135864258, "lr": 4.998318158695255e-06, "epoch": 1.3672246696035242, "percentage": 68.36, "elapsed_time": "5:42:38", "remaining_time": "2:38:36"} +{"current_steps": 3881, "total_steps": 5676, "loss": 0.5175273418426514, "lr": 4.993273766978297e-06, "epoch": 1.3675770925110133, "percentage": 68.38, "elapsed_time": "5:42:42", "remaining_time": "2:38:30"} +{"current_steps": 3882, "total_steps": 5676, "loss": 0.5686253309249878, "lr": 4.98823107488909e-06, "epoch": 1.3679295154185023, "percentage": 68.39, "elapsed_time": "5:42:48", "remaining_time": "2:38:25"} +{"current_steps": 3883, "total_steps": 5676, "loss": 0.6128156185150146, "lr": 
4.983190084139452e-06, "epoch": 1.3682819383259912, "percentage": 68.41, "elapsed_time": "5:42:53", "remaining_time": "2:38:19"} +{"current_steps": 3884, "total_steps": 5676, "loss": 0.6849625110626221, "lr": 4.978150796440656e-06, "epoch": 1.3686343612334801, "percentage": 68.43, "elapsed_time": "5:42:58", "remaining_time": "2:38:14"} +{"current_steps": 3885, "total_steps": 5676, "loss": 0.5735955238342285, "lr": 4.973113213503379e-06, "epoch": 1.368986784140969, "percentage": 68.45, "elapsed_time": "5:43:03", "remaining_time": "2:38:09"} +{"current_steps": 3886, "total_steps": 5676, "loss": 0.4584425091743469, "lr": 4.968077337037724e-06, "epoch": 1.369339207048458, "percentage": 68.46, "elapsed_time": "5:43:09", "remaining_time": "2:38:04"} +{"current_steps": 3887, "total_steps": 5676, "loss": 0.547109067440033, "lr": 4.963043168753212e-06, "epoch": 1.3696916299559472, "percentage": 68.48, "elapsed_time": "5:43:15", "remaining_time": "2:37:58"} +{"current_steps": 3888, "total_steps": 5676, "loss": 0.6966128349304199, "lr": 4.9580107103587895e-06, "epoch": 1.3700440528634361, "percentage": 68.5, "elapsed_time": "5:43:21", "remaining_time": "2:37:54"} +{"current_steps": 3889, "total_steps": 5676, "loss": 0.6275819540023804, "lr": 4.952979963562814e-06, "epoch": 1.370396475770925, "percentage": 68.52, "elapsed_time": "5:43:26", "remaining_time": "2:37:48"} +{"current_steps": 3890, "total_steps": 5676, "loss": 0.5678467750549316, "lr": 4.94795093007308e-06, "epoch": 1.3707488986784142, "percentage": 68.53, "elapsed_time": "5:43:30", "remaining_time": "2:37:42"} +{"current_steps": 3891, "total_steps": 5676, "loss": 0.6516115665435791, "lr": 4.942923611596772e-06, "epoch": 1.3711013215859031, "percentage": 68.55, "elapsed_time": "5:43:35", "remaining_time": "2:37:37"} +{"current_steps": 3892, "total_steps": 5676, "loss": 0.6279621124267578, "lr": 4.937898009840518e-06, "epoch": 1.371453744493392, "percentage": 68.57, "elapsed_time": "5:43:40", "remaining_time": 
"2:37:32"} +{"current_steps": 3893, "total_steps": 5676, "loss": 0.6123322248458862, "lr": 4.932874126510353e-06, "epoch": 1.371806167400881, "percentage": 68.59, "elapsed_time": "5:43:46", "remaining_time": "2:37:26"} +{"current_steps": 3894, "total_steps": 5676, "loss": 0.43412432074546814, "lr": 4.927851963311726e-06, "epoch": 1.37215859030837, "percentage": 68.6, "elapsed_time": "5:43:50", "remaining_time": "2:37:21"} +{"current_steps": 3895, "total_steps": 5676, "loss": 0.6582022905349731, "lr": 4.922831521949507e-06, "epoch": 1.372511013215859, "percentage": 68.62, "elapsed_time": "5:43:56", "remaining_time": "2:37:16"} +{"current_steps": 3896, "total_steps": 5676, "loss": 0.6219466328620911, "lr": 4.917812804127976e-06, "epoch": 1.372863436123348, "percentage": 68.64, "elapsed_time": "5:44:02", "remaining_time": "2:37:11"} +{"current_steps": 3897, "total_steps": 5676, "loss": 0.5352981090545654, "lr": 4.9127958115508365e-06, "epoch": 1.373215859030837, "percentage": 68.66, "elapsed_time": "5:44:07", "remaining_time": "2:37:05"} +{"current_steps": 3898, "total_steps": 5676, "loss": 0.47646182775497437, "lr": 4.907780545921205e-06, "epoch": 1.373568281938326, "percentage": 68.68, "elapsed_time": "5:44:12", "remaining_time": "2:37:00"} +{"current_steps": 3899, "total_steps": 5676, "loss": 0.5335453748703003, "lr": 4.902767008941594e-06, "epoch": 1.373920704845815, "percentage": 68.69, "elapsed_time": "5:44:18", "remaining_time": "2:36:55"} +{"current_steps": 3900, "total_steps": 5676, "loss": 0.576435923576355, "lr": 4.897755202313954e-06, "epoch": 1.374273127753304, "percentage": 68.71, "elapsed_time": "5:44:23", "remaining_time": "2:36:49"} +{"current_steps": 3901, "total_steps": 5676, "loss": 0.533431887626648, "lr": 4.8927451277396365e-06, "epoch": 1.374625550660793, "percentage": 68.73, "elapsed_time": "5:44:33", "remaining_time": "2:36:46"} +{"current_steps": 3902, "total_steps": 5676, "loss": 0.6892110109329224, "lr": 4.8877367869194035e-06, "epoch": 
1.3749779735682819, "percentage": 68.75, "elapsed_time": "5:44:38", "remaining_time": "2:36:41"} +{"current_steps": 3903, "total_steps": 5676, "loss": 0.7028052806854248, "lr": 4.8827301815534335e-06, "epoch": 1.3753303964757708, "percentage": 68.76, "elapsed_time": "5:44:44", "remaining_time": "2:36:36"} +{"current_steps": 3904, "total_steps": 5676, "loss": 0.6883414387702942, "lr": 4.877725313341306e-06, "epoch": 1.37568281938326, "percentage": 68.78, "elapsed_time": "5:44:48", "remaining_time": "2:36:30"} +{"current_steps": 3905, "total_steps": 5676, "loss": 0.6712944507598877, "lr": 4.8727221839820285e-06, "epoch": 1.376035242290749, "percentage": 68.8, "elapsed_time": "5:44:52", "remaining_time": "2:36:24"} +{"current_steps": 3906, "total_steps": 5676, "loss": 0.6139085292816162, "lr": 4.867720795174006e-06, "epoch": 1.3763876651982379, "percentage": 68.82, "elapsed_time": "5:44:58", "remaining_time": "2:36:19"} +{"current_steps": 3907, "total_steps": 5676, "loss": 0.6463953256607056, "lr": 4.862721148615043e-06, "epoch": 1.3767400881057268, "percentage": 68.83, "elapsed_time": "5:45:03", "remaining_time": "2:36:14"} +{"current_steps": 3908, "total_steps": 5676, "loss": 0.6790587306022644, "lr": 4.857723246002376e-06, "epoch": 1.377092511013216, "percentage": 68.85, "elapsed_time": "5:45:08", "remaining_time": "2:36:08"} +{"current_steps": 3909, "total_steps": 5676, "loss": 0.4996854066848755, "lr": 4.852727089032634e-06, "epoch": 1.3774449339207049, "percentage": 68.87, "elapsed_time": "5:45:13", "remaining_time": "2:36:02"} +{"current_steps": 3910, "total_steps": 5676, "loss": 0.5826590061187744, "lr": 4.847732679401855e-06, "epoch": 1.3777973568281938, "percentage": 68.89, "elapsed_time": "5:45:18", "remaining_time": "2:35:57"} +{"current_steps": 3911, "total_steps": 5676, "loss": 0.5044558048248291, "lr": 4.842740018805489e-06, "epoch": 1.3781497797356828, "percentage": 68.9, "elapsed_time": "5:45:23", "remaining_time": "2:35:52"} +{"current_steps": 3912, 
"total_steps": 5676, "loss": 0.49022918939590454, "lr": 4.837749108938381e-06, "epoch": 1.3785022026431717, "percentage": 68.92, "elapsed_time": "5:45:27", "remaining_time": "2:35:46"} +{"current_steps": 3913, "total_steps": 5676, "loss": 0.7034850120544434, "lr": 4.832759951494798e-06, "epoch": 1.3788546255506609, "percentage": 68.94, "elapsed_time": "5:45:33", "remaining_time": "2:35:41"} +{"current_steps": 3914, "total_steps": 5676, "loss": 0.5835636854171753, "lr": 4.827772548168408e-06, "epoch": 1.3792070484581498, "percentage": 68.96, "elapsed_time": "5:45:39", "remaining_time": "2:35:36"} +{"current_steps": 3915, "total_steps": 5676, "loss": 0.6000608205795288, "lr": 4.822786900652262e-06, "epoch": 1.3795594713656387, "percentage": 68.97, "elapsed_time": "5:45:44", "remaining_time": "2:35:31"} +{"current_steps": 3916, "total_steps": 5676, "loss": 0.6121091842651367, "lr": 4.817803010638847e-06, "epoch": 1.3799118942731279, "percentage": 68.99, "elapsed_time": "5:45:49", "remaining_time": "2:35:25"} +{"current_steps": 3917, "total_steps": 5676, "loss": 0.457197904586792, "lr": 4.812820879820034e-06, "epoch": 1.3802643171806168, "percentage": 69.01, "elapsed_time": "5:45:54", "remaining_time": "2:35:20"} +{"current_steps": 3918, "total_steps": 5676, "loss": 0.6495843529701233, "lr": 4.807840509887102e-06, "epoch": 1.3806167400881058, "percentage": 69.03, "elapsed_time": "5:45:58", "remaining_time": "2:35:14"} +{"current_steps": 3919, "total_steps": 5676, "loss": 0.6245059967041016, "lr": 4.80286190253073e-06, "epoch": 1.3809691629955947, "percentage": 69.05, "elapsed_time": "5:46:03", "remaining_time": "2:35:08"} +{"current_steps": 3920, "total_steps": 5676, "loss": 0.5648606419563293, "lr": 4.797885059440998e-06, "epoch": 1.3813215859030836, "percentage": 69.06, "elapsed_time": "5:46:08", "remaining_time": "2:35:03"} +{"current_steps": 3921, "total_steps": 5676, "loss": 0.6593670845031738, "lr": 4.7929099823073945e-06, "epoch": 1.3816740088105726, 
"percentage": 69.08, "elapsed_time": "5:46:13", "remaining_time": "2:34:58"} +{"current_steps": 3922, "total_steps": 5676, "loss": 0.6400346159934998, "lr": 4.787936672818807e-06, "epoch": 1.3820264317180617, "percentage": 69.1, "elapsed_time": "5:46:19", "remaining_time": "2:34:52"} +{"current_steps": 3923, "total_steps": 5676, "loss": 0.6042170524597168, "lr": 4.782965132663505e-06, "epoch": 1.3823788546255507, "percentage": 69.12, "elapsed_time": "5:46:23", "remaining_time": "2:34:47"} +{"current_steps": 3924, "total_steps": 5676, "loss": 0.6224586963653564, "lr": 4.777995363529184e-06, "epoch": 1.3827312775330396, "percentage": 69.13, "elapsed_time": "5:46:28", "remaining_time": "2:34:41"} +{"current_steps": 3925, "total_steps": 5676, "loss": 0.6944444179534912, "lr": 4.7730273671029235e-06, "epoch": 1.3830837004405288, "percentage": 69.15, "elapsed_time": "5:46:33", "remaining_time": "2:34:36"} +{"current_steps": 3926, "total_steps": 5676, "loss": 0.5871950387954712, "lr": 4.768061145071201e-06, "epoch": 1.3834361233480177, "percentage": 69.17, "elapsed_time": "5:46:39", "remaining_time": "2:34:31"} +{"current_steps": 3927, "total_steps": 5676, "loss": 0.6438909769058228, "lr": 4.763096699119897e-06, "epoch": 1.3837885462555066, "percentage": 69.19, "elapsed_time": "5:46:45", "remaining_time": "2:34:26"} +{"current_steps": 3928, "total_steps": 5676, "loss": 0.6338443756103516, "lr": 4.75813403093428e-06, "epoch": 1.3841409691629956, "percentage": 69.2, "elapsed_time": "5:46:51", "remaining_time": "2:34:21"} +{"current_steps": 3929, "total_steps": 5676, "loss": 0.6343874931335449, "lr": 4.753173142199036e-06, "epoch": 1.3844933920704845, "percentage": 69.22, "elapsed_time": "5:46:56", "remaining_time": "2:34:15"} +{"current_steps": 3930, "total_steps": 5676, "loss": 0.5383629202842712, "lr": 4.7482140345982174e-06, "epoch": 1.3848458149779734, "percentage": 69.24, "elapsed_time": "5:47:02", "remaining_time": "2:34:10"} +{"current_steps": 3931, "total_steps": 
5676, "loss": 0.5365063548088074, "lr": 4.743256709815289e-06, "epoch": 1.3851982378854626, "percentage": 69.26, "elapsed_time": "5:47:06", "remaining_time": "2:34:04"} +{"current_steps": 3932, "total_steps": 5676, "loss": 0.6310757398605347, "lr": 4.738301169533116e-06, "epoch": 1.3855506607929515, "percentage": 69.27, "elapsed_time": "5:47:10", "remaining_time": "2:33:59"} +{"current_steps": 3933, "total_steps": 5676, "loss": 0.7609038949012756, "lr": 4.733347415433946e-06, "epoch": 1.3859030837004405, "percentage": 69.29, "elapsed_time": "5:47:15", "remaining_time": "2:33:53"} +{"current_steps": 3934, "total_steps": 5676, "loss": 0.5837516784667969, "lr": 4.728395449199423e-06, "epoch": 1.3862555066079296, "percentage": 69.31, "elapsed_time": "5:47:19", "remaining_time": "2:33:47"} +{"current_steps": 3935, "total_steps": 5676, "loss": 0.6075407862663269, "lr": 4.7234452725105875e-06, "epoch": 1.3866079295154186, "percentage": 69.33, "elapsed_time": "5:47:25", "remaining_time": "2:33:42"} +{"current_steps": 3936, "total_steps": 5676, "loss": 0.5246843099594116, "lr": 4.718496887047864e-06, "epoch": 1.3869603524229075, "percentage": 69.34, "elapsed_time": "5:47:30", "remaining_time": "2:33:37"} +{"current_steps": 3937, "total_steps": 5676, "loss": 0.6256884336471558, "lr": 4.713550294491091e-06, "epoch": 1.3873127753303964, "percentage": 69.36, "elapsed_time": "5:47:35", "remaining_time": "2:33:32"} +{"current_steps": 3938, "total_steps": 5676, "loss": 0.5039727687835693, "lr": 4.708605496519467e-06, "epoch": 1.3876651982378854, "percentage": 69.38, "elapsed_time": "5:47:42", "remaining_time": "2:33:27"} +{"current_steps": 3939, "total_steps": 5676, "loss": 0.5302769541740417, "lr": 4.703662494811599e-06, "epoch": 1.3880176211453745, "percentage": 69.4, "elapsed_time": "5:47:47", "remaining_time": "2:33:21"} +{"current_steps": 3940, "total_steps": 5676, "loss": 0.654889702796936, "lr": 4.698721291045491e-06, "epoch": 1.3883700440528635, "percentage": 69.42, 
"elapsed_time": "5:47:53", "remaining_time": "2:33:17"} +{"current_steps": 3941, "total_steps": 5676, "loss": 0.5571156740188599, "lr": 4.693781886898521e-06, "epoch": 1.3887224669603524, "percentage": 69.43, "elapsed_time": "5:47:58", "remaining_time": "2:33:11"} +{"current_steps": 3942, "total_steps": 5676, "loss": 0.489155113697052, "lr": 4.688844284047466e-06, "epoch": 1.3890748898678413, "percentage": 69.45, "elapsed_time": "5:48:02", "remaining_time": "2:33:05"} +{"current_steps": 3943, "total_steps": 5676, "loss": 0.6422649621963501, "lr": 4.683908484168487e-06, "epoch": 1.3894273127753305, "percentage": 69.47, "elapsed_time": "5:48:08", "remaining_time": "2:33:00"} +{"current_steps": 3944, "total_steps": 5676, "loss": 0.6800041794776917, "lr": 4.67897448893713e-06, "epoch": 1.3897797356828194, "percentage": 69.49, "elapsed_time": "5:48:13", "remaining_time": "2:32:55"} +{"current_steps": 3945, "total_steps": 5676, "loss": 0.6091655492782593, "lr": 4.674042300028345e-06, "epoch": 1.3901321585903084, "percentage": 69.5, "elapsed_time": "5:48:18", "remaining_time": "2:32:49"} +{"current_steps": 3946, "total_steps": 5676, "loss": 0.6217864751815796, "lr": 4.669111919116442e-06, "epoch": 1.3904845814977973, "percentage": 69.52, "elapsed_time": "5:48:24", "remaining_time": "2:32:45"} +{"current_steps": 3947, "total_steps": 5676, "loss": 0.6140862703323364, "lr": 4.664183347875144e-06, "epoch": 1.3908370044052862, "percentage": 69.54, "elapsed_time": "5:48:30", "remaining_time": "2:32:39"} +{"current_steps": 3948, "total_steps": 5676, "loss": 0.5485835075378418, "lr": 4.659256587977542e-06, "epoch": 1.3911894273127754, "percentage": 69.56, "elapsed_time": "5:48:35", "remaining_time": "2:32:34"} +{"current_steps": 3949, "total_steps": 5676, "loss": 0.642849862575531, "lr": 4.654331641096118e-06, "epoch": 1.3915418502202643, "percentage": 69.57, "elapsed_time": "5:48:40", "remaining_time": "2:32:29"} +{"current_steps": 3950, "total_steps": 5676, "loss": 
0.7084407806396484, "lr": 4.649408508902739e-06, "epoch": 1.3918942731277533, "percentage": 69.59, "elapsed_time": "5:48:45", "remaining_time": "2:32:23"} +{"current_steps": 3951, "total_steps": 5676, "loss": 0.4798510670661926, "lr": 4.644487193068653e-06, "epoch": 1.3922466960352424, "percentage": 69.61, "elapsed_time": "5:48:50", "remaining_time": "2:32:18"} +{"current_steps": 3952, "total_steps": 5676, "loss": 0.6350974440574646, "lr": 4.639567695264493e-06, "epoch": 1.3925991189427314, "percentage": 69.63, "elapsed_time": "5:48:54", "remaining_time": "2:32:12"} +{"current_steps": 3953, "total_steps": 5676, "loss": 0.6046940684318542, "lr": 4.634650017160285e-06, "epoch": 1.3929515418502203, "percentage": 69.64, "elapsed_time": "5:49:00", "remaining_time": "2:32:07"} +{"current_steps": 3954, "total_steps": 5676, "loss": 0.5262438058853149, "lr": 4.629734160425412e-06, "epoch": 1.3933039647577092, "percentage": 69.66, "elapsed_time": "5:49:06", "remaining_time": "2:32:02"} +{"current_steps": 3955, "total_steps": 5676, "loss": 0.4836997985839844, "lr": 4.6248201267286666e-06, "epoch": 1.3936563876651982, "percentage": 69.68, "elapsed_time": "5:49:12", "remaining_time": "2:31:57"} +{"current_steps": 3956, "total_steps": 5676, "loss": 0.5491573810577393, "lr": 4.619907917738206e-06, "epoch": 1.394008810572687, "percentage": 69.7, "elapsed_time": "5:49:16", "remaining_time": "2:31:51"} +{"current_steps": 3957, "total_steps": 5676, "loss": 0.5778772830963135, "lr": 4.614997535121574e-06, "epoch": 1.3943612334801763, "percentage": 69.71, "elapsed_time": "5:49:22", "remaining_time": "2:31:46"} +{"current_steps": 3958, "total_steps": 5676, "loss": 0.6235651969909668, "lr": 4.61008898054569e-06, "epoch": 1.3947136563876652, "percentage": 69.73, "elapsed_time": "5:49:28", "remaining_time": "2:31:41"} +{"current_steps": 3959, "total_steps": 5676, "loss": 0.5192956924438477, "lr": 4.605182255676857e-06, "epoch": 1.3950660792951541, "percentage": 69.75, "elapsed_time": 
"5:49:34", "remaining_time": "2:31:36"} +{"current_steps": 3960, "total_steps": 5676, "loss": 0.5652563571929932, "lr": 4.600277362180753e-06, "epoch": 1.3954185022026433, "percentage": 69.77, "elapsed_time": "5:49:40", "remaining_time": "2:31:31"} +{"current_steps": 3961, "total_steps": 5676, "loss": 0.6451884508132935, "lr": 4.595374301722445e-06, "epoch": 1.3957709251101322, "percentage": 69.79, "elapsed_time": "5:49:44", "remaining_time": "2:31:25"} +{"current_steps": 3962, "total_steps": 5676, "loss": 0.6358006000518799, "lr": 4.5904730759663555e-06, "epoch": 1.3961233480176212, "percentage": 69.8, "elapsed_time": "5:49:50", "remaining_time": "2:31:20"} +{"current_steps": 3963, "total_steps": 5676, "loss": 0.6122751832008362, "lr": 4.5855736865763104e-06, "epoch": 1.39647577092511, "percentage": 69.82, "elapsed_time": "5:49:55", "remaining_time": "2:31:15"} +{"current_steps": 3964, "total_steps": 5676, "loss": 0.5563797354698181, "lr": 4.580676135215495e-06, "epoch": 1.396828193832599, "percentage": 69.84, "elapsed_time": "5:50:00", "remaining_time": "2:31:09"} +{"current_steps": 3965, "total_steps": 5676, "loss": 0.5915960669517517, "lr": 4.575780423546476e-06, "epoch": 1.397180616740088, "percentage": 69.86, "elapsed_time": "5:50:06", "remaining_time": "2:31:05"} +{"current_steps": 3966, "total_steps": 5676, "loss": 0.5755159854888916, "lr": 4.570886553231196e-06, "epoch": 1.3975330396475771, "percentage": 69.87, "elapsed_time": "5:50:11", "remaining_time": "2:30:59"} +{"current_steps": 3967, "total_steps": 5676, "loss": 0.7017625570297241, "lr": 4.565994525930967e-06, "epoch": 1.397885462555066, "percentage": 69.89, "elapsed_time": "5:50:16", "remaining_time": "2:30:54"} +{"current_steps": 3968, "total_steps": 5676, "loss": 0.7623441815376282, "lr": 4.5611043433064875e-06, "epoch": 1.398237885462555, "percentage": 69.91, "elapsed_time": "5:50:21", "remaining_time": "2:30:48"} +{"current_steps": 3969, "total_steps": 5676, "loss": 0.5561864376068115, "lr": 
4.556216007017822e-06, "epoch": 1.3985903083700442, "percentage": 69.93, "elapsed_time": "5:50:26", "remaining_time": "2:30:43"} +{"current_steps": 3970, "total_steps": 5676, "loss": 0.516582727432251, "lr": 4.5513295187243975e-06, "epoch": 1.398942731277533, "percentage": 69.94, "elapsed_time": "5:50:32", "remaining_time": "2:30:38"} +{"current_steps": 3971, "total_steps": 5676, "loss": 0.6324976682662964, "lr": 4.5464448800850366e-06, "epoch": 1.399295154185022, "percentage": 69.96, "elapsed_time": "5:50:39", "remaining_time": "2:30:33"} +{"current_steps": 3972, "total_steps": 5676, "loss": 0.5926251411437988, "lr": 4.541562092757918e-06, "epoch": 1.399647577092511, "percentage": 69.98, "elapsed_time": "5:50:45", "remaining_time": "2:30:28"} +{"current_steps": 3973, "total_steps": 5676, "loss": 0.5677082538604736, "lr": 4.536681158400598e-06, "epoch": 1.4, "percentage": 70.0, "elapsed_time": "5:50:49", "remaining_time": "2:30:22"} +{"current_steps": 3974, "total_steps": 5676, "loss": 0.5267887115478516, "lr": 4.531802078669997e-06, "epoch": 1.400352422907489, "percentage": 70.01, "elapsed_time": "5:50:56", "remaining_time": "2:30:17"} +{"current_steps": 3975, "total_steps": 5676, "loss": 0.6361796855926514, "lr": 4.526924855222411e-06, "epoch": 1.400704845814978, "percentage": 70.03, "elapsed_time": "5:51:00", "remaining_time": "2:30:12"} +{"current_steps": 3976, "total_steps": 5676, "loss": 0.5906916856765747, "lr": 4.522049489713513e-06, "epoch": 1.401057268722467, "percentage": 70.05, "elapsed_time": "5:51:04", "remaining_time": "2:30:06"} +{"current_steps": 3977, "total_steps": 5676, "loss": 0.647320568561554, "lr": 4.517175983798334e-06, "epoch": 1.4014096916299559, "percentage": 70.07, "elapsed_time": "5:51:08", "remaining_time": "2:30:00"} +{"current_steps": 3978, "total_steps": 5676, "loss": 0.6129240989685059, "lr": 4.512304339131271e-06, "epoch": 1.401762114537445, "percentage": 70.08, "elapsed_time": "5:51:13", "remaining_time": "2:29:55"} 
+{"current_steps": 3979, "total_steps": 5676, "loss": 0.5550417900085449, "lr": 4.507434557366106e-06, "epoch": 1.402114537444934, "percentage": 70.1, "elapsed_time": "5:51:18", "remaining_time": "2:29:49"} +{"current_steps": 3980, "total_steps": 5676, "loss": 0.5677829384803772, "lr": 4.502566640155972e-06, "epoch": 1.402466960352423, "percentage": 70.12, "elapsed_time": "5:51:23", "remaining_time": "2:29:44"} +{"current_steps": 3981, "total_steps": 5676, "loss": 0.6627114415168762, "lr": 4.497700589153379e-06, "epoch": 1.4028193832599118, "percentage": 70.14, "elapsed_time": "5:51:29", "remaining_time": "2:29:39"} +{"current_steps": 3982, "total_steps": 5676, "loss": 0.7225712537765503, "lr": 4.492836406010197e-06, "epoch": 1.4031718061674008, "percentage": 70.16, "elapsed_time": "5:51:34", "remaining_time": "2:29:33"} +{"current_steps": 3983, "total_steps": 5676, "loss": 0.5259708762168884, "lr": 4.487974092377661e-06, "epoch": 1.40352422907489, "percentage": 70.17, "elapsed_time": "5:51:40", "remaining_time": "2:29:28"} +{"current_steps": 3984, "total_steps": 5676, "loss": 0.5509500503540039, "lr": 4.4831136499063856e-06, "epoch": 1.4038766519823789, "percentage": 70.19, "elapsed_time": "5:51:45", "remaining_time": "2:29:23"} +{"current_steps": 3985, "total_steps": 5676, "loss": 0.5436242818832397, "lr": 4.478255080246338e-06, "epoch": 1.4042290748898678, "percentage": 70.21, "elapsed_time": "5:51:51", "remaining_time": "2:29:18"} +{"current_steps": 3986, "total_steps": 5676, "loss": 0.591008186340332, "lr": 4.473398385046839e-06, "epoch": 1.4045814977973567, "percentage": 70.23, "elapsed_time": "5:51:56", "remaining_time": "2:29:13"} +{"current_steps": 3987, "total_steps": 5676, "loss": 0.7463438510894775, "lr": 4.4685435659565975e-06, "epoch": 1.404933920704846, "percentage": 70.24, "elapsed_time": "5:52:02", "remaining_time": "2:29:08"} +{"current_steps": 3988, "total_steps": 5676, "loss": 0.5672414898872375, "lr": 4.46369062462367e-06, "epoch": 
1.4052863436123348, "percentage": 70.26, "elapsed_time": "5:52:09", "remaining_time": "2:29:03"} +{"current_steps": 3989, "total_steps": 5676, "loss": 0.6022762060165405, "lr": 4.458839562695481e-06, "epoch": 1.4056387665198238, "percentage": 70.28, "elapsed_time": "5:52:14", "remaining_time": "2:28:57"} +{"current_steps": 3990, "total_steps": 5676, "loss": 0.8312792181968689, "lr": 4.453990381818811e-06, "epoch": 1.4059911894273127, "percentage": 70.3, "elapsed_time": "5:52:19", "remaining_time": "2:28:52"} +{"current_steps": 3991, "total_steps": 5676, "loss": 0.475655198097229, "lr": 4.4491430836398055e-06, "epoch": 1.4063436123348017, "percentage": 70.31, "elapsed_time": "5:52:25", "remaining_time": "2:28:47"} +{"current_steps": 3992, "total_steps": 5676, "loss": 0.6317172050476074, "lr": 4.444297669803981e-06, "epoch": 1.4066960352422908, "percentage": 70.33, "elapsed_time": "5:52:31", "remaining_time": "2:28:42"} +{"current_steps": 3993, "total_steps": 5676, "loss": 0.5412036180496216, "lr": 4.439454141956194e-06, "epoch": 1.4070484581497797, "percentage": 70.35, "elapsed_time": "5:52:36", "remaining_time": "2:28:37"} +{"current_steps": 3994, "total_steps": 5676, "loss": 0.6401170492172241, "lr": 4.434612501740671e-06, "epoch": 1.4074008810572687, "percentage": 70.37, "elapsed_time": "5:52:42", "remaining_time": "2:28:32"} +{"current_steps": 3995, "total_steps": 5676, "loss": 0.6175628900527954, "lr": 4.429772750801007e-06, "epoch": 1.4077533039647578, "percentage": 70.38, "elapsed_time": "5:52:47", "remaining_time": "2:28:26"} +{"current_steps": 3996, "total_steps": 5676, "loss": 0.6875946521759033, "lr": 4.424934890780142e-06, "epoch": 1.4081057268722468, "percentage": 70.4, "elapsed_time": "5:52:52", "remaining_time": "2:28:21"} +{"current_steps": 3997, "total_steps": 5676, "loss": 0.6404017210006714, "lr": 4.420098923320378e-06, "epoch": 1.4084581497797357, "percentage": 70.42, "elapsed_time": "5:52:58", "remaining_time": "2:28:16"} +{"current_steps": 
3998, "total_steps": 5676, "loss": 0.7569783329963684, "lr": 4.415264850063378e-06, "epoch": 1.4088105726872246, "percentage": 70.44, "elapsed_time": "5:53:02", "remaining_time": "2:28:10"} +{"current_steps": 3999, "total_steps": 5676, "loss": 0.6125228404998779, "lr": 4.410432672650154e-06, "epoch": 1.4091629955947136, "percentage": 70.45, "elapsed_time": "5:53:09", "remaining_time": "2:28:05"} +{"current_steps": 4000, "total_steps": 5676, "loss": 0.6187582015991211, "lr": 4.405602392721091e-06, "epoch": 1.4095154185022025, "percentage": 70.47, "elapsed_time": "5:53:14", "remaining_time": "2:28:00"} +{"current_steps": 4001, "total_steps": 5676, "loss": 0.6086148023605347, "lr": 4.400774011915907e-06, "epoch": 1.4098678414096917, "percentage": 70.49, "elapsed_time": "5:53:24", "remaining_time": "2:27:57"} +{"current_steps": 4002, "total_steps": 5676, "loss": 0.4140232801437378, "lr": 4.3959475318736885e-06, "epoch": 1.4102202643171806, "percentage": 70.51, "elapsed_time": "5:53:30", "remaining_time": "2:27:51"} +{"current_steps": 4003, "total_steps": 5676, "loss": 0.5065237879753113, "lr": 4.391122954232883e-06, "epoch": 1.4105726872246696, "percentage": 70.53, "elapsed_time": "5:53:35", "remaining_time": "2:27:46"} +{"current_steps": 4004, "total_steps": 5676, "loss": 0.4817734658718109, "lr": 4.386300280631279e-06, "epoch": 1.4109251101321587, "percentage": 70.54, "elapsed_time": "5:53:41", "remaining_time": "2:27:41"} +{"current_steps": 4005, "total_steps": 5676, "loss": 0.6339706778526306, "lr": 4.381479512706025e-06, "epoch": 1.4112775330396476, "percentage": 70.56, "elapsed_time": "5:53:47", "remaining_time": "2:27:36"} +{"current_steps": 4006, "total_steps": 5676, "loss": 0.6581720113754272, "lr": 4.376660652093621e-06, "epoch": 1.4116299559471366, "percentage": 70.58, "elapsed_time": "5:53:52", "remaining_time": "2:27:31"} +{"current_steps": 4007, "total_steps": 5676, "loss": 0.722156286239624, "lr": 4.3718437004299174e-06, "epoch": 1.4119823788546255, 
"percentage": 70.6, "elapsed_time": "5:53:58", "remaining_time": "2:27:26"} +{"current_steps": 4008, "total_steps": 5676, "loss": 0.5262913703918457, "lr": 4.36702865935013e-06, "epoch": 1.4123348017621145, "percentage": 70.61, "elapsed_time": "5:54:03", "remaining_time": "2:27:20"} +{"current_steps": 4009, "total_steps": 5676, "loss": 0.6242132186889648, "lr": 4.362215530488805e-06, "epoch": 1.4126872246696034, "percentage": 70.63, "elapsed_time": "5:54:08", "remaining_time": "2:27:15"} +{"current_steps": 4010, "total_steps": 5676, "loss": 0.48776593804359436, "lr": 4.35740431547985e-06, "epoch": 1.4130396475770926, "percentage": 70.65, "elapsed_time": "5:54:14", "remaining_time": "2:27:10"} +{"current_steps": 4011, "total_steps": 5676, "loss": 0.5528746843338013, "lr": 4.352595015956528e-06, "epoch": 1.4133920704845815, "percentage": 70.67, "elapsed_time": "5:54:19", "remaining_time": "2:27:04"} +{"current_steps": 4012, "total_steps": 5676, "loss": 0.6282942295074463, "lr": 4.347787633551444e-06, "epoch": 1.4137444933920704, "percentage": 70.68, "elapsed_time": "5:54:23", "remaining_time": "2:26:59"} +{"current_steps": 4013, "total_steps": 5676, "loss": 0.6113284826278687, "lr": 4.342982169896555e-06, "epoch": 1.4140969162995596, "percentage": 70.7, "elapsed_time": "5:54:27", "remaining_time": "2:26:53"} +{"current_steps": 4014, "total_steps": 5676, "loss": 0.5756875872612, "lr": 4.3381786266231685e-06, "epoch": 1.4144493392070485, "percentage": 70.72, "elapsed_time": "5:54:34", "remaining_time": "2:26:48"} +{"current_steps": 4015, "total_steps": 5676, "loss": 0.6180154085159302, "lr": 4.333377005361931e-06, "epoch": 1.4148017621145375, "percentage": 70.74, "elapsed_time": "5:54:38", "remaining_time": "2:26:42"} +{"current_steps": 4016, "total_steps": 5676, "loss": 0.5728827118873596, "lr": 4.328577307742855e-06, "epoch": 1.4151541850220264, "percentage": 70.75, "elapsed_time": "5:54:44", "remaining_time": "2:26:37"} +{"current_steps": 4017, "total_steps": 5676, 
"loss": 0.5248062014579773, "lr": 4.323779535395278e-06, "epoch": 1.4155066079295153, "percentage": 70.77, "elapsed_time": "5:54:49", "remaining_time": "2:26:32"} +{"current_steps": 4018, "total_steps": 5676, "loss": 0.5938228368759155, "lr": 4.318983689947895e-06, "epoch": 1.4158590308370045, "percentage": 70.79, "elapsed_time": "5:54:54", "remaining_time": "2:26:27"} +{"current_steps": 4019, "total_steps": 5676, "loss": 0.7085045576095581, "lr": 4.3141897730287544e-06, "epoch": 1.4162114537444934, "percentage": 70.81, "elapsed_time": "5:54:59", "remaining_time": "2:26:21"} +{"current_steps": 4020, "total_steps": 5676, "loss": 0.599969744682312, "lr": 4.309397786265235e-06, "epoch": 1.4165638766519824, "percentage": 70.82, "elapsed_time": "5:55:03", "remaining_time": "2:26:15"} +{"current_steps": 4021, "total_steps": 5676, "loss": 0.5784738063812256, "lr": 4.30460773128407e-06, "epoch": 1.4169162995594713, "percentage": 70.84, "elapsed_time": "5:55:08", "remaining_time": "2:26:10"} +{"current_steps": 4022, "total_steps": 5676, "loss": 0.6492793560028076, "lr": 4.299819609711332e-06, "epoch": 1.4172687224669605, "percentage": 70.86, "elapsed_time": "5:55:12", "remaining_time": "2:26:04"} +{"current_steps": 4023, "total_steps": 5676, "loss": 0.6690749526023865, "lr": 4.2950334231724375e-06, "epoch": 1.4176211453744494, "percentage": 70.88, "elapsed_time": "5:55:18", "remaining_time": "2:25:59"} +{"current_steps": 4024, "total_steps": 5676, "loss": 0.5919139981269836, "lr": 4.290249173292158e-06, "epoch": 1.4179735682819383, "percentage": 70.89, "elapsed_time": "5:55:23", "remaining_time": "2:25:53"} +{"current_steps": 4025, "total_steps": 5676, "loss": 0.5661630630493164, "lr": 4.285466861694583e-06, "epoch": 1.4183259911894273, "percentage": 70.91, "elapsed_time": "5:55:29", "remaining_time": "2:25:48"} +{"current_steps": 4026, "total_steps": 5676, "loss": 0.547730565071106, "lr": 4.280686490003169e-06, "epoch": 1.4186784140969162, "percentage": 70.93, 
"elapsed_time": "5:55:35", "remaining_time": "2:25:44"} +{"current_steps": 4027, "total_steps": 5676, "loss": 0.6150445938110352, "lr": 4.2759080598406985e-06, "epoch": 1.4190308370044054, "percentage": 70.95, "elapsed_time": "5:55:39", "remaining_time": "2:25:38"} +{"current_steps": 4028, "total_steps": 5676, "loss": 0.6522917747497559, "lr": 4.271131572829303e-06, "epoch": 1.4193832599118943, "percentage": 70.97, "elapsed_time": "5:55:44", "remaining_time": "2:25:32"} +{"current_steps": 4029, "total_steps": 5676, "loss": 0.8456230163574219, "lr": 4.266357030590449e-06, "epoch": 1.4197356828193832, "percentage": 70.98, "elapsed_time": "5:55:50", "remaining_time": "2:25:27"} +{"current_steps": 4030, "total_steps": 5676, "loss": 0.6059526801109314, "lr": 4.261584434744945e-06, "epoch": 1.4200881057268724, "percentage": 71.0, "elapsed_time": "5:55:56", "remaining_time": "2:25:22"} +{"current_steps": 4031, "total_steps": 5676, "loss": 0.6289907693862915, "lr": 4.256813786912937e-06, "epoch": 1.4204405286343613, "percentage": 71.02, "elapsed_time": "5:56:01", "remaining_time": "2:25:17"} +{"current_steps": 4032, "total_steps": 5676, "loss": 0.48954638838768005, "lr": 4.252045088713919e-06, "epoch": 1.4207929515418503, "percentage": 71.04, "elapsed_time": "5:56:06", "remaining_time": "2:25:12"} +{"current_steps": 4033, "total_steps": 5676, "loss": 0.6999461054801941, "lr": 4.2472783417667055e-06, "epoch": 1.4211453744493392, "percentage": 71.05, "elapsed_time": "5:56:11", "remaining_time": "2:25:06"} +{"current_steps": 4034, "total_steps": 5676, "loss": 0.5610899925231934, "lr": 4.242513547689466e-06, "epoch": 1.4214977973568281, "percentage": 71.07, "elapsed_time": "5:56:16", "remaining_time": "2:25:01"} +{"current_steps": 4035, "total_steps": 5676, "loss": 0.6240172386169434, "lr": 4.237750708099697e-06, "epoch": 1.421850220264317, "percentage": 71.09, "elapsed_time": "5:56:20", "remaining_time": "2:24:55"} +{"current_steps": 4036, "total_steps": 5676, "loss": 
0.6368240118026733, "lr": 4.2329898246142356e-06, "epoch": 1.4222026431718062, "percentage": 71.11, "elapsed_time": "5:56:26", "remaining_time": "2:24:50"} +{"current_steps": 4037, "total_steps": 5676, "loss": 0.6242600679397583, "lr": 4.228230898849253e-06, "epoch": 1.4225550660792952, "percentage": 71.12, "elapsed_time": "5:56:31", "remaining_time": "2:24:44"} +{"current_steps": 4038, "total_steps": 5676, "loss": 0.6446138620376587, "lr": 4.223473932420255e-06, "epoch": 1.422907488986784, "percentage": 71.14, "elapsed_time": "5:56:37", "remaining_time": "2:24:39"} +{"current_steps": 4039, "total_steps": 5676, "loss": 0.7108229398727417, "lr": 4.218718926942081e-06, "epoch": 1.4232599118942733, "percentage": 71.16, "elapsed_time": "5:56:42", "remaining_time": "2:24:34"} +{"current_steps": 4040, "total_steps": 5676, "loss": 0.5279660820960999, "lr": 4.213965884028919e-06, "epoch": 1.4236123348017622, "percentage": 71.18, "elapsed_time": "5:56:47", "remaining_time": "2:24:28"} +{"current_steps": 4041, "total_steps": 5676, "loss": 0.6422853469848633, "lr": 4.209214805294264e-06, "epoch": 1.4239647577092511, "percentage": 71.19, "elapsed_time": "5:56:52", "remaining_time": "2:24:23"} +{"current_steps": 4042, "total_steps": 5676, "loss": 0.6355602741241455, "lr": 4.2044656923509704e-06, "epoch": 1.42431718061674, "percentage": 71.21, "elapsed_time": "5:56:57", "remaining_time": "2:24:18"} +{"current_steps": 4043, "total_steps": 5676, "loss": 0.5351370573043823, "lr": 4.19971854681121e-06, "epoch": 1.424669603524229, "percentage": 71.23, "elapsed_time": "5:57:02", "remaining_time": "2:24:12"} +{"current_steps": 4044, "total_steps": 5676, "loss": 0.5573978424072266, "lr": 4.194973370286492e-06, "epoch": 1.425022026431718, "percentage": 71.25, "elapsed_time": "5:57:06", "remaining_time": "2:24:06"} +{"current_steps": 4045, "total_steps": 5676, "loss": 0.5865412950515747, "lr": 4.1902301643876555e-06, "epoch": 1.425374449339207, "percentage": 71.26, "elapsed_time": 
"5:57:11", "remaining_time": "2:24:01"} +{"current_steps": 4046, "total_steps": 5676, "loss": 0.6231919527053833, "lr": 4.185488930724868e-06, "epoch": 1.425726872246696, "percentage": 71.28, "elapsed_time": "5:57:16", "remaining_time": "2:23:56"} +{"current_steps": 4047, "total_steps": 5676, "loss": 0.48964112997055054, "lr": 4.180749670907638e-06, "epoch": 1.426079295154185, "percentage": 71.3, "elapsed_time": "5:57:21", "remaining_time": "2:23:50"} +{"current_steps": 4048, "total_steps": 5676, "loss": 0.6299121975898743, "lr": 4.176012386544796e-06, "epoch": 1.4264317180616741, "percentage": 71.32, "elapsed_time": "5:57:27", "remaining_time": "2:23:45"} +{"current_steps": 4049, "total_steps": 5676, "loss": 0.5612789392471313, "lr": 4.171277079244492e-06, "epoch": 1.426784140969163, "percentage": 71.34, "elapsed_time": "5:57:32", "remaining_time": "2:23:40"} +{"current_steps": 4050, "total_steps": 5676, "loss": 0.4315321147441864, "lr": 4.166543750614227e-06, "epoch": 1.427136563876652, "percentage": 71.35, "elapsed_time": "5:57:37", "remaining_time": "2:23:34"} +{"current_steps": 4051, "total_steps": 5676, "loss": 0.6300851702690125, "lr": 4.1618124022608146e-06, "epoch": 1.427488986784141, "percentage": 71.37, "elapsed_time": "5:57:42", "remaining_time": "2:23:29"} +{"current_steps": 4052, "total_steps": 5676, "loss": 0.6258795261383057, "lr": 4.1570830357904e-06, "epoch": 1.4278414096916299, "percentage": 71.39, "elapsed_time": "5:57:48", "remaining_time": "2:23:24"} +{"current_steps": 4053, "total_steps": 5676, "loss": 0.7364479303359985, "lr": 4.152355652808457e-06, "epoch": 1.4281938325991188, "percentage": 71.41, "elapsed_time": "5:57:52", "remaining_time": "2:23:18"} +{"current_steps": 4054, "total_steps": 5676, "loss": 0.44845038652420044, "lr": 4.147630254919781e-06, "epoch": 1.428546255506608, "percentage": 71.42, "elapsed_time": "5:57:56", "remaining_time": "2:23:12"} +{"current_steps": 4055, "total_steps": 5676, "loss": 0.516815185546875, "lr": 
4.142906843728504e-06, "epoch": 1.428898678414097, "percentage": 71.44, "elapsed_time": "5:58:02", "remaining_time": "2:23:07"} +{"current_steps": 4056, "total_steps": 5676, "loss": 0.6296960711479187, "lr": 4.138185420838079e-06, "epoch": 1.4292511013215858, "percentage": 71.46, "elapsed_time": "5:58:07", "remaining_time": "2:23:02"} +{"current_steps": 4057, "total_steps": 5676, "loss": 0.5709103345870972, "lr": 4.133465987851269e-06, "epoch": 1.429603524229075, "percentage": 71.48, "elapsed_time": "5:58:13", "remaining_time": "2:22:57"} +{"current_steps": 4058, "total_steps": 5676, "loss": 0.5672547817230225, "lr": 4.128748546370186e-06, "epoch": 1.429955947136564, "percentage": 71.49, "elapsed_time": "5:58:17", "remaining_time": "2:22:51"} +{"current_steps": 4059, "total_steps": 5676, "loss": 0.5927014350891113, "lr": 4.124033097996252e-06, "epoch": 1.4303083700440529, "percentage": 71.51, "elapsed_time": "5:58:21", "remaining_time": "2:22:45"} +{"current_steps": 4060, "total_steps": 5676, "loss": 0.7021238803863525, "lr": 4.119319644330214e-06, "epoch": 1.4306607929515418, "percentage": 71.53, "elapsed_time": "5:58:27", "remaining_time": "2:22:40"} +{"current_steps": 4061, "total_steps": 5676, "loss": 0.5940784215927124, "lr": 4.114608186972143e-06, "epoch": 1.4310132158590307, "percentage": 71.55, "elapsed_time": "5:58:32", "remaining_time": "2:22:35"} +{"current_steps": 4062, "total_steps": 5676, "loss": 0.46511122584342957, "lr": 4.109898727521429e-06, "epoch": 1.43136563876652, "percentage": 71.56, "elapsed_time": "5:58:37", "remaining_time": "2:22:29"} +{"current_steps": 4063, "total_steps": 5676, "loss": 0.4710976481437683, "lr": 4.105191267576797e-06, "epoch": 1.4317180616740088, "percentage": 71.58, "elapsed_time": "5:58:42", "remaining_time": "2:22:24"} +{"current_steps": 4064, "total_steps": 5676, "loss": 0.5947977900505066, "lr": 4.100485808736273e-06, "epoch": 1.4320704845814978, "percentage": 71.6, "elapsed_time": "5:58:47", "remaining_time": 
"2:22:19"} +{"current_steps": 4065, "total_steps": 5676, "loss": 0.6312115788459778, "lr": 4.095782352597214e-06, "epoch": 1.4324229074889867, "percentage": 71.62, "elapsed_time": "5:58:51", "remaining_time": "2:22:13"} +{"current_steps": 4066, "total_steps": 5676, "loss": 0.5709977149963379, "lr": 4.091080900756303e-06, "epoch": 1.4327753303964759, "percentage": 71.63, "elapsed_time": "5:58:56", "remaining_time": "2:22:07"} +{"current_steps": 4067, "total_steps": 5676, "loss": 0.6183716058731079, "lr": 4.086381454809535e-06, "epoch": 1.4331277533039648, "percentage": 71.65, "elapsed_time": "5:59:01", "remaining_time": "2:22:02"} +{"current_steps": 4068, "total_steps": 5676, "loss": 0.5139745473861694, "lr": 4.081684016352223e-06, "epoch": 1.4334801762114537, "percentage": 71.67, "elapsed_time": "5:59:06", "remaining_time": "2:21:56"} +{"current_steps": 4069, "total_steps": 5676, "loss": 0.6611173152923584, "lr": 4.076988586979004e-06, "epoch": 1.4338325991189427, "percentage": 71.69, "elapsed_time": "5:59:11", "remaining_time": "2:21:51"} +{"current_steps": 4070, "total_steps": 5676, "loss": 0.616943359375, "lr": 4.072295168283824e-06, "epoch": 1.4341850220264316, "percentage": 71.71, "elapsed_time": "5:59:16", "remaining_time": "2:21:46"} +{"current_steps": 4071, "total_steps": 5676, "loss": 0.5388625264167786, "lr": 4.067603761859965e-06, "epoch": 1.4345374449339208, "percentage": 71.72, "elapsed_time": "5:59:22", "remaining_time": "2:21:40"} +{"current_steps": 4072, "total_steps": 5676, "loss": 0.5523884892463684, "lr": 4.062914369300002e-06, "epoch": 1.4348898678414097, "percentage": 71.74, "elapsed_time": "5:59:27", "remaining_time": "2:21:35"} +{"current_steps": 4073, "total_steps": 5676, "loss": 0.5610285997390747, "lr": 4.058226992195838e-06, "epoch": 1.4352422907488986, "percentage": 71.76, "elapsed_time": "5:59:33", "remaining_time": "2:21:30"} +{"current_steps": 4074, "total_steps": 5676, "loss": 0.583917498588562, "lr": 4.0535416321387e-06, "epoch": 
1.4355947136563878, "percentage": 71.78, "elapsed_time": "5:59:38", "remaining_time": "2:21:25"} +{"current_steps": 4075, "total_steps": 5676, "loss": 0.6025276184082031, "lr": 4.048858290719115e-06, "epoch": 1.4359471365638767, "percentage": 71.79, "elapsed_time": "5:59:44", "remaining_time": "2:21:20"} +{"current_steps": 4076, "total_steps": 5676, "loss": 0.5643888711929321, "lr": 4.044176969526936e-06, "epoch": 1.4362995594713657, "percentage": 71.81, "elapsed_time": "5:59:49", "remaining_time": "2:21:14"} +{"current_steps": 4077, "total_steps": 5676, "loss": 0.550167977809906, "lr": 4.0394976701513235e-06, "epoch": 1.4366519823788546, "percentage": 71.83, "elapsed_time": "5:59:55", "remaining_time": "2:21:09"} +{"current_steps": 4078, "total_steps": 5676, "loss": 0.6182876825332642, "lr": 4.034820394180749e-06, "epoch": 1.4370044052863435, "percentage": 71.85, "elapsed_time": "6:00:01", "remaining_time": "2:21:04"} +{"current_steps": 4079, "total_steps": 5676, "loss": 0.5197434425354004, "lr": 4.030145143203016e-06, "epoch": 1.4373568281938325, "percentage": 71.86, "elapsed_time": "6:00:07", "remaining_time": "2:20:59"} +{"current_steps": 4080, "total_steps": 5676, "loss": 0.5351034998893738, "lr": 4.025471918805214e-06, "epoch": 1.4377092511013216, "percentage": 71.88, "elapsed_time": "6:00:13", "remaining_time": "2:20:54"} +{"current_steps": 4081, "total_steps": 5676, "loss": 0.5576729774475098, "lr": 4.020800722573758e-06, "epoch": 1.4380616740088106, "percentage": 71.9, "elapsed_time": "6:00:16", "remaining_time": "2:20:48"} +{"current_steps": 4082, "total_steps": 5676, "loss": 0.5557611584663391, "lr": 4.016131556094381e-06, "epoch": 1.4384140969162995, "percentage": 71.92, "elapsed_time": "6:00:22", "remaining_time": "2:20:43"} +{"current_steps": 4083, "total_steps": 5676, "loss": 0.5300010442733765, "lr": 4.011464420952115e-06, "epoch": 1.4387665198237887, "percentage": 71.93, "elapsed_time": "6:00:28", "remaining_time": "2:20:38"} +{"current_steps": 
4084, "total_steps": 5676, "loss": 0.5254991054534912, "lr": 4.0067993187313085e-06, "epoch": 1.4391189427312776, "percentage": 71.95, "elapsed_time": "6:00:33", "remaining_time": "2:20:33"} +{"current_steps": 4085, "total_steps": 5676, "loss": 0.5044848322868347, "lr": 4.002136251015617e-06, "epoch": 1.4394713656387665, "percentage": 71.97, "elapsed_time": "6:00:39", "remaining_time": "2:20:27"} +{"current_steps": 4086, "total_steps": 5676, "loss": 0.6422302722930908, "lr": 3.997475219388005e-06, "epoch": 1.4398237885462555, "percentage": 71.99, "elapsed_time": "6:00:44", "remaining_time": "2:20:22"} +{"current_steps": 4087, "total_steps": 5676, "loss": 0.5502497553825378, "lr": 3.992816225430758e-06, "epoch": 1.4401762114537444, "percentage": 72.0, "elapsed_time": "6:00:49", "remaining_time": "2:20:17"} +{"current_steps": 4088, "total_steps": 5676, "loss": 0.7479537129402161, "lr": 3.988159270725448e-06, "epoch": 1.4405286343612334, "percentage": 72.02, "elapsed_time": "6:00:55", "remaining_time": "2:20:12"} +{"current_steps": 4089, "total_steps": 5676, "loss": 0.5418091416358948, "lr": 3.983504356852967e-06, "epoch": 1.4408810572687225, "percentage": 72.04, "elapsed_time": "6:01:01", "remaining_time": "2:20:07"} +{"current_steps": 4090, "total_steps": 5676, "loss": 0.5262568593025208, "lr": 3.978851485393519e-06, "epoch": 1.4412334801762114, "percentage": 72.06, "elapsed_time": "6:01:06", "remaining_time": "2:20:01"} +{"current_steps": 4091, "total_steps": 5676, "loss": 0.5419692397117615, "lr": 3.974200657926607e-06, "epoch": 1.4415859030837004, "percentage": 72.08, "elapsed_time": "6:01:13", "remaining_time": "2:19:57"} +{"current_steps": 4092, "total_steps": 5676, "loss": 0.5202164649963379, "lr": 3.9695518760310425e-06, "epoch": 1.4419383259911895, "percentage": 72.09, "elapsed_time": "6:01:19", "remaining_time": "2:19:52"} +{"current_steps": 4093, "total_steps": 5676, "loss": 0.7232608795166016, "lr": 3.96490514128494e-06, "epoch": 1.4422907488986785, 
"percentage": 72.11, "elapsed_time": "6:01:24", "remaining_time": "2:19:46"} +{"current_steps": 4094, "total_steps": 5676, "loss": 0.5899156332015991, "lr": 3.960260455265721e-06, "epoch": 1.4426431718061674, "percentage": 72.13, "elapsed_time": "6:01:29", "remaining_time": "2:19:41"} +{"current_steps": 4095, "total_steps": 5676, "loss": 0.629068911075592, "lr": 3.95561781955012e-06, "epoch": 1.4429955947136563, "percentage": 72.15, "elapsed_time": "6:01:35", "remaining_time": "2:19:36"} +{"current_steps": 4096, "total_steps": 5676, "loss": 0.5584920644760132, "lr": 3.950977235714154e-06, "epoch": 1.4433480176211453, "percentage": 72.16, "elapsed_time": "6:01:40", "remaining_time": "2:19:30"} +{"current_steps": 4097, "total_steps": 5676, "loss": 0.713936984539032, "lr": 3.9463387053331685e-06, "epoch": 1.4437004405286344, "percentage": 72.18, "elapsed_time": "6:01:47", "remaining_time": "2:19:26"} +{"current_steps": 4098, "total_steps": 5676, "loss": 0.7157076001167297, "lr": 3.9417022299817944e-06, "epoch": 1.4440528634361234, "percentage": 72.2, "elapsed_time": "6:01:52", "remaining_time": "2:19:20"} +{"current_steps": 4099, "total_steps": 5676, "loss": 0.6540844440460205, "lr": 3.937067811233972e-06, "epoch": 1.4444052863436123, "percentage": 72.22, "elapsed_time": "6:01:57", "remaining_time": "2:19:15"} +{"current_steps": 4100, "total_steps": 5676, "loss": 0.5350022315979004, "lr": 3.9324354506629425e-06, "epoch": 1.4447577092511013, "percentage": 72.23, "elapsed_time": "6:02:03", "remaining_time": "2:19:10"} +{"current_steps": 4101, "total_steps": 5676, "loss": 0.6852695941925049, "lr": 3.9278051498412475e-06, "epoch": 1.4451101321585904, "percentage": 72.25, "elapsed_time": "6:02:13", "remaining_time": "2:19:06"} +{"current_steps": 4102, "total_steps": 5676, "loss": 0.6059536337852478, "lr": 3.923176910340728e-06, "epoch": 1.4454625550660793, "percentage": 72.27, "elapsed_time": "6:02:18", "remaining_time": "2:19:01"} +{"current_steps": 4103, "total_steps": 
5676, "loss": 0.5787979364395142, "lr": 3.918550733732536e-06, "epoch": 1.4458149779735683, "percentage": 72.29, "elapsed_time": "6:02:24", "remaining_time": "2:18:56"} +{"current_steps": 4104, "total_steps": 5676, "loss": 0.6068835258483887, "lr": 3.9139266215871e-06, "epoch": 1.4461674008810572, "percentage": 72.3, "elapsed_time": "6:02:29", "remaining_time": "2:18:51"} +{"current_steps": 4105, "total_steps": 5676, "loss": 0.5123663544654846, "lr": 3.909304575474175e-06, "epoch": 1.4465198237885462, "percentage": 72.32, "elapsed_time": "6:02:34", "remaining_time": "2:18:45"} +{"current_steps": 4106, "total_steps": 5676, "loss": 0.6285420656204224, "lr": 3.9046845969627975e-06, "epoch": 1.4468722466960353, "percentage": 72.34, "elapsed_time": "6:02:39", "remaining_time": "2:18:40"} +{"current_steps": 4107, "total_steps": 5676, "loss": 0.6186035871505737, "lr": 3.9000666876213056e-06, "epoch": 1.4472246696035242, "percentage": 72.36, "elapsed_time": "6:02:44", "remaining_time": "2:18:34"} +{"current_steps": 4108, "total_steps": 5676, "loss": 0.6222661733627319, "lr": 3.895450849017336e-06, "epoch": 1.4475770925110132, "percentage": 72.37, "elapsed_time": "6:02:49", "remaining_time": "2:18:29"} +{"current_steps": 4109, "total_steps": 5676, "loss": 0.5889515280723572, "lr": 3.890837082717822e-06, "epoch": 1.4479295154185021, "percentage": 72.39, "elapsed_time": "6:02:54", "remaining_time": "2:18:23"} +{"current_steps": 4110, "total_steps": 5676, "loss": 0.6160309314727783, "lr": 3.8862253902889925e-06, "epoch": 1.4482819383259913, "percentage": 72.41, "elapsed_time": "6:03:00", "remaining_time": "2:18:18"} +{"current_steps": 4111, "total_steps": 5676, "loss": 0.48093074560165405, "lr": 3.881615773296381e-06, "epoch": 1.4486343612334802, "percentage": 72.43, "elapsed_time": "6:03:06", "remaining_time": "2:18:13"} +{"current_steps": 4112, "total_steps": 5676, "loss": 0.5851131677627563, "lr": 3.877008233304796e-06, "epoch": 1.4489867841409692, "percentage": 72.45, 
"elapsed_time": "6:03:10", "remaining_time": "2:18:08"} +{"current_steps": 4113, "total_steps": 5676, "loss": 0.5322093963623047, "lr": 3.872402771878365e-06, "epoch": 1.449339207048458, "percentage": 72.46, "elapsed_time": "6:03:16", "remaining_time": "2:18:02"} +{"current_steps": 4114, "total_steps": 5676, "loss": 0.652804970741272, "lr": 3.8677993905804956e-06, "epoch": 1.449691629955947, "percentage": 72.48, "elapsed_time": "6:03:20", "remaining_time": "2:17:57"} +{"current_steps": 4115, "total_steps": 5676, "loss": 0.5494996309280396, "lr": 3.863198090973891e-06, "epoch": 1.4500440528634362, "percentage": 72.5, "elapsed_time": "6:03:25", "remaining_time": "2:17:51"} +{"current_steps": 4116, "total_steps": 5676, "loss": 0.5641331672668457, "lr": 3.8585988746205505e-06, "epoch": 1.4503964757709251, "percentage": 72.52, "elapsed_time": "6:03:29", "remaining_time": "2:17:46"} +{"current_steps": 4117, "total_steps": 5676, "loss": 0.5415998697280884, "lr": 3.854001743081764e-06, "epoch": 1.450748898678414, "percentage": 72.53, "elapsed_time": "6:03:35", "remaining_time": "2:17:40"} +{"current_steps": 4118, "total_steps": 5676, "loss": 0.4399813711643219, "lr": 3.849406697918113e-06, "epoch": 1.4511013215859032, "percentage": 72.55, "elapsed_time": "6:03:41", "remaining_time": "2:17:35"} +{"current_steps": 4119, "total_steps": 5676, "loss": 0.6228655576705933, "lr": 3.84481374068948e-06, "epoch": 1.4514537444933922, "percentage": 72.57, "elapsed_time": "6:03:46", "remaining_time": "2:17:30"} +{"current_steps": 4120, "total_steps": 5676, "loss": 0.5575108528137207, "lr": 3.8402228729550195e-06, "epoch": 1.451806167400881, "percentage": 72.59, "elapsed_time": "6:03:51", "remaining_time": "2:17:25"} +{"current_steps": 4121, "total_steps": 5676, "loss": 0.5705434679985046, "lr": 3.835634096273197e-06, "epoch": 1.45215859030837, "percentage": 72.6, "elapsed_time": "6:03:56", "remaining_time": "2:17:19"} +{"current_steps": 4122, "total_steps": 5676, "loss": 
0.5649456977844238, "lr": 3.831047412201758e-06, "epoch": 1.452511013215859, "percentage": 72.62, "elapsed_time": "6:04:03", "remaining_time": "2:17:14"} +{"current_steps": 4123, "total_steps": 5676, "loss": 0.5656554698944092, "lr": 3.826462822297736e-06, "epoch": 1.452863436123348, "percentage": 72.64, "elapsed_time": "6:04:08", "remaining_time": "2:17:09"} +{"current_steps": 4124, "total_steps": 5676, "loss": 0.6565591096878052, "lr": 3.82188032811746e-06, "epoch": 1.453215859030837, "percentage": 72.66, "elapsed_time": "6:04:13", "remaining_time": "2:17:04"} +{"current_steps": 4125, "total_steps": 5676, "loss": 0.6553423404693604, "lr": 3.817299931216537e-06, "epoch": 1.453568281938326, "percentage": 72.67, "elapsed_time": "6:04:18", "remaining_time": "2:16:58"} +{"current_steps": 4126, "total_steps": 5676, "loss": 0.5401671528816223, "lr": 3.812721633149883e-06, "epoch": 1.453920704845815, "percentage": 72.69, "elapsed_time": "6:04:23", "remaining_time": "2:16:53"} +{"current_steps": 4127, "total_steps": 5676, "loss": 0.7275381088256836, "lr": 3.808145435471674e-06, "epoch": 1.454273127753304, "percentage": 72.71, "elapsed_time": "6:04:27", "remaining_time": "2:16:47"} +{"current_steps": 4128, "total_steps": 5676, "loss": 0.6384310722351074, "lr": 3.80357133973539e-06, "epoch": 1.454625550660793, "percentage": 72.73, "elapsed_time": "6:04:32", "remaining_time": "2:16:42"} +{"current_steps": 4129, "total_steps": 5676, "loss": 0.7783250212669373, "lr": 3.7989993474937993e-06, "epoch": 1.454977973568282, "percentage": 72.74, "elapsed_time": "6:04:38", "remaining_time": "2:16:37"} +{"current_steps": 4130, "total_steps": 5676, "loss": 0.752954363822937, "lr": 3.7944294602989473e-06, "epoch": 1.455330396475771, "percentage": 72.76, "elapsed_time": "6:04:42", "remaining_time": "2:16:31"} +{"current_steps": 4131, "total_steps": 5676, "loss": 0.6099411249160767, "lr": 3.789861679702169e-06, "epoch": 1.4556828193832598, "percentage": 72.78, "elapsed_time": "6:04:48", 
"remaining_time": "2:16:26"} +{"current_steps": 4132, "total_steps": 5676, "loss": 0.6608012914657593, "lr": 3.7852960072540845e-06, "epoch": 1.4560352422907488, "percentage": 72.8, "elapsed_time": "6:04:53", "remaining_time": "2:16:20"} +{"current_steps": 4133, "total_steps": 5676, "loss": 0.5119853615760803, "lr": 3.7807324445045924e-06, "epoch": 1.456387665198238, "percentage": 72.82, "elapsed_time": "6:04:57", "remaining_time": "2:16:15"} +{"current_steps": 4134, "total_steps": 5676, "loss": 0.6353520750999451, "lr": 3.7761709930028923e-06, "epoch": 1.4567400881057269, "percentage": 72.83, "elapsed_time": "6:05:02", "remaining_time": "2:16:09"} +{"current_steps": 4135, "total_steps": 5676, "loss": 0.6427614688873291, "lr": 3.7716116542974434e-06, "epoch": 1.4570925110132158, "percentage": 72.85, "elapsed_time": "6:05:07", "remaining_time": "2:16:04"} +{"current_steps": 4136, "total_steps": 5676, "loss": 0.6205203533172607, "lr": 3.7670544299360003e-06, "epoch": 1.457444933920705, "percentage": 72.87, "elapsed_time": "6:05:10", "remaining_time": "2:15:58"} +{"current_steps": 4137, "total_steps": 5676, "loss": 0.5957762002944946, "lr": 3.7624993214656046e-06, "epoch": 1.457797356828194, "percentage": 72.89, "elapsed_time": "6:05:16", "remaining_time": "2:15:53"} +{"current_steps": 4138, "total_steps": 5676, "loss": 0.6698043346405029, "lr": 3.7579463304325714e-06, "epoch": 1.4581497797356828, "percentage": 72.9, "elapsed_time": "6:05:22", "remaining_time": "2:15:48"} +{"current_steps": 4139, "total_steps": 5676, "loss": 0.5947796106338501, "lr": 3.7533954583824982e-06, "epoch": 1.4585022026431718, "percentage": 72.92, "elapsed_time": "6:05:28", "remaining_time": "2:15:42"} +{"current_steps": 4140, "total_steps": 5676, "loss": 0.5905463695526123, "lr": 3.7488467068602664e-06, "epoch": 1.4588546255506607, "percentage": 72.94, "elapsed_time": "6:05:32", "remaining_time": "2:15:37"} +{"current_steps": 4141, "total_steps": 5676, "loss": 0.6218722462654114, "lr": 
3.74430007741003e-06, "epoch": 1.4592070484581499, "percentage": 72.96, "elapsed_time": "6:05:38", "remaining_time": "2:15:32"} +{"current_steps": 4142, "total_steps": 5676, "loss": 0.6124013066291809, "lr": 3.739755571575241e-06, "epoch": 1.4595594713656388, "percentage": 72.97, "elapsed_time": "6:05:43", "remaining_time": "2:15:26"} +{"current_steps": 4143, "total_steps": 5676, "loss": 0.5816842317581177, "lr": 3.7352131908986046e-06, "epoch": 1.4599118942731277, "percentage": 72.99, "elapsed_time": "6:05:47", "remaining_time": "2:15:21"} +{"current_steps": 4144, "total_steps": 5676, "loss": 0.5225531458854675, "lr": 3.7306729369221197e-06, "epoch": 1.4602643171806167, "percentage": 73.01, "elapsed_time": "6:05:53", "remaining_time": "2:15:15"} +{"current_steps": 4145, "total_steps": 5676, "loss": 0.4536696672439575, "lr": 3.7261348111870663e-06, "epoch": 1.4606167400881058, "percentage": 73.03, "elapsed_time": "6:05:58", "remaining_time": "2:15:10"} +{"current_steps": 4146, "total_steps": 5676, "loss": 0.5777832269668579, "lr": 3.7215988152339968e-06, "epoch": 1.4609691629955948, "percentage": 73.04, "elapsed_time": "6:06:03", "remaining_time": "2:15:05"} +{"current_steps": 4147, "total_steps": 5676, "loss": 0.5964622497558594, "lr": 3.717064950602737e-06, "epoch": 1.4613215859030837, "percentage": 73.06, "elapsed_time": "6:06:09", "remaining_time": "2:15:00"} +{"current_steps": 4148, "total_steps": 5676, "loss": 0.47224390506744385, "lr": 3.7125332188323937e-06, "epoch": 1.4616740088105726, "percentage": 73.08, "elapsed_time": "6:06:15", "remaining_time": "2:14:54"} +{"current_steps": 4149, "total_steps": 5676, "loss": 0.5989271402359009, "lr": 3.708003621461347e-06, "epoch": 1.4620264317180616, "percentage": 73.1, "elapsed_time": "6:06:20", "remaining_time": "2:14:49"} +{"current_steps": 4150, "total_steps": 5676, "loss": 0.6171919107437134, "lr": 3.7034761600272627e-06, "epoch": 1.4623788546255507, "percentage": 73.11, "elapsed_time": "6:06:25", 
"remaining_time": "2:14:44"} +{"current_steps": 4151, "total_steps": 5676, "loss": 0.7064418792724609, "lr": 3.6989508360670647e-06, "epoch": 1.4627312775330397, "percentage": 73.13, "elapsed_time": "6:06:31", "remaining_time": "2:14:39"} +{"current_steps": 4152, "total_steps": 5676, "loss": 0.6055941581726074, "lr": 3.6944276511169577e-06, "epoch": 1.4630837004405286, "percentage": 73.15, "elapsed_time": "6:06:36", "remaining_time": "2:14:33"} +{"current_steps": 4153, "total_steps": 5676, "loss": 0.4550645351409912, "lr": 3.689906606712429e-06, "epoch": 1.4634361233480178, "percentage": 73.17, "elapsed_time": "6:06:41", "remaining_time": "2:14:28"} +{"current_steps": 4154, "total_steps": 5676, "loss": 0.5958502292633057, "lr": 3.68538770438823e-06, "epoch": 1.4637885462555067, "percentage": 73.19, "elapsed_time": "6:06:47", "remaining_time": "2:14:23"} +{"current_steps": 4155, "total_steps": 5676, "loss": 0.5574297904968262, "lr": 3.680870945678389e-06, "epoch": 1.4641409691629956, "percentage": 73.2, "elapsed_time": "6:06:53", "remaining_time": "2:14:18"} +{"current_steps": 4156, "total_steps": 5676, "loss": 0.46494683623313904, "lr": 3.676356332116202e-06, "epoch": 1.4644933920704846, "percentage": 73.22, "elapsed_time": "6:07:00", "remaining_time": "2:14:13"} +{"current_steps": 4157, "total_steps": 5676, "loss": 0.5549977421760559, "lr": 3.671843865234238e-06, "epoch": 1.4648458149779735, "percentage": 73.24, "elapsed_time": "6:07:05", "remaining_time": "2:14:08"} +{"current_steps": 4158, "total_steps": 5676, "loss": 0.5620779395103455, "lr": 3.6673335465643488e-06, "epoch": 1.4651982378854624, "percentage": 73.26, "elapsed_time": "6:07:12", "remaining_time": "2:14:03"} +{"current_steps": 4159, "total_steps": 5676, "loss": 0.5945389270782471, "lr": 3.662825377637638e-06, "epoch": 1.4655506607929516, "percentage": 73.27, "elapsed_time": "6:07:18", "remaining_time": "2:13:58"} +{"current_steps": 4160, "total_steps": 5676, "loss": 0.6923668384552002, "lr": 
3.6583193599844867e-06, "epoch": 1.4659030837004405, "percentage": 73.29, "elapsed_time": "6:07:23", "remaining_time": "2:13:53"} +{"current_steps": 4161, "total_steps": 5676, "loss": 0.6848515868186951, "lr": 3.653815495134557e-06, "epoch": 1.4662555066079295, "percentage": 73.31, "elapsed_time": "6:07:28", "remaining_time": "2:13:47"} +{"current_steps": 4162, "total_steps": 5676, "loss": 0.640125036239624, "lr": 3.649313784616765e-06, "epoch": 1.4666079295154186, "percentage": 73.33, "elapsed_time": "6:07:34", "remaining_time": "2:13:42"} +{"current_steps": 4163, "total_steps": 5676, "loss": 0.6879653930664062, "lr": 3.6448142299593026e-06, "epoch": 1.4669603524229076, "percentage": 73.34, "elapsed_time": "6:07:38", "remaining_time": "2:13:36"} +{"current_steps": 4164, "total_steps": 5676, "loss": 0.6757794618606567, "lr": 3.6403168326896286e-06, "epoch": 1.4673127753303965, "percentage": 73.36, "elapsed_time": "6:07:43", "remaining_time": "2:13:31"} +{"current_steps": 4165, "total_steps": 5676, "loss": 0.6405826807022095, "lr": 3.6358215943344664e-06, "epoch": 1.4676651982378854, "percentage": 73.38, "elapsed_time": "6:07:50", "remaining_time": "2:13:26"} +{"current_steps": 4166, "total_steps": 5676, "loss": 0.692136287689209, "lr": 3.6313285164198187e-06, "epoch": 1.4680176211453744, "percentage": 73.4, "elapsed_time": "6:07:54", "remaining_time": "2:13:21"} +{"current_steps": 4167, "total_steps": 5676, "loss": 0.6305568218231201, "lr": 3.626837600470935e-06, "epoch": 1.4683700440528633, "percentage": 73.41, "elapsed_time": "6:08:00", "remaining_time": "2:13:16"} +{"current_steps": 4168, "total_steps": 5676, "loss": 0.7040449380874634, "lr": 3.6223488480123427e-06, "epoch": 1.4687224669603525, "percentage": 73.43, "elapsed_time": "6:08:04", "remaining_time": "2:13:10"} +{"current_steps": 4169, "total_steps": 5676, "loss": 0.5064427852630615, "lr": 3.6178622605678403e-06, "epoch": 1.4690748898678414, "percentage": 73.45, "elapsed_time": "6:08:10", 
"remaining_time": "2:13:05"} +{"current_steps": 4170, "total_steps": 5676, "loss": 0.46597155928611755, "lr": 3.6133778396604813e-06, "epoch": 1.4694273127753303, "percentage": 73.47, "elapsed_time": "6:08:16", "remaining_time": "2:13:00"} +{"current_steps": 4171, "total_steps": 5676, "loss": 0.5764908790588379, "lr": 3.6088955868125864e-06, "epoch": 1.4697797356828195, "percentage": 73.48, "elapsed_time": "6:08:22", "remaining_time": "2:12:55"} +{"current_steps": 4172, "total_steps": 5676, "loss": 0.5808656215667725, "lr": 3.6044155035457405e-06, "epoch": 1.4701321585903084, "percentage": 73.5, "elapsed_time": "6:08:26", "remaining_time": "2:12:49"} +{"current_steps": 4173, "total_steps": 5676, "loss": 0.5439287424087524, "lr": 3.599937591380791e-06, "epoch": 1.4704845814977974, "percentage": 73.52, "elapsed_time": "6:08:32", "remaining_time": "2:12:44"} +{"current_steps": 4174, "total_steps": 5676, "loss": 0.5943965911865234, "lr": 3.595461851837857e-06, "epoch": 1.4708370044052863, "percentage": 73.54, "elapsed_time": "6:08:37", "remaining_time": "2:12:39"} +{"current_steps": 4175, "total_steps": 5676, "loss": 0.631833016872406, "lr": 3.590988286436302e-06, "epoch": 1.4711894273127752, "percentage": 73.56, "elapsed_time": "6:08:42", "remaining_time": "2:12:33"} +{"current_steps": 4176, "total_steps": 5676, "loss": 0.514176070690155, "lr": 3.5865168966947718e-06, "epoch": 1.4715418502202642, "percentage": 73.57, "elapsed_time": "6:08:47", "remaining_time": "2:12:27"} +{"current_steps": 4177, "total_steps": 5676, "loss": 0.584772527217865, "lr": 3.582047684131159e-06, "epoch": 1.4718942731277533, "percentage": 73.59, "elapsed_time": "6:08:52", "remaining_time": "2:12:22"} +{"current_steps": 4178, "total_steps": 5676, "loss": 0.5085974931716919, "lr": 3.5775806502626244e-06, "epoch": 1.4722466960352423, "percentage": 73.61, "elapsed_time": "6:08:58", "remaining_time": "2:12:17"} +{"current_steps": 4179, "total_steps": 5676, "loss": 0.62562495470047, "lr": 
3.573115796605584e-06, "epoch": 1.4725991189427312, "percentage": 73.63, "elapsed_time": "6:09:02", "remaining_time": "2:12:11"} +{"current_steps": 4180, "total_steps": 5676, "loss": 0.5815824270248413, "lr": 3.5686531246757206e-06, "epoch": 1.4729515418502204, "percentage": 73.64, "elapsed_time": "6:09:07", "remaining_time": "2:12:06"} +{"current_steps": 4181, "total_steps": 5676, "loss": 0.6639705300331116, "lr": 3.5641926359879663e-06, "epoch": 1.4733039647577093, "percentage": 73.66, "elapsed_time": "6:09:12", "remaining_time": "2:12:01"} +{"current_steps": 4182, "total_steps": 5676, "loss": 0.6265684962272644, "lr": 3.5597343320565293e-06, "epoch": 1.4736563876651982, "percentage": 73.68, "elapsed_time": "6:09:18", "remaining_time": "2:11:55"} +{"current_steps": 4183, "total_steps": 5676, "loss": 0.6113626956939697, "lr": 3.5552782143948504e-06, "epoch": 1.4740088105726872, "percentage": 73.7, "elapsed_time": "6:09:24", "remaining_time": "2:11:50"} +{"current_steps": 4184, "total_steps": 5676, "loss": 0.5247244834899902, "lr": 3.550824284515655e-06, "epoch": 1.4743612334801761, "percentage": 73.71, "elapsed_time": "6:09:30", "remaining_time": "2:11:45"} +{"current_steps": 4185, "total_steps": 5676, "loss": 0.5524521470069885, "lr": 3.5463725439309083e-06, "epoch": 1.4747136563876653, "percentage": 73.73, "elapsed_time": "6:09:34", "remaining_time": "2:11:40"} +{"current_steps": 4186, "total_steps": 5676, "loss": 0.5462251305580139, "lr": 3.5419229941518384e-06, "epoch": 1.4750660792951542, "percentage": 73.75, "elapsed_time": "6:09:38", "remaining_time": "2:11:34"} +{"current_steps": 4187, "total_steps": 5676, "loss": 0.6500638723373413, "lr": 3.5374756366889297e-06, "epoch": 1.4754185022026431, "percentage": 73.77, "elapsed_time": "6:09:44", "remaining_time": "2:11:29"} +{"current_steps": 4188, "total_steps": 5676, "loss": 0.4445904791355133, "lr": 3.5330304730519216e-06, "epoch": 1.475770925110132, "percentage": 73.78, "elapsed_time": "6:09:48", 
"remaining_time": "2:11:23"} +{"current_steps": 4189, "total_steps": 5676, "loss": 0.6068017482757568, "lr": 3.5285875047498075e-06, "epoch": 1.4761233480176212, "percentage": 73.8, "elapsed_time": "6:09:53", "remaining_time": "2:11:18"} +{"current_steps": 4190, "total_steps": 5676, "loss": 0.5577334761619568, "lr": 3.5241467332908384e-06, "epoch": 1.4764757709251102, "percentage": 73.82, "elapsed_time": "6:09:58", "remaining_time": "2:11:12"} +{"current_steps": 4191, "total_steps": 5676, "loss": 0.6596503257751465, "lr": 3.5197081601825135e-06, "epoch": 1.4768281938325991, "percentage": 73.84, "elapsed_time": "6:10:03", "remaining_time": "2:11:07"} +{"current_steps": 4192, "total_steps": 5676, "loss": 0.6260303258895874, "lr": 3.5152717869315965e-06, "epoch": 1.477180616740088, "percentage": 73.85, "elapsed_time": "6:10:08", "remaining_time": "2:11:02"} +{"current_steps": 4193, "total_steps": 5676, "loss": 0.5467355251312256, "lr": 3.510837615044097e-06, "epoch": 1.477533039647577, "percentage": 73.87, "elapsed_time": "6:10:12", "remaining_time": "2:10:56"} +{"current_steps": 4194, "total_steps": 5676, "loss": 0.5306693911552429, "lr": 3.506405646025276e-06, "epoch": 1.4778854625550661, "percentage": 73.89, "elapsed_time": "6:10:19", "remaining_time": "2:10:51"} +{"current_steps": 4195, "total_steps": 5676, "loss": 0.6130725741386414, "lr": 3.5019758813796513e-06, "epoch": 1.478237885462555, "percentage": 73.91, "elapsed_time": "6:10:24", "remaining_time": "2:10:46"} +{"current_steps": 4196, "total_steps": 5676, "loss": 0.6656272411346436, "lr": 3.4975483226109874e-06, "epoch": 1.478590308370044, "percentage": 73.93, "elapsed_time": "6:10:29", "remaining_time": "2:10:40"} +{"current_steps": 4197, "total_steps": 5676, "loss": 0.6018439531326294, "lr": 3.4931229712223047e-06, "epoch": 1.4789427312775332, "percentage": 73.94, "elapsed_time": "6:10:34", "remaining_time": "2:10:35"} +{"current_steps": 4198, "total_steps": 5676, "loss": 0.6635257005691528, "lr": 
3.488699828715871e-06, "epoch": 1.479295154185022, "percentage": 73.96, "elapsed_time": "6:10:40", "remaining_time": "2:10:30"} +{"current_steps": 4199, "total_steps": 5676, "loss": 0.5760075449943542, "lr": 3.4842788965932038e-06, "epoch": 1.479647577092511, "percentage": 73.98, "elapsed_time": "6:10:46", "remaining_time": "2:10:25"} +{"current_steps": 4200, "total_steps": 5676, "loss": 0.6951982975006104, "lr": 3.4798601763550778e-06, "epoch": 1.48, "percentage": 74.0, "elapsed_time": "6:10:52", "remaining_time": "2:10:20"} +{"current_steps": 4201, "total_steps": 5676, "loss": 0.6574405431747437, "lr": 3.475443669501508e-06, "epoch": 1.480352422907489, "percentage": 74.01, "elapsed_time": "6:11:02", "remaining_time": "2:10:16"} +{"current_steps": 4202, "total_steps": 5676, "loss": 0.5912263989448547, "lr": 3.4710293775317593e-06, "epoch": 1.4807048458149779, "percentage": 74.03, "elapsed_time": "6:11:08", "remaining_time": "2:10:11"} +{"current_steps": 4203, "total_steps": 5676, "loss": 0.5169661045074463, "lr": 3.4666173019443485e-06, "epoch": 1.481057268722467, "percentage": 74.05, "elapsed_time": "6:11:12", "remaining_time": "2:10:05"} +{"current_steps": 4204, "total_steps": 5676, "loss": 0.5707660913467407, "lr": 3.4622074442370345e-06, "epoch": 1.481409691629956, "percentage": 74.07, "elapsed_time": "6:11:19", "remaining_time": "2:10:01"} +{"current_steps": 4205, "total_steps": 5676, "loss": 0.5856584310531616, "lr": 3.4577998059068354e-06, "epoch": 1.4817621145374449, "percentage": 74.08, "elapsed_time": "6:11:24", "remaining_time": "2:09:55"} +{"current_steps": 4206, "total_steps": 5676, "loss": 0.6306010484695435, "lr": 3.4533943884499975e-06, "epoch": 1.482114537444934, "percentage": 74.1, "elapsed_time": "6:11:28", "remaining_time": "2:09:49"} +{"current_steps": 4207, "total_steps": 5676, "loss": 0.6177140474319458, "lr": 3.4489911933620245e-06, "epoch": 1.482466960352423, "percentage": 74.12, "elapsed_time": "6:11:34", "remaining_time": "2:09:44"} 
+{"current_steps": 4208, "total_steps": 5676, "loss": 0.5527759790420532, "lr": 3.4445902221376694e-06, "epoch": 1.482819383259912, "percentage": 74.14, "elapsed_time": "6:11:38", "remaining_time": "2:09:39"} +{"current_steps": 4209, "total_steps": 5676, "loss": 0.6838431358337402, "lr": 3.440191476270922e-06, "epoch": 1.4831718061674009, "percentage": 74.15, "elapsed_time": "6:11:44", "remaining_time": "2:09:34"} +{"current_steps": 4210, "total_steps": 5676, "loss": 0.4876987636089325, "lr": 3.4357949572550196e-06, "epoch": 1.4835242290748898, "percentage": 74.17, "elapsed_time": "6:11:49", "remaining_time": "2:09:28"} +{"current_steps": 4211, "total_steps": 5676, "loss": 0.5639296770095825, "lr": 3.4314006665824427e-06, "epoch": 1.4838766519823787, "percentage": 74.19, "elapsed_time": "6:11:55", "remaining_time": "2:09:23"} +{"current_steps": 4212, "total_steps": 5676, "loss": 0.4189109802246094, "lr": 3.427008605744915e-06, "epoch": 1.4842290748898679, "percentage": 74.21, "elapsed_time": "6:12:00", "remaining_time": "2:09:17"} +{"current_steps": 4213, "total_steps": 5676, "loss": 0.6602882146835327, "lr": 3.422618776233413e-06, "epoch": 1.4845814977973568, "percentage": 74.22, "elapsed_time": "6:12:05", "remaining_time": "2:09:12"} +{"current_steps": 4214, "total_steps": 5676, "loss": 0.7642478942871094, "lr": 3.4182311795381373e-06, "epoch": 1.4849339207048458, "percentage": 74.24, "elapsed_time": "6:12:09", "remaining_time": "2:09:07"} +{"current_steps": 4215, "total_steps": 5676, "loss": 0.5550031065940857, "lr": 3.41384581714854e-06, "epoch": 1.485286343612335, "percentage": 74.26, "elapsed_time": "6:12:15", "remaining_time": "2:09:01"} +{"current_steps": 4216, "total_steps": 5676, "loss": 0.7036092281341553, "lr": 3.4094626905533223e-06, "epoch": 1.4856387665198239, "percentage": 74.28, "elapsed_time": "6:12:20", "remaining_time": "2:08:56"} +{"current_steps": 4217, "total_steps": 5676, "loss": 0.693780779838562, "lr": 3.4050818012404165e-06, "epoch": 
1.4859911894273128, "percentage": 74.3, "elapsed_time": "6:12:25", "remaining_time": "2:08:51"} +{"current_steps": 4218, "total_steps": 5676, "loss": 0.6315299868583679, "lr": 3.4007031506969977e-06, "epoch": 1.4863436123348017, "percentage": 74.31, "elapsed_time": "6:12:31", "remaining_time": "2:08:46"} +{"current_steps": 4219, "total_steps": 5676, "loss": 0.47849225997924805, "lr": 3.396326740409481e-06, "epoch": 1.4866960352422907, "percentage": 74.33, "elapsed_time": "6:12:37", "remaining_time": "2:08:40"} +{"current_steps": 4220, "total_steps": 5676, "loss": 0.6200336217880249, "lr": 3.3919525718635195e-06, "epoch": 1.4870484581497798, "percentage": 74.35, "elapsed_time": "6:12:42", "remaining_time": "2:08:35"} +{"current_steps": 4221, "total_steps": 5676, "loss": 0.7594903707504272, "lr": 3.3875806465440152e-06, "epoch": 1.4874008810572688, "percentage": 74.37, "elapsed_time": "6:12:48", "remaining_time": "2:08:30"} +{"current_steps": 4222, "total_steps": 5676, "loss": 0.47159409523010254, "lr": 3.383210965935093e-06, "epoch": 1.4877533039647577, "percentage": 74.38, "elapsed_time": "6:12:54", "remaining_time": "2:08:25"} +{"current_steps": 4223, "total_steps": 5676, "loss": 0.5272495150566101, "lr": 3.3788435315201216e-06, "epoch": 1.4881057268722466, "percentage": 74.4, "elapsed_time": "6:13:01", "remaining_time": "2:08:20"} +{"current_steps": 4224, "total_steps": 5676, "loss": 0.43847334384918213, "lr": 3.3744783447817177e-06, "epoch": 1.4884581497797358, "percentage": 74.42, "elapsed_time": "6:13:06", "remaining_time": "2:08:15"} +{"current_steps": 4225, "total_steps": 5676, "loss": 0.656914234161377, "lr": 3.370115407201724e-06, "epoch": 1.4888105726872247, "percentage": 74.44, "elapsed_time": "6:13:11", "remaining_time": "2:08:10"} +{"current_steps": 4226, "total_steps": 5676, "loss": 0.6379527449607849, "lr": 3.3657547202612128e-06, "epoch": 1.4891629955947137, "percentage": 74.45, "elapsed_time": "6:13:17", "remaining_time": "2:08:04"} 
+{"current_steps": 4227, "total_steps": 5676, "loss": 0.6254120469093323, "lr": 3.3613962854405136e-06, "epoch": 1.4895154185022026, "percentage": 74.47, "elapsed_time": "6:13:22", "remaining_time": "2:07:59"} +{"current_steps": 4228, "total_steps": 5676, "loss": 0.6567566990852356, "lr": 3.35704010421917e-06, "epoch": 1.4898678414096915, "percentage": 74.49, "elapsed_time": "6:13:27", "remaining_time": "2:07:53"} +{"current_steps": 4229, "total_steps": 5676, "loss": 0.5121499300003052, "lr": 3.352686178075981e-06, "epoch": 1.4902202643171807, "percentage": 74.51, "elapsed_time": "6:13:33", "remaining_time": "2:07:48"} +{"current_steps": 4230, "total_steps": 5676, "loss": 0.5727466344833374, "lr": 3.3483345084889595e-06, "epoch": 1.4905726872246696, "percentage": 74.52, "elapsed_time": "6:13:38", "remaining_time": "2:07:43"} +{"current_steps": 4231, "total_steps": 5676, "loss": 0.6013318300247192, "lr": 3.3439850969353614e-06, "epoch": 1.4909251101321586, "percentage": 74.54, "elapsed_time": "6:13:42", "remaining_time": "2:07:37"} +{"current_steps": 4232, "total_steps": 5676, "loss": 0.6350653767585754, "lr": 3.3396379448916836e-06, "epoch": 1.4912775330396475, "percentage": 74.56, "elapsed_time": "6:13:46", "remaining_time": "2:07:32"} +{"current_steps": 4233, "total_steps": 5676, "loss": 0.7072123885154724, "lr": 3.335293053833645e-06, "epoch": 1.4916299559471367, "percentage": 74.58, "elapsed_time": "6:13:51", "remaining_time": "2:07:26"} +{"current_steps": 4234, "total_steps": 5676, "loss": 0.6208378076553345, "lr": 3.330950425236201e-06, "epoch": 1.4919823788546256, "percentage": 74.59, "elapsed_time": "6:13:58", "remaining_time": "2:07:21"} +{"current_steps": 4235, "total_steps": 5676, "loss": 0.656146764755249, "lr": 3.3266100605735397e-06, "epoch": 1.4923348017621145, "percentage": 74.61, "elapsed_time": "6:14:03", "remaining_time": "2:07:16"} +{"current_steps": 4236, "total_steps": 5676, "loss": 0.6145347952842712, "lr": 3.322271961319076e-06, "epoch": 
1.4926872246696035, "percentage": 74.63, "elapsed_time": "6:14:08", "remaining_time": "2:07:11"} +{"current_steps": 4237, "total_steps": 5676, "loss": 0.5876312255859375, "lr": 3.3179361289454694e-06, "epoch": 1.4930396475770924, "percentage": 74.65, "elapsed_time": "6:14:14", "remaining_time": "2:07:06"} +{"current_steps": 4238, "total_steps": 5676, "loss": 0.48365384340286255, "lr": 3.3136025649245897e-06, "epoch": 1.4933920704845816, "percentage": 74.67, "elapsed_time": "6:14:20", "remaining_time": "2:07:00"} +{"current_steps": 4239, "total_steps": 5676, "loss": 0.566576361656189, "lr": 3.3092712707275467e-06, "epoch": 1.4937444933920705, "percentage": 74.68, "elapsed_time": "6:14:25", "remaining_time": "2:06:55"} +{"current_steps": 4240, "total_steps": 5676, "loss": 0.6982032060623169, "lr": 3.3049422478246886e-06, "epoch": 1.4940969162995594, "percentage": 74.7, "elapsed_time": "6:14:30", "remaining_time": "2:06:50"} +{"current_steps": 4241, "total_steps": 5676, "loss": 0.5802686214447021, "lr": 3.3006154976855787e-06, "epoch": 1.4944493392070486, "percentage": 74.72, "elapsed_time": "6:14:35", "remaining_time": "2:06:45"} +{"current_steps": 4242, "total_steps": 5676, "loss": 0.6656724214553833, "lr": 3.296291021779016e-06, "epoch": 1.4948017621145375, "percentage": 74.74, "elapsed_time": "6:14:40", "remaining_time": "2:06:39"} +{"current_steps": 4243, "total_steps": 5676, "loss": 0.5081402063369751, "lr": 3.2919688215730227e-06, "epoch": 1.4951541850220265, "percentage": 74.75, "elapsed_time": "6:14:45", "remaining_time": "2:06:34"} +{"current_steps": 4244, "total_steps": 5676, "loss": 0.6963785290718079, "lr": 3.28764889853485e-06, "epoch": 1.4955066079295154, "percentage": 74.77, "elapsed_time": "6:14:52", "remaining_time": "2:06:29"} +{"current_steps": 4245, "total_steps": 5676, "loss": 0.4953869581222534, "lr": 3.283331254130987e-06, "epoch": 1.4958590308370043, "percentage": 74.79, "elapsed_time": "6:14:58", "remaining_time": "2:06:24"} 
+{"current_steps": 4246, "total_steps": 5676, "loss": 0.5495179295539856, "lr": 3.2790158898271283e-06, "epoch": 1.4962114537444933, "percentage": 74.81, "elapsed_time": "6:15:03", "remaining_time": "2:06:18"} +{"current_steps": 4247, "total_steps": 5676, "loss": 0.6238610148429871, "lr": 3.274702807088208e-06, "epoch": 1.4965638766519824, "percentage": 74.82, "elapsed_time": "6:15:10", "remaining_time": "2:06:14"} +{"current_steps": 4248, "total_steps": 5676, "loss": 0.557083249092102, "lr": 3.270392007378389e-06, "epoch": 1.4969162995594714, "percentage": 74.84, "elapsed_time": "6:15:15", "remaining_time": "2:06:08"} +{"current_steps": 4249, "total_steps": 5676, "loss": 0.6317561864852905, "lr": 3.2660834921610495e-06, "epoch": 1.4972687224669603, "percentage": 74.86, "elapsed_time": "6:15:20", "remaining_time": "2:06:03"} +{"current_steps": 4250, "total_steps": 5676, "loss": 0.5957529544830322, "lr": 3.2617772628987974e-06, "epoch": 1.4976211453744495, "percentage": 74.88, "elapsed_time": "6:15:27", "remaining_time": "2:05:58"} +{"current_steps": 4251, "total_steps": 5676, "loss": 0.7661205530166626, "lr": 3.2574733210534637e-06, "epoch": 1.4979735682819384, "percentage": 74.89, "elapsed_time": "6:15:34", "remaining_time": "2:05:53"} +{"current_steps": 4252, "total_steps": 5676, "loss": 0.5616782903671265, "lr": 3.2531716680861024e-06, "epoch": 1.4983259911894273, "percentage": 74.91, "elapsed_time": "6:15:40", "remaining_time": "2:05:48"} +{"current_steps": 4253, "total_steps": 5676, "loss": 0.5679990649223328, "lr": 3.2488723054569905e-06, "epoch": 1.4986784140969163, "percentage": 74.93, "elapsed_time": "6:15:46", "remaining_time": "2:05:43"} +{"current_steps": 4254, "total_steps": 5676, "loss": 0.6355923414230347, "lr": 3.2445752346256244e-06, "epoch": 1.4990308370044052, "percentage": 74.95, "elapsed_time": "6:15:50", "remaining_time": "2:05:38"} +{"current_steps": 4255, "total_steps": 5676, "loss": 0.5050745010375977, "lr": 3.2402804570507316e-06, "epoch": 
1.4993832599118941, "percentage": 74.96, "elapsed_time": "6:15:57", "remaining_time": "2:05:33"} +{"current_steps": 4256, "total_steps": 5676, "loss": 0.6585286855697632, "lr": 3.2359879741902557e-06, "epoch": 1.4997356828193833, "percentage": 74.98, "elapsed_time": "6:16:02", "remaining_time": "2:05:27"} +{"current_steps": 4257, "total_steps": 5676, "loss": 0.5108245611190796, "lr": 3.2316977875013567e-06, "epoch": 1.5000881057268722, "percentage": 75.0, "elapsed_time": "6:16:07", "remaining_time": "2:05:22"} +{"current_steps": 4258, "total_steps": 5676, "loss": 0.5270702838897705, "lr": 3.2274098984404223e-06, "epoch": 1.5004405286343614, "percentage": 75.02, "elapsed_time": "6:16:12", "remaining_time": "2:05:17"} +{"current_steps": 4259, "total_steps": 5676, "loss": 0.6421051025390625, "lr": 3.223124308463057e-06, "epoch": 1.5007929515418503, "percentage": 75.04, "elapsed_time": "6:16:17", "remaining_time": "2:05:11"} +{"current_steps": 4260, "total_steps": 5676, "loss": 0.6040945053100586, "lr": 3.218841019024084e-06, "epoch": 1.5011453744493393, "percentage": 75.05, "elapsed_time": "6:16:22", "remaining_time": "2:05:06"} +{"current_steps": 4261, "total_steps": 5676, "loss": 0.6389988660812378, "lr": 3.214560031577548e-06, "epoch": 1.5014977973568282, "percentage": 75.07, "elapsed_time": "6:16:26", "remaining_time": "2:05:00"} +{"current_steps": 4262, "total_steps": 5676, "loss": 0.6474273800849915, "lr": 3.210281347576707e-06, "epoch": 1.5018502202643171, "percentage": 75.09, "elapsed_time": "6:16:32", "remaining_time": "2:04:55"} +{"current_steps": 4263, "total_steps": 5676, "loss": 0.7020560503005981, "lr": 3.206004968474048e-06, "epoch": 1.502202643171806, "percentage": 75.11, "elapsed_time": "6:16:37", "remaining_time": "2:04:50"} +{"current_steps": 4264, "total_steps": 5676, "loss": 0.574647068977356, "lr": 3.2017308957212644e-06, "epoch": 1.502555066079295, "percentage": 75.12, "elapsed_time": "6:16:42", "remaining_time": "2:04:44"} +{"current_steps": 
4265, "total_steps": 5676, "loss": 0.6912944316864014, "lr": 3.1974591307692724e-06, "epoch": 1.5029074889867842, "percentage": 75.14, "elapsed_time": "6:16:48", "remaining_time": "2:04:39"} +{"current_steps": 4266, "total_steps": 5676, "loss": 0.7738592028617859, "lr": 3.1931896750682036e-06, "epoch": 1.503259911894273, "percentage": 75.16, "elapsed_time": "6:16:52", "remaining_time": "2:04:33"} +{"current_steps": 4267, "total_steps": 5676, "loss": 0.6418012380599976, "lr": 3.188922530067402e-06, "epoch": 1.5036123348017623, "percentage": 75.18, "elapsed_time": "6:16:56", "remaining_time": "2:04:28"} +{"current_steps": 4268, "total_steps": 5676, "loss": 0.639055609703064, "lr": 3.1846576972154343e-06, "epoch": 1.5039647577092512, "percentage": 75.19, "elapsed_time": "6:17:01", "remaining_time": "2:04:22"} +{"current_steps": 4269, "total_steps": 5676, "loss": 0.5512406229972839, "lr": 3.1803951779600774e-06, "epoch": 1.5043171806167401, "percentage": 75.21, "elapsed_time": "6:17:06", "remaining_time": "2:04:17"} +{"current_steps": 4270, "total_steps": 5676, "loss": 0.5838354229927063, "lr": 3.1761349737483194e-06, "epoch": 1.504669603524229, "percentage": 75.23, "elapsed_time": "6:17:11", "remaining_time": "2:04:12"} +{"current_steps": 4271, "total_steps": 5676, "loss": 0.5903568267822266, "lr": 3.1718770860263747e-06, "epoch": 1.505022026431718, "percentage": 75.25, "elapsed_time": "6:17:19", "remaining_time": "2:04:07"} +{"current_steps": 4272, "total_steps": 5676, "loss": 0.5610073804855347, "lr": 3.1676215162396604e-06, "epoch": 1.505374449339207, "percentage": 75.26, "elapsed_time": "6:17:24", "remaining_time": "2:04:02"} +{"current_steps": 4273, "total_steps": 5676, "loss": 0.6543136835098267, "lr": 3.163368265832809e-06, "epoch": 1.5057268722466959, "percentage": 75.28, "elapsed_time": "6:17:30", "remaining_time": "2:03:57"} +{"current_steps": 4274, "total_steps": 5676, "loss": 0.6586440801620483, "lr": 3.1591173362496686e-06, "epoch": 1.506079295154185, 
"percentage": 75.3, "elapsed_time": "6:17:35", "remaining_time": "2:03:51"} +{"current_steps": 4275, "total_steps": 5676, "loss": 0.5360713601112366, "lr": 3.1548687289332958e-06, "epoch": 1.506431718061674, "percentage": 75.32, "elapsed_time": "6:17:41", "remaining_time": "2:03:46"} +{"current_steps": 4276, "total_steps": 5676, "loss": 0.6695356369018555, "lr": 3.1506224453259615e-06, "epoch": 1.5067841409691631, "percentage": 75.33, "elapsed_time": "6:17:47", "remaining_time": "2:03:41"} +{"current_steps": 4277, "total_steps": 5676, "loss": 0.5708016753196716, "lr": 3.146378486869146e-06, "epoch": 1.507136563876652, "percentage": 75.35, "elapsed_time": "6:17:52", "remaining_time": "2:03:36"} +{"current_steps": 4278, "total_steps": 5676, "loss": 0.5412342548370361, "lr": 3.142136855003538e-06, "epoch": 1.507488986784141, "percentage": 75.37, "elapsed_time": "6:17:58", "remaining_time": "2:03:30"} +{"current_steps": 4279, "total_steps": 5676, "loss": 0.5392874479293823, "lr": 3.1378975511690468e-06, "epoch": 1.50784140969163, "percentage": 75.39, "elapsed_time": "6:18:04", "remaining_time": "2:03:25"} +{"current_steps": 4280, "total_steps": 5676, "loss": 0.6559237241744995, "lr": 3.133660576804781e-06, "epoch": 1.5081938325991189, "percentage": 75.41, "elapsed_time": "6:18:08", "remaining_time": "2:03:20"} +{"current_steps": 4281, "total_steps": 5676, "loss": 0.49973511695861816, "lr": 3.1294259333490597e-06, "epoch": 1.5085462555066078, "percentage": 75.42, "elapsed_time": "6:18:12", "remaining_time": "2:03:14"} +{"current_steps": 4282, "total_steps": 5676, "loss": 0.5458316206932068, "lr": 3.1251936222394152e-06, "epoch": 1.5088986784140968, "percentage": 75.44, "elapsed_time": "6:18:16", "remaining_time": "2:03:08"} +{"current_steps": 4283, "total_steps": 5676, "loss": 0.628986656665802, "lr": 3.120963644912579e-06, "epoch": 1.509251101321586, "percentage": 75.46, "elapsed_time": "6:18:23", "remaining_time": "2:03:04"} +{"current_steps": 4284, "total_steps": 
5676, "loss": 0.6234235167503357, "lr": 3.1167360028045103e-06, "epoch": 1.5096035242290748, "percentage": 75.48, "elapsed_time": "6:18:28", "remaining_time": "2:02:58"} +{"current_steps": 4285, "total_steps": 5676, "loss": 0.49892476201057434, "lr": 3.112510697350348e-06, "epoch": 1.509955947136564, "percentage": 75.49, "elapsed_time": "6:18:33", "remaining_time": "2:02:53"} +{"current_steps": 4286, "total_steps": 5676, "loss": 0.42951709032058716, "lr": 3.1082877299844562e-06, "epoch": 1.510308370044053, "percentage": 75.51, "elapsed_time": "6:18:38", "remaining_time": "2:02:47"} +{"current_steps": 4287, "total_steps": 5676, "loss": 0.6392263770103455, "lr": 3.1040671021404045e-06, "epoch": 1.5106607929515419, "percentage": 75.53, "elapsed_time": "6:18:43", "remaining_time": "2:02:42"} +{"current_steps": 4288, "total_steps": 5676, "loss": 0.6198933124542236, "lr": 3.099848815250964e-06, "epoch": 1.5110132158590308, "percentage": 75.55, "elapsed_time": "6:18:49", "remaining_time": "2:02:37"} +{"current_steps": 4289, "total_steps": 5676, "loss": 0.7626048922538757, "lr": 3.0956328707481055e-06, "epoch": 1.5113656387665197, "percentage": 75.56, "elapsed_time": "6:18:55", "remaining_time": "2:02:32"} +{"current_steps": 4290, "total_steps": 5676, "loss": 0.5245747566223145, "lr": 3.0914192700630175e-06, "epoch": 1.5117180616740087, "percentage": 75.58, "elapsed_time": "6:19:00", "remaining_time": "2:02:26"} +{"current_steps": 4291, "total_steps": 5676, "loss": 0.6788556575775146, "lr": 3.0872080146260818e-06, "epoch": 1.5120704845814978, "percentage": 75.6, "elapsed_time": "6:19:05", "remaining_time": "2:02:21"} +{"current_steps": 4292, "total_steps": 5676, "loss": 0.6224241852760315, "lr": 3.082999105866897e-06, "epoch": 1.5124229074889868, "percentage": 75.62, "elapsed_time": "6:19:10", "remaining_time": "2:02:16"} +{"current_steps": 4293, "total_steps": 5676, "loss": 0.706061840057373, "lr": 3.0787925452142477e-06, "epoch": 1.512775330396476, "percentage": 75.63, 
"elapsed_time": "6:19:17", "remaining_time": "2:02:11"} +{"current_steps": 4294, "total_steps": 5676, "loss": 0.7075262665748596, "lr": 3.07458833409613e-06, "epoch": 1.5131277533039649, "percentage": 75.65, "elapsed_time": "6:19:22", "remaining_time": "2:02:06"} +{"current_steps": 4295, "total_steps": 5676, "loss": 0.4912101626396179, "lr": 3.0703864739397494e-06, "epoch": 1.5134801762114538, "percentage": 75.67, "elapsed_time": "6:19:28", "remaining_time": "2:02:00"} +{"current_steps": 4296, "total_steps": 5676, "loss": 0.6530265808105469, "lr": 3.066186966171507e-06, "epoch": 1.5138325991189427, "percentage": 75.69, "elapsed_time": "6:19:32", "remaining_time": "2:01:55"} +{"current_steps": 4297, "total_steps": 5676, "loss": 0.4905887246131897, "lr": 3.0619898122169946e-06, "epoch": 1.5141850220264317, "percentage": 75.7, "elapsed_time": "6:19:36", "remaining_time": "2:01:49"} +{"current_steps": 4298, "total_steps": 5676, "loss": 0.5025225281715393, "lr": 3.057795013501025e-06, "epoch": 1.5145374449339206, "percentage": 75.72, "elapsed_time": "6:19:42", "remaining_time": "2:01:44"} +{"current_steps": 4299, "total_steps": 5676, "loss": 0.5769479274749756, "lr": 3.0536025714475946e-06, "epoch": 1.5148898678414096, "percentage": 75.74, "elapsed_time": "6:19:48", "remaining_time": "2:01:39"} +{"current_steps": 4300, "total_steps": 5676, "loss": 0.6275384426116943, "lr": 3.049412487479919e-06, "epoch": 1.5152422907488987, "percentage": 75.76, "elapsed_time": "6:19:52", "remaining_time": "2:01:33"} +{"current_steps": 4301, "total_steps": 5676, "loss": 0.5555096864700317, "lr": 3.04522476302039e-06, "epoch": 1.5155947136563876, "percentage": 75.78, "elapsed_time": "6:20:01", "remaining_time": "2:01:29"} +{"current_steps": 4302, "total_steps": 5676, "loss": 0.5605635643005371, "lr": 3.0410393994906096e-06, "epoch": 1.5159471365638768, "percentage": 75.79, "elapsed_time": "6:20:06", "remaining_time": "2:01:24"} +{"current_steps": 4303, "total_steps": 5676, "loss": 
0.6006621718406677, "lr": 3.0368563983113864e-06, "epoch": 1.5162995594713657, "percentage": 75.81, "elapsed_time": "6:20:10", "remaining_time": "2:01:18"} +{"current_steps": 4304, "total_steps": 5676, "loss": 0.5288259983062744, "lr": 3.0326757609027147e-06, "epoch": 1.5166519823788547, "percentage": 75.83, "elapsed_time": "6:20:14", "remaining_time": "2:01:12"} +{"current_steps": 4305, "total_steps": 5676, "loss": 0.5671676993370056, "lr": 3.0284974886837903e-06, "epoch": 1.5170044052863436, "percentage": 75.85, "elapsed_time": "6:20:20", "remaining_time": "2:01:07"} +{"current_steps": 4306, "total_steps": 5676, "loss": 0.6072134971618652, "lr": 3.0243215830730075e-06, "epoch": 1.5173568281938326, "percentage": 75.86, "elapsed_time": "6:20:25", "remaining_time": "2:01:02"} +{"current_steps": 4307, "total_steps": 5676, "loss": 0.6010481119155884, "lr": 3.020148045487953e-06, "epoch": 1.5177092511013215, "percentage": 75.88, "elapsed_time": "6:20:30", "remaining_time": "2:00:56"} +{"current_steps": 4308, "total_steps": 5676, "loss": 0.6126751899719238, "lr": 3.0159768773454225e-06, "epoch": 1.5180616740088104, "percentage": 75.9, "elapsed_time": "6:20:36", "remaining_time": "2:00:51"} +{"current_steps": 4309, "total_steps": 5676, "loss": 0.5408819317817688, "lr": 3.011808080061387e-06, "epoch": 1.5184140969162996, "percentage": 75.92, "elapsed_time": "6:20:42", "remaining_time": "2:00:46"} +{"current_steps": 4310, "total_steps": 5676, "loss": 0.6528562307357788, "lr": 3.0076416550510255e-06, "epoch": 1.5187665198237885, "percentage": 75.93, "elapsed_time": "6:20:48", "remaining_time": "2:00:41"} +{"current_steps": 4311, "total_steps": 5676, "loss": 0.6355241537094116, "lr": 3.003477603728715e-06, "epoch": 1.5191189427312777, "percentage": 75.95, "elapsed_time": "6:20:54", "remaining_time": "2:00:36"} +{"current_steps": 4312, "total_steps": 5676, "loss": 0.5511878728866577, "lr": 2.9993159275080174e-06, "epoch": 1.5194713656387666, "percentage": 75.97, 
"elapsed_time": "6:21:00", "remaining_time": "2:00:31"} +{"current_steps": 4313, "total_steps": 5676, "loss": 0.5066816806793213, "lr": 2.9951566278016943e-06, "epoch": 1.5198237885462555, "percentage": 75.99, "elapsed_time": "6:21:04", "remaining_time": "2:00:25"} +{"current_steps": 4314, "total_steps": 5676, "loss": 0.5636533498764038, "lr": 2.9909997060216966e-06, "epoch": 1.5201762114537445, "percentage": 76.0, "elapsed_time": "6:21:10", "remaining_time": "2:00:20"} +{"current_steps": 4315, "total_steps": 5676, "loss": 0.49742352962493896, "lr": 2.9868451635791706e-06, "epoch": 1.5205286343612334, "percentage": 76.02, "elapsed_time": "6:21:16", "remaining_time": "2:00:15"} +{"current_steps": 4316, "total_steps": 5676, "loss": 0.7264617681503296, "lr": 2.9826930018844533e-06, "epoch": 1.5208810572687224, "percentage": 76.04, "elapsed_time": "6:21:20", "remaining_time": "2:00:09"} +{"current_steps": 4317, "total_steps": 5676, "loss": 0.5342350006103516, "lr": 2.978543222347076e-06, "epoch": 1.5212334801762113, "percentage": 76.06, "elapsed_time": "6:21:26", "remaining_time": "2:00:04"} +{"current_steps": 4318, "total_steps": 5676, "loss": 0.4324883818626404, "lr": 2.9743958263757554e-06, "epoch": 1.5215859030837005, "percentage": 76.07, "elapsed_time": "6:21:31", "remaining_time": "1:59:59"} +{"current_steps": 4319, "total_steps": 5676, "loss": 0.5867510437965393, "lr": 2.970250815378409e-06, "epoch": 1.5219383259911894, "percentage": 76.09, "elapsed_time": "6:21:35", "remaining_time": "1:59:53"} +{"current_steps": 4320, "total_steps": 5676, "loss": 0.7176594734191895, "lr": 2.966108190762138e-06, "epoch": 1.5222907488986785, "percentage": 76.11, "elapsed_time": "6:21:41", "remaining_time": "1:59:48"} +{"current_steps": 4321, "total_steps": 5676, "loss": 0.5810995101928711, "lr": 2.9619679539332337e-06, "epoch": 1.5226431718061675, "percentage": 76.13, "elapsed_time": "6:21:46", "remaining_time": "1:59:43"} +{"current_steps": 4322, "total_steps": 5676, "loss": 
0.6262675523757935, "lr": 2.957830106297177e-06, "epoch": 1.5229955947136564, "percentage": 76.15, "elapsed_time": "6:21:51", "remaining_time": "1:59:37"} +{"current_steps": 4323, "total_steps": 5676, "loss": 0.7743325233459473, "lr": 2.9536946492586383e-06, "epoch": 1.5233480176211454, "percentage": 76.16, "elapsed_time": "6:21:57", "remaining_time": "1:59:32"} +{"current_steps": 4324, "total_steps": 5676, "loss": 0.7706553936004639, "lr": 2.9495615842214776e-06, "epoch": 1.5237004405286343, "percentage": 76.18, "elapsed_time": "6:22:02", "remaining_time": "1:59:27"} +{"current_steps": 4325, "total_steps": 5676, "loss": 0.5982425808906555, "lr": 2.9454309125887405e-06, "epoch": 1.5240528634361232, "percentage": 76.2, "elapsed_time": "6:22:08", "remaining_time": "1:59:22"} +{"current_steps": 4326, "total_steps": 5676, "loss": 0.5580830574035645, "lr": 2.9413026357626596e-06, "epoch": 1.5244052863436124, "percentage": 76.22, "elapsed_time": "6:22:12", "remaining_time": "1:59:16"} +{"current_steps": 4327, "total_steps": 5676, "loss": 0.5316063165664673, "lr": 2.937176755144662e-06, "epoch": 1.5247577092511013, "percentage": 76.23, "elapsed_time": "6:22:17", "remaining_time": "1:59:11"} +{"current_steps": 4328, "total_steps": 5676, "loss": 0.574161171913147, "lr": 2.9330532721353523e-06, "epoch": 1.5251101321585903, "percentage": 76.25, "elapsed_time": "6:22:23", "remaining_time": "1:59:05"} +{"current_steps": 4329, "total_steps": 5676, "loss": 0.5339558720588684, "lr": 2.9289321881345257e-06, "epoch": 1.5254625550660794, "percentage": 76.27, "elapsed_time": "6:22:28", "remaining_time": "1:59:00"} +{"current_steps": 4330, "total_steps": 5676, "loss": 0.594109296798706, "lr": 2.9248135045411607e-06, "epoch": 1.5258149779735684, "percentage": 76.29, "elapsed_time": "6:22:33", "remaining_time": "1:58:55"} +{"current_steps": 4331, "total_steps": 5676, "loss": 0.5953024625778198, "lr": 2.9206972227534237e-06, "epoch": 1.5261674008810573, "percentage": 76.3, "elapsed_time": 
"6:22:37", "remaining_time": "1:58:49"} +{"current_steps": 4332, "total_steps": 5676, "loss": 0.5142296552658081, "lr": 2.916583344168663e-06, "epoch": 1.5265198237885462, "percentage": 76.32, "elapsed_time": "6:22:42", "remaining_time": "1:58:44"} +{"current_steps": 4333, "total_steps": 5676, "loss": 0.5796314477920532, "lr": 2.912471870183411e-06, "epoch": 1.5268722466960352, "percentage": 76.34, "elapsed_time": "6:22:48", "remaining_time": "1:58:39"} +{"current_steps": 4334, "total_steps": 5676, "loss": 0.7202566862106323, "lr": 2.9083628021933886e-06, "epoch": 1.527224669603524, "percentage": 76.36, "elapsed_time": "6:22:53", "remaining_time": "1:58:33"} +{"current_steps": 4335, "total_steps": 5676, "loss": 0.6684188842773438, "lr": 2.9042561415934956e-06, "epoch": 1.5275770925110133, "percentage": 76.37, "elapsed_time": "6:22:57", "remaining_time": "1:58:28"} +{"current_steps": 4336, "total_steps": 5676, "loss": 0.5377634763717651, "lr": 2.9001518897778147e-06, "epoch": 1.5279295154185022, "percentage": 76.39, "elapsed_time": "6:23:03", "remaining_time": "1:58:22"} +{"current_steps": 4337, "total_steps": 5676, "loss": 0.5780486464500427, "lr": 2.8960500481396115e-06, "epoch": 1.5282819383259914, "percentage": 76.41, "elapsed_time": "6:23:09", "remaining_time": "1:58:17"} +{"current_steps": 4338, "total_steps": 5676, "loss": 0.6020476818084717, "lr": 2.891950618071333e-06, "epoch": 1.5286343612334803, "percentage": 76.43, "elapsed_time": "6:23:16", "remaining_time": "1:58:12"} +{"current_steps": 4339, "total_steps": 5676, "loss": 0.6076337099075317, "lr": 2.8878536009646106e-06, "epoch": 1.5289867841409692, "percentage": 76.44, "elapsed_time": "6:23:20", "remaining_time": "1:58:07"} +{"current_steps": 4340, "total_steps": 5676, "loss": 0.6370673179626465, "lr": 2.883758998210251e-06, "epoch": 1.5293392070484582, "percentage": 76.46, "elapsed_time": "6:23:26", "remaining_time": "1:58:02"} +{"current_steps": 4341, "total_steps": 5676, "loss": 0.41594892740249634, 
"lr": 2.879666811198244e-06, "epoch": 1.529691629955947, "percentage": 76.48, "elapsed_time": "6:23:31", "remaining_time": "1:57:56"} +{"current_steps": 4342, "total_steps": 5676, "loss": 0.4506857693195343, "lr": 2.8755770413177632e-06, "epoch": 1.530044052863436, "percentage": 76.5, "elapsed_time": "6:23:36", "remaining_time": "1:57:51"} +{"current_steps": 4343, "total_steps": 5676, "loss": 0.5883188247680664, "lr": 2.8714896899571575e-06, "epoch": 1.530396475770925, "percentage": 76.52, "elapsed_time": "6:23:40", "remaining_time": "1:57:45"} +{"current_steps": 4344, "total_steps": 5676, "loss": 0.6327757239341736, "lr": 2.8674047585039545e-06, "epoch": 1.5307488986784141, "percentage": 76.53, "elapsed_time": "6:23:47", "remaining_time": "1:57:40"} +{"current_steps": 4345, "total_steps": 5676, "loss": 0.6241307258605957, "lr": 2.863322248344862e-06, "epoch": 1.531101321585903, "percentage": 76.55, "elapsed_time": "6:23:52", "remaining_time": "1:57:35"} +{"current_steps": 4346, "total_steps": 5676, "loss": 0.6982603669166565, "lr": 2.859242160865764e-06, "epoch": 1.5314537444933922, "percentage": 76.57, "elapsed_time": "6:23:57", "remaining_time": "1:57:30"} +{"current_steps": 4347, "total_steps": 5676, "loss": 0.6293624639511108, "lr": 2.8551644974517236e-06, "epoch": 1.5318061674008812, "percentage": 76.59, "elapsed_time": "6:24:02", "remaining_time": "1:57:24"} +{"current_steps": 4348, "total_steps": 5676, "loss": 0.5630898475646973, "lr": 2.85108925948698e-06, "epoch": 1.53215859030837, "percentage": 76.6, "elapsed_time": "6:24:08", "remaining_time": "1:57:19"} +{"current_steps": 4349, "total_steps": 5676, "loss": 0.5300726294517517, "lr": 2.847016448354948e-06, "epoch": 1.532511013215859, "percentage": 76.62, "elapsed_time": "6:24:13", "remaining_time": "1:57:14"} +{"current_steps": 4350, "total_steps": 5676, "loss": 0.6302311420440674, "lr": 2.8429460654382257e-06, "epoch": 1.532863436123348, "percentage": 76.64, "elapsed_time": "6:24:20", "remaining_time": 
"1:57:09"} +{"current_steps": 4351, "total_steps": 5676, "loss": 0.5063371658325195, "lr": 2.8388781121185815e-06, "epoch": 1.533215859030837, "percentage": 76.66, "elapsed_time": "6:24:23", "remaining_time": "1:57:03"} +{"current_steps": 4352, "total_steps": 5676, "loss": 0.6116877198219299, "lr": 2.8348125897769496e-06, "epoch": 1.5335682819383258, "percentage": 76.67, "elapsed_time": "6:24:29", "remaining_time": "1:56:58"} +{"current_steps": 4353, "total_steps": 5676, "loss": 0.5671982169151306, "lr": 2.830749499793458e-06, "epoch": 1.533920704845815, "percentage": 76.69, "elapsed_time": "6:24:34", "remaining_time": "1:56:53"} +{"current_steps": 4354, "total_steps": 5676, "loss": 0.6537752747535706, "lr": 2.826688843547395e-06, "epoch": 1.534273127753304, "percentage": 76.71, "elapsed_time": "6:24:38", "remaining_time": "1:56:47"} +{"current_steps": 4355, "total_steps": 5676, "loss": 0.6608545780181885, "lr": 2.8226306224172283e-06, "epoch": 1.534625550660793, "percentage": 76.73, "elapsed_time": "6:24:44", "remaining_time": "1:56:42"} +{"current_steps": 4356, "total_steps": 5676, "loss": 0.7038587331771851, "lr": 2.8185748377805977e-06, "epoch": 1.534977973568282, "percentage": 76.74, "elapsed_time": "6:24:48", "remaining_time": "1:56:36"} +{"current_steps": 4357, "total_steps": 5676, "loss": 0.7422336339950562, "lr": 2.8145214910143128e-06, "epoch": 1.535330396475771, "percentage": 76.76, "elapsed_time": "6:24:53", "remaining_time": "1:56:31"} +{"current_steps": 4358, "total_steps": 5676, "loss": 0.5739270448684692, "lr": 2.8104705834943625e-06, "epoch": 1.53568281938326, "percentage": 76.78, "elapsed_time": "6:25:00", "remaining_time": "1:56:26"} +{"current_steps": 4359, "total_steps": 5676, "loss": 0.6429908275604248, "lr": 2.8064221165959073e-06, "epoch": 1.5360352422907488, "percentage": 76.8, "elapsed_time": "6:25:05", "remaining_time": "1:56:20"} +{"current_steps": 4360, "total_steps": 5676, "loss": 0.5660578012466431, "lr": 2.802376091693264e-06, 
"epoch": 1.5363876651982378, "percentage": 76.81, "elapsed_time": "6:25:09", "remaining_time": "1:56:15"} +{"current_steps": 4361, "total_steps": 5676, "loss": 0.4507398009300232, "lr": 2.798332510159942e-06, "epoch": 1.5367400881057267, "percentage": 76.83, "elapsed_time": "6:25:15", "remaining_time": "1:56:10"} +{"current_steps": 4362, "total_steps": 5676, "loss": 0.5107634663581848, "lr": 2.7942913733686063e-06, "epoch": 1.5370925110132159, "percentage": 76.85, "elapsed_time": "6:25:22", "remaining_time": "1:56:05"} +{"current_steps": 4363, "total_steps": 5676, "loss": 0.505529522895813, "lr": 2.790252682691106e-06, "epoch": 1.5374449339207048, "percentage": 76.87, "elapsed_time": "6:25:26", "remaining_time": "1:55:59"} +{"current_steps": 4364, "total_steps": 5676, "loss": 0.459098219871521, "lr": 2.7862164394984405e-06, "epoch": 1.537797356828194, "percentage": 76.89, "elapsed_time": "6:25:31", "remaining_time": "1:55:54"} +{"current_steps": 4365, "total_steps": 5676, "loss": 0.5200169086456299, "lr": 2.782182645160789e-06, "epoch": 1.538149779735683, "percentage": 76.9, "elapsed_time": "6:25:37", "remaining_time": "1:55:49"} +{"current_steps": 4366, "total_steps": 5676, "loss": 0.6723796725273132, "lr": 2.778151301047506e-06, "epoch": 1.5385022026431718, "percentage": 76.92, "elapsed_time": "6:25:43", "remaining_time": "1:55:44"} +{"current_steps": 4367, "total_steps": 5676, "loss": 0.5385584831237793, "lr": 2.7741224085271067e-06, "epoch": 1.5388546255506608, "percentage": 76.94, "elapsed_time": "6:25:48", "remaining_time": "1:55:38"} +{"current_steps": 4368, "total_steps": 5676, "loss": 0.5766934156417847, "lr": 2.770095968967267e-06, "epoch": 1.5392070484581497, "percentage": 76.96, "elapsed_time": "6:25:54", "remaining_time": "1:55:33"} +{"current_steps": 4369, "total_steps": 5676, "loss": 0.6303011178970337, "lr": 2.766071983734845e-06, "epoch": 1.5395594713656386, "percentage": 76.97, "elapsed_time": "6:26:00", "remaining_time": "1:55:28"} 
+{"current_steps": 4370, "total_steps": 5676, "loss": 0.6192827224731445, "lr": 2.7620504541958525e-06, "epoch": 1.5399118942731278, "percentage": 76.99, "elapsed_time": "6:26:04", "remaining_time": "1:55:22"} +{"current_steps": 4371, "total_steps": 5676, "loss": 0.543215811252594, "lr": 2.758031381715485e-06, "epoch": 1.5402643171806167, "percentage": 77.01, "elapsed_time": "6:26:11", "remaining_time": "1:55:17"} +{"current_steps": 4372, "total_steps": 5676, "loss": 0.6364312171936035, "lr": 2.7540147676580808e-06, "epoch": 1.5406167400881057, "percentage": 77.03, "elapsed_time": "6:26:15", "remaining_time": "1:55:12"} +{"current_steps": 4373, "total_steps": 5676, "loss": 0.5625254511833191, "lr": 2.750000613387157e-06, "epoch": 1.5409691629955948, "percentage": 77.04, "elapsed_time": "6:26:22", "remaining_time": "1:55:07"} +{"current_steps": 4374, "total_steps": 5676, "loss": 0.7304128408432007, "lr": 2.7459889202654e-06, "epoch": 1.5413215859030838, "percentage": 77.06, "elapsed_time": "6:26:28", "remaining_time": "1:55:02"} +{"current_steps": 4375, "total_steps": 5676, "loss": 0.676097571849823, "lr": 2.7419796896546536e-06, "epoch": 1.5416740088105727, "percentage": 77.08, "elapsed_time": "6:26:33", "remaining_time": "1:54:57"} +{"current_steps": 4376, "total_steps": 5676, "loss": 0.7024539709091187, "lr": 2.7379729229159193e-06, "epoch": 1.5420264317180616, "percentage": 77.1, "elapsed_time": "6:26:38", "remaining_time": "1:54:51"} +{"current_steps": 4377, "total_steps": 5676, "loss": 0.6357964277267456, "lr": 2.7339686214093774e-06, "epoch": 1.5423788546255506, "percentage": 77.11, "elapsed_time": "6:26:42", "remaining_time": "1:54:46"} +{"current_steps": 4378, "total_steps": 5676, "loss": 0.5254555940628052, "lr": 2.729966786494361e-06, "epoch": 1.5427312775330395, "percentage": 77.13, "elapsed_time": "6:26:46", "remaining_time": "1:54:40"} +{"current_steps": 4379, "total_steps": 5676, "loss": 0.4899883270263672, "lr": 2.7259674195293697e-06, "epoch": 
1.5430837004405287, "percentage": 77.15, "elapsed_time": "6:26:52", "remaining_time": "1:54:35"} +{"current_steps": 4380, "total_steps": 5676, "loss": 0.5750056505203247, "lr": 2.721970521872063e-06, "epoch": 1.5434361233480176, "percentage": 77.17, "elapsed_time": "6:26:58", "remaining_time": "1:54:30"} +{"current_steps": 4381, "total_steps": 5676, "loss": 0.5852059125900269, "lr": 2.71797609487926e-06, "epoch": 1.5437885462555068, "percentage": 77.18, "elapsed_time": "6:27:03", "remaining_time": "1:54:24"} +{"current_steps": 4382, "total_steps": 5676, "loss": 0.6360914707183838, "lr": 2.71398413990695e-06, "epoch": 1.5441409691629957, "percentage": 77.2, "elapsed_time": "6:27:09", "remaining_time": "1:54:19"} +{"current_steps": 4383, "total_steps": 5676, "loss": 0.5120062828063965, "lr": 2.7099946583102764e-06, "epoch": 1.5444933920704846, "percentage": 77.22, "elapsed_time": "6:27:15", "remaining_time": "1:54:14"} +{"current_steps": 4384, "total_steps": 5676, "loss": 0.5798901319503784, "lr": 2.706007651443543e-06, "epoch": 1.5448458149779736, "percentage": 77.24, "elapsed_time": "6:27:20", "remaining_time": "1:54:09"} +{"current_steps": 4385, "total_steps": 5676, "loss": 0.5112065076828003, "lr": 2.702023120660213e-06, "epoch": 1.5451982378854625, "percentage": 77.26, "elapsed_time": "6:27:25", "remaining_time": "1:54:03"} +{"current_steps": 4386, "total_steps": 5676, "loss": 0.6136611700057983, "lr": 2.6980410673129133e-06, "epoch": 1.5455506607929514, "percentage": 77.27, "elapsed_time": "6:27:30", "remaining_time": "1:53:58"} +{"current_steps": 4387, "total_steps": 5676, "loss": 0.5944457054138184, "lr": 2.694061492753426e-06, "epoch": 1.5459030837004404, "percentage": 77.29, "elapsed_time": "6:27:35", "remaining_time": "1:53:52"} +{"current_steps": 4388, "total_steps": 5676, "loss": 0.5931667685508728, "lr": 2.690084398332692e-06, "epoch": 1.5462555066079295, "percentage": 77.31, "elapsed_time": "6:27:39", "remaining_time": "1:53:47"} +{"current_steps": 
4389, "total_steps": 5676, "loss": 0.6112217307090759, "lr": 2.686109785400809e-06, "epoch": 1.5466079295154185, "percentage": 77.33, "elapsed_time": "6:27:45", "remaining_time": "1:53:42"} +{"current_steps": 4390, "total_steps": 5676, "loss": 0.549437940120697, "lr": 2.68213765530704e-06, "epoch": 1.5469603524229076, "percentage": 77.34, "elapsed_time": "6:27:51", "remaining_time": "1:53:36"} +{"current_steps": 4391, "total_steps": 5676, "loss": 0.674758791923523, "lr": 2.6781680093997965e-06, "epoch": 1.5473127753303966, "percentage": 77.36, "elapsed_time": "6:27:55", "remaining_time": "1:53:31"} +{"current_steps": 4392, "total_steps": 5676, "loss": 0.6015446186065674, "lr": 2.6742008490266504e-06, "epoch": 1.5476651982378855, "percentage": 77.38, "elapsed_time": "6:27:59", "remaining_time": "1:53:25"} +{"current_steps": 4393, "total_steps": 5676, "loss": 0.5512514710426331, "lr": 2.6702361755343278e-06, "epoch": 1.5480176211453744, "percentage": 77.4, "elapsed_time": "6:28:04", "remaining_time": "1:53:20"} +{"current_steps": 4394, "total_steps": 5676, "loss": 0.6443158984184265, "lr": 2.666273990268713e-06, "epoch": 1.5483700440528634, "percentage": 77.41, "elapsed_time": "6:28:09", "remaining_time": "1:53:14"} +{"current_steps": 4395, "total_steps": 5676, "loss": 0.5682512521743774, "lr": 2.6623142945748447e-06, "epoch": 1.5487224669603523, "percentage": 77.43, "elapsed_time": "6:28:13", "remaining_time": "1:53:09"} +{"current_steps": 4396, "total_steps": 5676, "loss": 0.5544074773788452, "lr": 2.658357089796917e-06, "epoch": 1.5490748898678413, "percentage": 77.45, "elapsed_time": "6:28:19", "remaining_time": "1:53:04"} +{"current_steps": 4397, "total_steps": 5676, "loss": 0.5811636447906494, "lr": 2.6544023772782736e-06, "epoch": 1.5494273127753304, "percentage": 77.47, "elapsed_time": "6:28:24", "remaining_time": "1:52:58"} +{"current_steps": 4398, "total_steps": 5676, "loss": 0.4696553647518158, "lr": 2.650450158361422e-06, "epoch": 1.5497797356828193, 
"percentage": 77.48, "elapsed_time": "6:28:30", "remaining_time": "1:52:53"} +{"current_steps": 4399, "total_steps": 5676, "loss": 0.6897521615028381, "lr": 2.6465004343880153e-06, "epoch": 1.5501321585903085, "percentage": 77.5, "elapsed_time": "6:28:36", "remaining_time": "1:52:48"} +{"current_steps": 4400, "total_steps": 5676, "loss": 0.6154924631118774, "lr": 2.6425532066988613e-06, "epoch": 1.5504845814977974, "percentage": 77.52, "elapsed_time": "6:28:40", "remaining_time": "1:52:42"} +{"current_steps": 4401, "total_steps": 5676, "loss": 0.5333596467971802, "lr": 2.6386084766339214e-06, "epoch": 1.5508370044052864, "percentage": 77.54, "elapsed_time": "6:28:50", "remaining_time": "1:52:38"} +{"current_steps": 4402, "total_steps": 5676, "loss": 0.6633985042572021, "lr": 2.634666245532309e-06, "epoch": 1.5511894273127753, "percentage": 77.55, "elapsed_time": "6:28:55", "remaining_time": "1:52:33"} +{"current_steps": 4403, "total_steps": 5676, "loss": 0.7913509607315063, "lr": 2.630726514732289e-06, "epoch": 1.5515418502202643, "percentage": 77.57, "elapsed_time": "6:29:01", "remaining_time": "1:52:28"} +{"current_steps": 4404, "total_steps": 5676, "loss": 0.5776455402374268, "lr": 2.6267892855712763e-06, "epoch": 1.5518942731277532, "percentage": 77.59, "elapsed_time": "6:29:05", "remaining_time": "1:52:22"} +{"current_steps": 4405, "total_steps": 5676, "loss": 0.5912357568740845, "lr": 2.6228545593858357e-06, "epoch": 1.5522466960352423, "percentage": 77.61, "elapsed_time": "6:29:10", "remaining_time": "1:52:17"} +{"current_steps": 4406, "total_steps": 5676, "loss": 0.49319127202033997, "lr": 2.618922337511689e-06, "epoch": 1.5525991189427313, "percentage": 77.63, "elapsed_time": "6:29:14", "remaining_time": "1:52:11"} +{"current_steps": 4407, "total_steps": 5676, "loss": 0.5805023908615112, "lr": 2.6149926212837016e-06, "epoch": 1.5529515418502202, "percentage": 77.64, "elapsed_time": "6:29:19", "remaining_time": "1:52:06"} +{"current_steps": 4408, 
"total_steps": 5676, "loss": 0.5635806918144226, "lr": 2.6110654120358902e-06, "epoch": 1.5533039647577094, "percentage": 77.66, "elapsed_time": "6:29:26", "remaining_time": "1:52:01"} +{"current_steps": 4409, "total_steps": 5676, "loss": 0.5006709694862366, "lr": 2.6071407111014178e-06, "epoch": 1.5536563876651983, "percentage": 77.68, "elapsed_time": "6:29:32", "remaining_time": "1:51:56"} +{"current_steps": 4410, "total_steps": 5676, "loss": 0.6035311818122864, "lr": 2.6032185198126005e-06, "epoch": 1.5540088105726872, "percentage": 77.7, "elapsed_time": "6:29:36", "remaining_time": "1:51:50"} +{"current_steps": 4411, "total_steps": 5676, "loss": 0.5978977680206299, "lr": 2.599298839500899e-06, "epoch": 1.5543612334801762, "percentage": 77.71, "elapsed_time": "6:29:43", "remaining_time": "1:51:45"} +{"current_steps": 4412, "total_steps": 5676, "loss": 0.6330617070198059, "lr": 2.5953816714969194e-06, "epoch": 1.5547136563876651, "percentage": 77.73, "elapsed_time": "6:29:48", "remaining_time": "1:51:40"} +{"current_steps": 4413, "total_steps": 5676, "loss": 0.6541750431060791, "lr": 2.591467017130426e-06, "epoch": 1.555066079295154, "percentage": 77.75, "elapsed_time": "6:29:55", "remaining_time": "1:51:35"} +{"current_steps": 4414, "total_steps": 5676, "loss": 0.5503655076026917, "lr": 2.5875548777303204e-06, "epoch": 1.5554185022026432, "percentage": 77.77, "elapsed_time": "6:29:59", "remaining_time": "1:51:30"} +{"current_steps": 4415, "total_steps": 5676, "loss": 0.5117509365081787, "lr": 2.583645254624645e-06, "epoch": 1.5557709251101322, "percentage": 77.78, "elapsed_time": "6:30:04", "remaining_time": "1:51:24"} +{"current_steps": 4416, "total_steps": 5676, "loss": 0.6699894070625305, "lr": 2.5797381491406027e-06, "epoch": 1.5561233480176213, "percentage": 77.8, "elapsed_time": "6:30:08", "remaining_time": "1:51:18"} +{"current_steps": 4417, "total_steps": 5676, "loss": 0.6870071291923523, "lr": 2.5758335626045308e-06, "epoch": 1.5564757709251102, 
"percentage": 77.82, "elapsed_time": "6:30:12", "remaining_time": "1:51:13"} +{"current_steps": 4418, "total_steps": 5676, "loss": 0.7680954933166504, "lr": 2.571931496341916e-06, "epoch": 1.5568281938325992, "percentage": 77.84, "elapsed_time": "6:30:17", "remaining_time": "1:51:08"} +{"current_steps": 4419, "total_steps": 5676, "loss": 0.6504727602005005, "lr": 2.568031951677389e-06, "epoch": 1.5571806167400881, "percentage": 77.85, "elapsed_time": "6:30:22", "remaining_time": "1:51:02"} +{"current_steps": 4420, "total_steps": 5676, "loss": 0.7101249098777771, "lr": 2.5641349299347196e-06, "epoch": 1.557533039647577, "percentage": 77.87, "elapsed_time": "6:30:28", "remaining_time": "1:50:57"} +{"current_steps": 4421, "total_steps": 5676, "loss": 0.5734864473342896, "lr": 2.560240432436831e-06, "epoch": 1.557885462555066, "percentage": 77.89, "elapsed_time": "6:30:33", "remaining_time": "1:50:52"} +{"current_steps": 4422, "total_steps": 5676, "loss": 0.48660311102867126, "lr": 2.5563484605057854e-06, "epoch": 1.558237885462555, "percentage": 77.91, "elapsed_time": "6:30:37", "remaining_time": "1:50:46"} +{"current_steps": 4423, "total_steps": 5676, "loss": 0.6442986726760864, "lr": 2.552459015462776e-06, "epoch": 1.558590308370044, "percentage": 77.92, "elapsed_time": "6:30:43", "remaining_time": "1:50:41"} +{"current_steps": 4424, "total_steps": 5676, "loss": 0.5871995091438293, "lr": 2.548572098628158e-06, "epoch": 1.558942731277533, "percentage": 77.94, "elapsed_time": "6:30:49", "remaining_time": "1:50:36"} +{"current_steps": 4425, "total_steps": 5676, "loss": 0.5899579524993896, "lr": 2.544687711321415e-06, "epoch": 1.5592951541850222, "percentage": 77.96, "elapsed_time": "6:30:56", "remaining_time": "1:50:31"} +{"current_steps": 4426, "total_steps": 5676, "loss": 0.571341872215271, "lr": 2.540805854861177e-06, "epoch": 1.5596475770925111, "percentage": 77.98, "elapsed_time": "6:31:01", "remaining_time": "1:50:26"} +{"current_steps": 4427, "total_steps": 
5676, "loss": 0.6297308206558228, "lr": 2.5369265305652112e-06, "epoch": 1.56, "percentage": 78.0, "elapsed_time": "6:31:07", "remaining_time": "1:50:20"} +{"current_steps": 4428, "total_steps": 5676, "loss": 0.6277692317962646, "lr": 2.5330497397504274e-06, "epoch": 1.560352422907489, "percentage": 78.01, "elapsed_time": "6:31:12", "remaining_time": "1:50:15"} +{"current_steps": 4429, "total_steps": 5676, "loss": 0.5124595165252686, "lr": 2.5291754837328787e-06, "epoch": 1.560704845814978, "percentage": 78.03, "elapsed_time": "6:31:18", "remaining_time": "1:50:10"} +{"current_steps": 4430, "total_steps": 5676, "loss": 0.6777669191360474, "lr": 2.5253037638277557e-06, "epoch": 1.5610572687224669, "percentage": 78.05, "elapsed_time": "6:31:22", "remaining_time": "1:50:04"} +{"current_steps": 4431, "total_steps": 5676, "loss": 0.7380247116088867, "lr": 2.521434581349378e-06, "epoch": 1.5614096916299558, "percentage": 78.07, "elapsed_time": "6:31:27", "remaining_time": "1:49:59"} +{"current_steps": 4432, "total_steps": 5676, "loss": 0.6605849266052246, "lr": 2.5175679376112206e-06, "epoch": 1.561762114537445, "percentage": 78.08, "elapsed_time": "6:31:32", "remaining_time": "1:49:54"} +{"current_steps": 4433, "total_steps": 5676, "loss": 0.5688329935073853, "lr": 2.5137038339258837e-06, "epoch": 1.562114537444934, "percentage": 78.1, "elapsed_time": "6:31:37", "remaining_time": "1:49:48"} +{"current_steps": 4434, "total_steps": 5676, "loss": 0.6731508374214172, "lr": 2.5098422716051197e-06, "epoch": 1.562466960352423, "percentage": 78.12, "elapsed_time": "6:31:43", "remaining_time": "1:49:43"} +{"current_steps": 4435, "total_steps": 5676, "loss": 0.5177330374717712, "lr": 2.505983251959798e-06, "epoch": 1.562819383259912, "percentage": 78.14, "elapsed_time": "6:31:49", "remaining_time": "1:49:38"} +{"current_steps": 4436, "total_steps": 5676, "loss": 0.5307918787002563, "lr": 2.502126776299938e-06, "epoch": 1.563171806167401, "percentage": 78.15, "elapsed_time": 
"6:31:55", "remaining_time": "1:49:33"} +{"current_steps": 4437, "total_steps": 5676, "loss": 0.59647536277771, "lr": 2.4982728459346974e-06, "epoch": 1.5635242290748899, "percentage": 78.17, "elapsed_time": "6:32:00", "remaining_time": "1:49:27"} +{"current_steps": 4438, "total_steps": 5676, "loss": 0.6215553283691406, "lr": 2.494421462172365e-06, "epoch": 1.5638766519823788, "percentage": 78.19, "elapsed_time": "6:32:05", "remaining_time": "1:49:22"} +{"current_steps": 4439, "total_steps": 5676, "loss": 0.49461615085601807, "lr": 2.490572626320359e-06, "epoch": 1.5642290748898677, "percentage": 78.21, "elapsed_time": "6:32:10", "remaining_time": "1:49:17"} +{"current_steps": 4440, "total_steps": 5676, "loss": 0.6625338196754456, "lr": 2.486726339685247e-06, "epoch": 1.5645814977973567, "percentage": 78.22, "elapsed_time": "6:32:15", "remaining_time": "1:49:11"} +{"current_steps": 4441, "total_steps": 5676, "loss": 0.4059983193874359, "lr": 2.4828826035727214e-06, "epoch": 1.5649339207048458, "percentage": 78.24, "elapsed_time": "6:32:19", "remaining_time": "1:49:06"} +{"current_steps": 4442, "total_steps": 5676, "loss": 0.6234895586967468, "lr": 2.47904141928761e-06, "epoch": 1.5652863436123348, "percentage": 78.26, "elapsed_time": "6:32:24", "remaining_time": "1:49:00"} +{"current_steps": 4443, "total_steps": 5676, "loss": 0.513421893119812, "lr": 2.4752027881338757e-06, "epoch": 1.565638766519824, "percentage": 78.28, "elapsed_time": "6:32:30", "remaining_time": "1:48:55"} +{"current_steps": 4444, "total_steps": 5676, "loss": 0.6168510913848877, "lr": 2.4713667114146123e-06, "epoch": 1.5659911894273129, "percentage": 78.29, "elapsed_time": "6:32:34", "remaining_time": "1:48:50"} +{"current_steps": 4445, "total_steps": 5676, "loss": 0.5474672317504883, "lr": 2.4675331904320533e-06, "epoch": 1.5663436123348018, "percentage": 78.31, "elapsed_time": "6:32:40", "remaining_time": "1:48:44"} +{"current_steps": 4446, "total_steps": 5676, "loss": 0.7464281916618347, 
"lr": 2.46370222648756e-06, "epoch": 1.5666960352422907, "percentage": 78.33, "elapsed_time": "6:32:46", "remaining_time": "1:48:39"} +{"current_steps": 4447, "total_steps": 5676, "loss": 0.5890274047851562, "lr": 2.4598738208816155e-06, "epoch": 1.5670484581497797, "percentage": 78.35, "elapsed_time": "6:32:53", "remaining_time": "1:48:34"} +{"current_steps": 4448, "total_steps": 5676, "loss": 0.7577700018882751, "lr": 2.4560479749138554e-06, "epoch": 1.5674008810572686, "percentage": 78.37, "elapsed_time": "6:32:58", "remaining_time": "1:48:29"} +{"current_steps": 4449, "total_steps": 5676, "loss": 0.5374037027359009, "lr": 2.4522246898830302e-06, "epoch": 1.5677533039647578, "percentage": 78.38, "elapsed_time": "6:33:04", "remaining_time": "1:48:24"} +{"current_steps": 4450, "total_steps": 5676, "loss": 0.44840407371520996, "lr": 2.4484039670870286e-06, "epoch": 1.5681057268722467, "percentage": 78.4, "elapsed_time": "6:33:09", "remaining_time": "1:48:19"} +{"current_steps": 4451, "total_steps": 5676, "loss": 0.5144427418708801, "lr": 2.4445858078228647e-06, "epoch": 1.5684581497797356, "percentage": 78.42, "elapsed_time": "6:33:14", "remaining_time": "1:48:13"} +{"current_steps": 4452, "total_steps": 5676, "loss": 0.39119952917099, "lr": 2.440770213386684e-06, "epoch": 1.5688105726872248, "percentage": 78.44, "elapsed_time": "6:33:19", "remaining_time": "1:48:08"} +{"current_steps": 4453, "total_steps": 5676, "loss": 0.5287434458732605, "lr": 2.436957185073766e-06, "epoch": 1.5691629955947137, "percentage": 78.45, "elapsed_time": "6:33:26", "remaining_time": "1:48:03"} +{"current_steps": 4454, "total_steps": 5676, "loss": 0.568587064743042, "lr": 2.4331467241785157e-06, "epoch": 1.5695154185022027, "percentage": 78.47, "elapsed_time": "6:33:29", "remaining_time": "1:47:57"} +{"current_steps": 4455, "total_steps": 5676, "loss": 0.5522792339324951, "lr": 2.429338831994458e-06, "epoch": 1.5698678414096916, "percentage": 78.49, "elapsed_time": "6:33:34", 
"remaining_time": "1:47:52"} +{"current_steps": 4456, "total_steps": 5676, "loss": 0.48070845007896423, "lr": 2.425533509814262e-06, "epoch": 1.5702202643171805, "percentage": 78.51, "elapsed_time": "6:33:40", "remaining_time": "1:47:47"} +{"current_steps": 4457, "total_steps": 5676, "loss": 0.44293439388275146, "lr": 2.4217307589297135e-06, "epoch": 1.5705726872246695, "percentage": 78.52, "elapsed_time": "6:33:45", "remaining_time": "1:47:41"} +{"current_steps": 4458, "total_steps": 5676, "loss": 0.5753301382064819, "lr": 2.4179305806317266e-06, "epoch": 1.5709251101321586, "percentage": 78.54, "elapsed_time": "6:33:50", "remaining_time": "1:47:36"} +{"current_steps": 4459, "total_steps": 5676, "loss": 0.5873000025749207, "lr": 2.414132976210346e-06, "epoch": 1.5712775330396476, "percentage": 78.56, "elapsed_time": "6:33:55", "remaining_time": "1:47:30"} +{"current_steps": 4460, "total_steps": 5676, "loss": 0.6084823608398438, "lr": 2.410337946954736e-06, "epoch": 1.5716299559471367, "percentage": 78.58, "elapsed_time": "6:34:01", "remaining_time": "1:47:25"} +{"current_steps": 4461, "total_steps": 5676, "loss": 0.541124165058136, "lr": 2.4065454941531963e-06, "epoch": 1.5719823788546257, "percentage": 78.59, "elapsed_time": "6:34:06", "remaining_time": "1:47:20"} +{"current_steps": 4462, "total_steps": 5676, "loss": 0.5170080661773682, "lr": 2.4027556190931446e-06, "epoch": 1.5723348017621146, "percentage": 78.61, "elapsed_time": "6:34:11", "remaining_time": "1:47:15"} +{"current_steps": 4463, "total_steps": 5676, "loss": 0.5613514184951782, "lr": 2.398968323061125e-06, "epoch": 1.5726872246696035, "percentage": 78.63, "elapsed_time": "6:34:16", "remaining_time": "1:47:09"} +{"current_steps": 4464, "total_steps": 5676, "loss": 0.6645728349685669, "lr": 2.395183607342807e-06, "epoch": 1.5730396475770925, "percentage": 78.65, "elapsed_time": "6:34:22", "remaining_time": "1:47:04"} +{"current_steps": 4465, "total_steps": 5676, "loss": 0.7077093124389648, "lr": 
2.391401473222983e-06, "epoch": 1.5733920704845814, "percentage": 78.66, "elapsed_time": "6:34:28", "remaining_time": "1:46:59"} +{"current_steps": 4466, "total_steps": 5676, "loss": 0.5687523484230042, "lr": 2.387621921985571e-06, "epoch": 1.5737444933920703, "percentage": 78.68, "elapsed_time": "6:34:34", "remaining_time": "1:46:54"} +{"current_steps": 4467, "total_steps": 5676, "loss": 0.5837362408638, "lr": 2.38384495491361e-06, "epoch": 1.5740969162995595, "percentage": 78.7, "elapsed_time": "6:34:38", "remaining_time": "1:46:48"} +{"current_steps": 4468, "total_steps": 5676, "loss": 0.5552037358283997, "lr": 2.3800705732892615e-06, "epoch": 1.5744493392070484, "percentage": 78.72, "elapsed_time": "6:34:43", "remaining_time": "1:46:43"} +{"current_steps": 4469, "total_steps": 5676, "loss": 0.5502952337265015, "lr": 2.376298778393814e-06, "epoch": 1.5748017621145376, "percentage": 78.74, "elapsed_time": "6:34:47", "remaining_time": "1:46:37"} +{"current_steps": 4470, "total_steps": 5676, "loss": 0.5621509552001953, "lr": 2.3725295715076734e-06, "epoch": 1.5751541850220265, "percentage": 78.75, "elapsed_time": "6:34:51", "remaining_time": "1:46:31"} +{"current_steps": 4471, "total_steps": 5676, "loss": 0.6703782081604004, "lr": 2.3687629539103676e-06, "epoch": 1.5755066079295155, "percentage": 78.77, "elapsed_time": "6:34:55", "remaining_time": "1:46:26"} +{"current_steps": 4472, "total_steps": 5676, "loss": 0.5681235194206238, "lr": 2.3649989268805453e-06, "epoch": 1.5758590308370044, "percentage": 78.79, "elapsed_time": "6:35:01", "remaining_time": "1:46:21"} +{"current_steps": 4473, "total_steps": 5676, "loss": 0.611667811870575, "lr": 2.361237491695978e-06, "epoch": 1.5762114537444933, "percentage": 78.81, "elapsed_time": "6:35:09", "remaining_time": "1:46:16"} +{"current_steps": 4474, "total_steps": 5676, "loss": 0.5758671760559082, "lr": 2.3574786496335546e-06, "epoch": 1.5765638766519823, "percentage": 78.82, "elapsed_time": "6:35:12", "remaining_time": 
"1:46:10"} +{"current_steps": 4475, "total_steps": 5676, "loss": 0.4865596294403076, "lr": 2.3537224019692863e-06, "epoch": 1.5769162995594712, "percentage": 78.84, "elapsed_time": "6:35:18", "remaining_time": "1:46:05"} +{"current_steps": 4476, "total_steps": 5676, "loss": 0.6356204152107239, "lr": 2.3499687499782976e-06, "epoch": 1.5772687224669604, "percentage": 78.86, "elapsed_time": "6:35:23", "remaining_time": "1:46:00"} +{"current_steps": 4477, "total_steps": 5676, "loss": 0.7177166938781738, "lr": 2.346217694934847e-06, "epoch": 1.5776211453744493, "percentage": 78.88, "elapsed_time": "6:35:29", "remaining_time": "1:45:55"} +{"current_steps": 4478, "total_steps": 5676, "loss": 0.5727916955947876, "lr": 2.3424692381122882e-06, "epoch": 1.5779735682819385, "percentage": 78.89, "elapsed_time": "6:35:36", "remaining_time": "1:45:50"} +{"current_steps": 4479, "total_steps": 5676, "loss": 0.4904511570930481, "lr": 2.3387233807831144e-06, "epoch": 1.5783259911894274, "percentage": 78.91, "elapsed_time": "6:35:42", "remaining_time": "1:45:45"} +{"current_steps": 4480, "total_steps": 5676, "loss": 0.6029622554779053, "lr": 2.3349801242189262e-06, "epoch": 1.5786784140969163, "percentage": 78.93, "elapsed_time": "6:35:47", "remaining_time": "1:45:39"} +{"current_steps": 4481, "total_steps": 5676, "loss": 0.6462864875793457, "lr": 2.3312394696904404e-06, "epoch": 1.5790308370044053, "percentage": 78.95, "elapsed_time": "6:35:51", "remaining_time": "1:45:34"} +{"current_steps": 4482, "total_steps": 5676, "loss": 0.6000367403030396, "lr": 2.327501418467495e-06, "epoch": 1.5793832599118942, "percentage": 78.96, "elapsed_time": "6:35:57", "remaining_time": "1:45:29"} +{"current_steps": 4483, "total_steps": 5676, "loss": 0.5498829483985901, "lr": 2.3237659718190398e-06, "epoch": 1.5797356828193831, "percentage": 78.98, "elapsed_time": "6:36:02", "remaining_time": "1:45:23"} +{"current_steps": 4484, "total_steps": 5676, "loss": 0.5445006489753723, "lr": 
2.320033131013142e-06, "epoch": 1.580088105726872, "percentage": 79.0, "elapsed_time": "6:36:06", "remaining_time": "1:45:17"} +{"current_steps": 4485, "total_steps": 5676, "loss": 0.4878338575363159, "lr": 2.316302897316992e-06, "epoch": 1.5804405286343612, "percentage": 79.02, "elapsed_time": "6:36:12", "remaining_time": "1:45:12"} +{"current_steps": 4486, "total_steps": 5676, "loss": 0.473583459854126, "lr": 2.3125752719968763e-06, "epoch": 1.5807929515418502, "percentage": 79.03, "elapsed_time": "6:36:18", "remaining_time": "1:45:07"} +{"current_steps": 4487, "total_steps": 5676, "loss": 0.6530570983886719, "lr": 2.308850256318218e-06, "epoch": 1.5811453744493393, "percentage": 79.05, "elapsed_time": "6:36:23", "remaining_time": "1:45:02"} +{"current_steps": 4488, "total_steps": 5676, "loss": 0.6925215721130371, "lr": 2.30512785154554e-06, "epoch": 1.5814977973568283, "percentage": 79.07, "elapsed_time": "6:36:29", "remaining_time": "1:44:57"} +{"current_steps": 4489, "total_steps": 5676, "loss": 0.6210705637931824, "lr": 2.3014080589424837e-06, "epoch": 1.5818502202643172, "percentage": 79.09, "elapsed_time": "6:36:34", "remaining_time": "1:44:51"} +{"current_steps": 4490, "total_steps": 5676, "loss": 0.5843231678009033, "lr": 2.2976908797718013e-06, "epoch": 1.5822026431718061, "percentage": 79.11, "elapsed_time": "6:36:40", "remaining_time": "1:44:46"} +{"current_steps": 4491, "total_steps": 5676, "loss": 0.7014307379722595, "lr": 2.2939763152953576e-06, "epoch": 1.582555066079295, "percentage": 79.12, "elapsed_time": "6:36:45", "remaining_time": "1:44:41"} +{"current_steps": 4492, "total_steps": 5676, "loss": 0.563744843006134, "lr": 2.2902643667741386e-06, "epoch": 1.582907488986784, "percentage": 79.14, "elapsed_time": "6:36:50", "remaining_time": "1:44:35"} +{"current_steps": 4493, "total_steps": 5676, "loss": 0.6067275404930115, "lr": 2.286555035468233e-06, "epoch": 1.5832599118942732, "percentage": 79.16, "elapsed_time": "6:36:56", "remaining_time": 
"1:44:30"} +{"current_steps": 4494, "total_steps": 5676, "loss": 0.5471328496932983, "lr": 2.282848322636836e-06, "epoch": 1.5836123348017621, "percentage": 79.18, "elapsed_time": "6:37:03", "remaining_time": "1:44:25"} +{"current_steps": 4495, "total_steps": 5676, "loss": 0.4994550943374634, "lr": 2.2791442295382693e-06, "epoch": 1.583964757709251, "percentage": 79.19, "elapsed_time": "6:37:08", "remaining_time": "1:44:20"} +{"current_steps": 4496, "total_steps": 5676, "loss": 0.6064262390136719, "lr": 2.275442757429954e-06, "epoch": 1.5843171806167402, "percentage": 79.21, "elapsed_time": "6:37:12", "remaining_time": "1:44:15"} +{"current_steps": 4497, "total_steps": 5676, "loss": 0.5119039416313171, "lr": 2.2717439075684268e-06, "epoch": 1.5846696035242291, "percentage": 79.23, "elapsed_time": "6:37:18", "remaining_time": "1:44:09"} +{"current_steps": 4498, "total_steps": 5676, "loss": 0.7276502251625061, "lr": 2.26804768120933e-06, "epoch": 1.585022026431718, "percentage": 79.25, "elapsed_time": "6:37:23", "remaining_time": "1:44:04"} +{"current_steps": 4499, "total_steps": 5676, "loss": 0.6175409555435181, "lr": 2.264354079607416e-06, "epoch": 1.585374449339207, "percentage": 79.26, "elapsed_time": "6:37:28", "remaining_time": "1:43:59"} +{"current_steps": 4500, "total_steps": 5676, "loss": 0.6289592981338501, "lr": 2.2606631040165517e-06, "epoch": 1.585726872246696, "percentage": 79.28, "elapsed_time": "6:37:33", "remaining_time": "1:43:53"} +{"current_steps": 4501, "total_steps": 5676, "loss": 0.5802761316299438, "lr": 2.2569747556897103e-06, "epoch": 1.5860792951541849, "percentage": 79.3, "elapsed_time": "6:37:44", "remaining_time": "1:43:49"} +{"current_steps": 4502, "total_steps": 5676, "loss": 0.5883978605270386, "lr": 2.2532890358789604e-06, "epoch": 1.586431718061674, "percentage": 79.32, "elapsed_time": "6:37:50", "remaining_time": "1:43:44"} +{"current_steps": 4503, "total_steps": 5676, "loss": 0.6915061473846436, "lr": 2.2496059458355e-06, "epoch": 
1.586784140969163, "percentage": 79.33, "elapsed_time": "6:37:56", "remaining_time": "1:43:39"} +{"current_steps": 4504, "total_steps": 5676, "loss": 0.6255539655685425, "lr": 2.2459254868096194e-06, "epoch": 1.5871365638766521, "percentage": 79.35, "elapsed_time": "6:38:02", "remaining_time": "1:43:34"} +{"current_steps": 4505, "total_steps": 5676, "loss": 0.6788307428359985, "lr": 2.2422476600507203e-06, "epoch": 1.587488986784141, "percentage": 79.37, "elapsed_time": "6:38:08", "remaining_time": "1:43:29"} +{"current_steps": 4506, "total_steps": 5676, "loss": 0.5651443004608154, "lr": 2.2385724668073104e-06, "epoch": 1.58784140969163, "percentage": 79.39, "elapsed_time": "6:38:14", "remaining_time": "1:43:24"} +{"current_steps": 4507, "total_steps": 5676, "loss": 0.5308901071548462, "lr": 2.2348999083270005e-06, "epoch": 1.588193832599119, "percentage": 79.4, "elapsed_time": "6:38:18", "remaining_time": "1:43:18"} +{"current_steps": 4508, "total_steps": 5676, "loss": 0.60570228099823, "lr": 2.2312299858565156e-06, "epoch": 1.5885462555066079, "percentage": 79.42, "elapsed_time": "6:38:23", "remaining_time": "1:43:13"} +{"current_steps": 4509, "total_steps": 5676, "loss": 0.6544185876846313, "lr": 2.22756270064168e-06, "epoch": 1.5888986784140968, "percentage": 79.44, "elapsed_time": "6:38:27", "remaining_time": "1:43:07"} +{"current_steps": 4510, "total_steps": 5676, "loss": 0.667883038520813, "lr": 2.2238980539274156e-06, "epoch": 1.5892511013215858, "percentage": 79.46, "elapsed_time": "6:38:33", "remaining_time": "1:43:02"} +{"current_steps": 4511, "total_steps": 5676, "loss": 0.647671103477478, "lr": 2.2202360469577622e-06, "epoch": 1.589603524229075, "percentage": 79.47, "elapsed_time": "6:38:38", "remaining_time": "1:42:57"} +{"current_steps": 4512, "total_steps": 5676, "loss": 0.6990867257118225, "lr": 2.216576680975856e-06, "epoch": 1.5899559471365639, "percentage": 79.49, "elapsed_time": "6:38:42", "remaining_time": "1:42:51"} +{"current_steps": 4513, 
"total_steps": 5676, "loss": 0.6292023658752441, "lr": 2.212919957223938e-06, "epoch": 1.590308370044053, "percentage": 79.51, "elapsed_time": "6:38:49", "remaining_time": "1:42:46"} +{"current_steps": 4514, "total_steps": 5676, "loss": 0.638721227645874, "lr": 2.2092658769433504e-06, "epoch": 1.590660792951542, "percentage": 79.53, "elapsed_time": "6:38:54", "remaining_time": "1:42:41"} +{"current_steps": 4515, "total_steps": 5676, "loss": 0.5622225403785706, "lr": 2.2056144413745396e-06, "epoch": 1.5910132158590309, "percentage": 79.55, "elapsed_time": "6:38:59", "remaining_time": "1:42:35"} +{"current_steps": 4516, "total_steps": 5676, "loss": 0.44093507528305054, "lr": 2.2019656517570576e-06, "epoch": 1.5913656387665198, "percentage": 79.56, "elapsed_time": "6:39:05", "remaining_time": "1:42:30"} +{"current_steps": 4517, "total_steps": 5676, "loss": 0.6889619827270508, "lr": 2.198319509329556e-06, "epoch": 1.5917180616740088, "percentage": 79.58, "elapsed_time": "6:39:10", "remaining_time": "1:42:25"} +{"current_steps": 4518, "total_steps": 5676, "loss": 0.5873552560806274, "lr": 2.1946760153297773e-06, "epoch": 1.5920704845814977, "percentage": 79.6, "elapsed_time": "6:39:15", "remaining_time": "1:42:19"} +{"current_steps": 4519, "total_steps": 5676, "loss": 0.7172325849533081, "lr": 2.191035170994584e-06, "epoch": 1.5924229074889866, "percentage": 79.62, "elapsed_time": "6:39:20", "remaining_time": "1:42:14"} +{"current_steps": 4520, "total_steps": 5676, "loss": 0.520845890045166, "lr": 2.187396977559927e-06, "epoch": 1.5927753303964758, "percentage": 79.63, "elapsed_time": "6:39:25", "remaining_time": "1:42:09"} +{"current_steps": 4521, "total_steps": 5676, "loss": 0.5241606831550598, "lr": 2.1837614362608574e-06, "epoch": 1.5931277533039647, "percentage": 79.65, "elapsed_time": "6:39:31", "remaining_time": "1:42:04"} +{"current_steps": 4522, "total_steps": 5676, "loss": 0.583808422088623, "lr": 2.1801285483315303e-06, "epoch": 1.5934801762114539, 
"percentage": 79.67, "elapsed_time": "6:39:36", "remaining_time": "1:41:58"} +{"current_steps": 4523, "total_steps": 5676, "loss": 0.4648814797401428, "lr": 2.1764983150051955e-06, "epoch": 1.5938325991189428, "percentage": 79.69, "elapsed_time": "6:39:39", "remaining_time": "1:41:52"} +{"current_steps": 4524, "total_steps": 5676, "loss": 0.590090274810791, "lr": 2.1728707375142087e-06, "epoch": 1.5941850220264318, "percentage": 79.7, "elapsed_time": "6:39:44", "remaining_time": "1:41:47"} +{"current_steps": 4525, "total_steps": 5676, "loss": 0.6554102897644043, "lr": 2.16924581709002e-06, "epoch": 1.5945374449339207, "percentage": 79.72, "elapsed_time": "6:39:50", "remaining_time": "1:41:42"} +{"current_steps": 4526, "total_steps": 5676, "loss": 0.5880511999130249, "lr": 2.1656235549631677e-06, "epoch": 1.5948898678414096, "percentage": 79.74, "elapsed_time": "6:39:55", "remaining_time": "1:41:37"} +{"current_steps": 4527, "total_steps": 5676, "loss": 0.5779908299446106, "lr": 2.1620039523633074e-06, "epoch": 1.5952422907488986, "percentage": 79.76, "elapsed_time": "6:40:01", "remaining_time": "1:41:31"} +{"current_steps": 4528, "total_steps": 5676, "loss": 0.5030412673950195, "lr": 2.1583870105191775e-06, "epoch": 1.5955947136563877, "percentage": 79.77, "elapsed_time": "6:40:05", "remaining_time": "1:41:26"} +{"current_steps": 4529, "total_steps": 5676, "loss": 0.5667461156845093, "lr": 2.1547727306586173e-06, "epoch": 1.5959471365638767, "percentage": 79.79, "elapsed_time": "6:40:11", "remaining_time": "1:41:21"} +{"current_steps": 4530, "total_steps": 5676, "loss": 0.6820607781410217, "lr": 2.151161114008563e-06, "epoch": 1.5962995594713656, "percentage": 79.81, "elapsed_time": "6:40:16", "remaining_time": "1:41:15"} +{"current_steps": 4531, "total_steps": 5676, "loss": 0.6165209412574768, "lr": 2.1475521617950425e-06, "epoch": 1.5966519823788548, "percentage": 79.83, "elapsed_time": "6:40:21", "remaining_time": "1:41:10"} +{"current_steps": 4532, 
"total_steps": 5676, "loss": 0.5987168550491333, "lr": 2.1439458752431887e-06, "epoch": 1.5970044052863437, "percentage": 79.84, "elapsed_time": "6:40:27", "remaining_time": "1:41:05"} +{"current_steps": 4533, "total_steps": 5676, "loss": 0.5161086320877075, "lr": 2.1403422555772226e-06, "epoch": 1.5973568281938326, "percentage": 79.86, "elapsed_time": "6:40:33", "remaining_time": "1:41:00"} +{"current_steps": 4534, "total_steps": 5676, "loss": 0.5216903686523438, "lr": 2.1367413040204543e-06, "epoch": 1.5977092511013216, "percentage": 79.88, "elapsed_time": "6:40:38", "remaining_time": "1:40:54"} +{"current_steps": 4535, "total_steps": 5676, "loss": 0.5664666891098022, "lr": 2.133143021795302e-06, "epoch": 1.5980616740088105, "percentage": 79.9, "elapsed_time": "6:40:44", "remaining_time": "1:40:49"} +{"current_steps": 4536, "total_steps": 5676, "loss": 0.501051127910614, "lr": 2.129547410123268e-06, "epoch": 1.5984140969162994, "percentage": 79.92, "elapsed_time": "6:40:51", "remaining_time": "1:40:44"} +{"current_steps": 4537, "total_steps": 5676, "loss": 0.5466792583465576, "lr": 2.1259544702249515e-06, "epoch": 1.5987665198237886, "percentage": 79.93, "elapsed_time": "6:40:56", "remaining_time": "1:40:39"} +{"current_steps": 4538, "total_steps": 5676, "loss": 0.5295613408088684, "lr": 2.122364203320043e-06, "epoch": 1.5991189427312775, "percentage": 79.95, "elapsed_time": "6:41:01", "remaining_time": "1:40:33"} +{"current_steps": 4539, "total_steps": 5676, "loss": 0.5406922101974487, "lr": 2.1187766106273224e-06, "epoch": 1.5994713656387667, "percentage": 79.97, "elapsed_time": "6:41:06", "remaining_time": "1:40:28"} +{"current_steps": 4540, "total_steps": 5676, "loss": 0.5908178687095642, "lr": 2.1151916933646764e-06, "epoch": 1.5998237885462556, "percentage": 79.99, "elapsed_time": "6:41:11", "remaining_time": "1:40:23"} +{"current_steps": 4541, "total_steps": 5676, "loss": 0.6207743883132935, "lr": 2.1116094527490594e-06, "epoch": 1.6001762114537446, 
"percentage": 80.0, "elapsed_time": "6:41:17", "remaining_time": "1:40:18"} +{"current_steps": 4542, "total_steps": 5676, "loss": 0.5655614137649536, "lr": 2.1080298899965413e-06, "epoch": 1.6005286343612335, "percentage": 80.02, "elapsed_time": "6:41:22", "remaining_time": "1:40:12"} +{"current_steps": 4543, "total_steps": 5676, "loss": 0.6019319295883179, "lr": 2.104453006322268e-06, "epoch": 1.6008810572687224, "percentage": 80.04, "elapsed_time": "6:41:28", "remaining_time": "1:40:07"} +{"current_steps": 4544, "total_steps": 5676, "loss": 0.6109766364097595, "lr": 2.1008788029404794e-06, "epoch": 1.6012334801762114, "percentage": 80.06, "elapsed_time": "6:41:32", "remaining_time": "1:40:02"} +{"current_steps": 4545, "total_steps": 5676, "loss": 0.5309078693389893, "lr": 2.0973072810645078e-06, "epoch": 1.6015859030837003, "percentage": 80.07, "elapsed_time": "6:41:37", "remaining_time": "1:39:56"} +{"current_steps": 4546, "total_steps": 5676, "loss": 0.5440298318862915, "lr": 2.093738441906774e-06, "epoch": 1.6019383259911895, "percentage": 80.09, "elapsed_time": "6:41:41", "remaining_time": "1:39:50"} +{"current_steps": 4547, "total_steps": 5676, "loss": 0.46502384543418884, "lr": 2.0901722866787842e-06, "epoch": 1.6022907488986784, "percentage": 80.11, "elapsed_time": "6:41:48", "remaining_time": "1:39:45"} +{"current_steps": 4548, "total_steps": 5676, "loss": 0.4822906255722046, "lr": 2.086608816591146e-06, "epoch": 1.6026431718061676, "percentage": 80.13, "elapsed_time": "6:41:52", "remaining_time": "1:39:40"} +{"current_steps": 4549, "total_steps": 5676, "loss": 0.6382625699043274, "lr": 2.083048032853534e-06, "epoch": 1.6029955947136565, "percentage": 80.14, "elapsed_time": "6:41:57", "remaining_time": "1:39:34"} +{"current_steps": 4550, "total_steps": 5676, "loss": 0.6070914268493652, "lr": 2.0794899366747334e-06, "epoch": 1.6033480176211454, "percentage": 80.16, "elapsed_time": "6:42:02", "remaining_time": "1:39:29"} +{"current_steps": 4551, 
"total_steps": 5676, "loss": 0.5953283309936523, "lr": 2.0759345292626045e-06, "epoch": 1.6037004405286344, "percentage": 80.18, "elapsed_time": "6:42:07", "remaining_time": "1:39:24"} +{"current_steps": 4552, "total_steps": 5676, "loss": 0.47553640604019165, "lr": 2.0723818118240958e-06, "epoch": 1.6040528634361233, "percentage": 80.2, "elapsed_time": "6:42:12", "remaining_time": "1:39:18"} +{"current_steps": 4553, "total_steps": 5676, "loss": 0.7020712494850159, "lr": 2.0688317855652463e-06, "epoch": 1.6044052863436122, "percentage": 80.21, "elapsed_time": "6:42:18", "remaining_time": "1:39:13"} +{"current_steps": 4554, "total_steps": 5676, "loss": 0.5998836159706116, "lr": 2.0652844516911776e-06, "epoch": 1.6047577092511012, "percentage": 80.23, "elapsed_time": "6:42:24", "remaining_time": "1:39:08"} +{"current_steps": 4555, "total_steps": 5676, "loss": 0.6501786708831787, "lr": 2.0617398114060983e-06, "epoch": 1.6051101321585903, "percentage": 80.25, "elapsed_time": "6:42:29", "remaining_time": "1:39:03"} +{"current_steps": 4556, "total_steps": 5676, "loss": 0.6444278955459595, "lr": 2.0581978659133097e-06, "epoch": 1.6054625550660793, "percentage": 80.27, "elapsed_time": "6:42:34", "remaining_time": "1:38:57"} +{"current_steps": 4557, "total_steps": 5676, "loss": 0.6756579875946045, "lr": 2.0546586164151827e-06, "epoch": 1.6058149779735684, "percentage": 80.29, "elapsed_time": "6:42:39", "remaining_time": "1:38:52"} +{"current_steps": 4558, "total_steps": 5676, "loss": 0.6043737530708313, "lr": 2.051122064113189e-06, "epoch": 1.6061674008810574, "percentage": 80.3, "elapsed_time": "6:42:45", "remaining_time": "1:38:47"} +{"current_steps": 4559, "total_steps": 5676, "loss": 0.6504104137420654, "lr": 2.047588210207877e-06, "epoch": 1.6065198237885463, "percentage": 80.32, "elapsed_time": "6:42:51", "remaining_time": "1:38:42"} +{"current_steps": 4560, "total_steps": 5676, "loss": 0.6586685180664062, "lr": 2.044057055898879e-06, "epoch": 1.6068722466960352, 
"percentage": 80.34, "elapsed_time": "6:42:56", "remaining_time": "1:38:36"} +{"current_steps": 4561, "total_steps": 5676, "loss": 0.4463368058204651, "lr": 2.0405286023849125e-06, "epoch": 1.6072246696035242, "percentage": 80.36, "elapsed_time": "6:43:02", "remaining_time": "1:38:31"} +{"current_steps": 4562, "total_steps": 5676, "loss": 0.5208157896995544, "lr": 2.037002850863777e-06, "epoch": 1.607577092511013, "percentage": 80.37, "elapsed_time": "6:43:07", "remaining_time": "1:38:26"} +{"current_steps": 4563, "total_steps": 5676, "loss": 0.612602174282074, "lr": 2.033479802532354e-06, "epoch": 1.607929515418502, "percentage": 80.39, "elapsed_time": "6:43:12", "remaining_time": "1:38:21"} +{"current_steps": 4564, "total_steps": 5676, "loss": 0.6871482133865356, "lr": 2.0299594585866166e-06, "epoch": 1.6082819383259912, "percentage": 80.41, "elapsed_time": "6:43:18", "remaining_time": "1:38:15"} +{"current_steps": 4565, "total_steps": 5676, "loss": 0.5770177245140076, "lr": 2.0264418202215998e-06, "epoch": 1.6086343612334801, "percentage": 80.43, "elapsed_time": "6:43:22", "remaining_time": "1:38:10"} +{"current_steps": 4566, "total_steps": 5676, "loss": 0.600841224193573, "lr": 2.0229268886314413e-06, "epoch": 1.6089867841409693, "percentage": 80.44, "elapsed_time": "6:43:25", "remaining_time": "1:38:04"} +{"current_steps": 4567, "total_steps": 5676, "loss": 0.47742071747779846, "lr": 2.0194146650093494e-06, "epoch": 1.6093392070484582, "percentage": 80.46, "elapsed_time": "6:43:32", "remaining_time": "1:37:59"} +{"current_steps": 4568, "total_steps": 5676, "loss": 0.5528711080551147, "lr": 2.015905150547612e-06, "epoch": 1.6096916299559472, "percentage": 80.48, "elapsed_time": "6:43:37", "remaining_time": "1:37:54"} +{"current_steps": 4569, "total_steps": 5676, "loss": 0.6892603635787964, "lr": 2.0123983464376028e-06, "epoch": 1.610044052863436, "percentage": 80.5, "elapsed_time": "6:43:43", "remaining_time": "1:37:48"} +{"current_steps": 4570, "total_steps": 
5676, "loss": 0.593653678894043, "lr": 2.0088942538697687e-06, "epoch": 1.610396475770925, "percentage": 80.51, "elapsed_time": "6:43:47", "remaining_time": "1:37:43"} +{"current_steps": 4571, "total_steps": 5676, "loss": 0.5570813417434692, "lr": 2.005392874033646e-06, "epoch": 1.610748898678414, "percentage": 80.53, "elapsed_time": "6:43:53", "remaining_time": "1:37:38"} +{"current_steps": 4572, "total_steps": 5676, "loss": 0.6723357439041138, "lr": 2.0018942081178426e-06, "epoch": 1.6111013215859031, "percentage": 80.55, "elapsed_time": "6:43:58", "remaining_time": "1:37:32"} +{"current_steps": 4573, "total_steps": 5676, "loss": 0.5333940982818604, "lr": 1.9983982573100413e-06, "epoch": 1.611453744493392, "percentage": 80.57, "elapsed_time": "6:44:03", "remaining_time": "1:37:27"} +{"current_steps": 4574, "total_steps": 5676, "loss": 0.6404193043708801, "lr": 1.9949050227970148e-06, "epoch": 1.611806167400881, "percentage": 80.58, "elapsed_time": "6:44:07", "remaining_time": "1:37:21"} +{"current_steps": 4575, "total_steps": 5676, "loss": 0.6831241250038147, "lr": 1.991414505764605e-06, "epoch": 1.6121585903083702, "percentage": 80.6, "elapsed_time": "6:44:12", "remaining_time": "1:37:16"} +{"current_steps": 4576, "total_steps": 5676, "loss": 0.6741847991943359, "lr": 1.9879267073977337e-06, "epoch": 1.612511013215859, "percentage": 80.62, "elapsed_time": "6:44:18", "remaining_time": "1:37:11"} +{"current_steps": 4577, "total_steps": 5676, "loss": 0.5234787464141846, "lr": 1.9844416288804004e-06, "epoch": 1.612863436123348, "percentage": 80.64, "elapsed_time": "6:44:24", "remaining_time": "1:37:06"} +{"current_steps": 4578, "total_steps": 5676, "loss": 0.6462803483009338, "lr": 1.9809592713956782e-06, "epoch": 1.613215859030837, "percentage": 80.66, "elapsed_time": "6:44:28", "remaining_time": "1:37:00"} +{"current_steps": 4579, "total_steps": 5676, "loss": 0.612025797367096, "lr": 1.977479636125724e-06, "epoch": 1.613568281938326, "percentage": 80.67, 
"elapsed_time": "6:44:34", "remaining_time": "1:36:55"} +{"current_steps": 4580, "total_steps": 5676, "loss": 0.5065322518348694, "lr": 1.9740027242517668e-06, "epoch": 1.6139207048458148, "percentage": 80.69, "elapsed_time": "6:44:40", "remaining_time": "1:36:50"} +{"current_steps": 4581, "total_steps": 5676, "loss": 0.4986911714076996, "lr": 1.9705285369540994e-06, "epoch": 1.614273127753304, "percentage": 80.71, "elapsed_time": "6:44:45", "remaining_time": "1:36:44"} +{"current_steps": 4582, "total_steps": 5676, "loss": 0.6030969619750977, "lr": 1.967057075412111e-06, "epoch": 1.614625550660793, "percentage": 80.73, "elapsed_time": "6:44:51", "remaining_time": "1:36:39"} +{"current_steps": 4583, "total_steps": 5676, "loss": 0.6116718649864197, "lr": 1.963588340804251e-06, "epoch": 1.614977973568282, "percentage": 80.74, "elapsed_time": "6:44:57", "remaining_time": "1:36:34"} +{"current_steps": 4584, "total_steps": 5676, "loss": 0.8064850568771362, "lr": 1.960122334308047e-06, "epoch": 1.615330396475771, "percentage": 80.76, "elapsed_time": "6:45:03", "remaining_time": "1:36:29"} +{"current_steps": 4585, "total_steps": 5676, "loss": 0.7416974306106567, "lr": 1.9566590571000997e-06, "epoch": 1.61568281938326, "percentage": 80.78, "elapsed_time": "6:45:07", "remaining_time": "1:36:24"} +{"current_steps": 4586, "total_steps": 5676, "loss": 0.48169833421707153, "lr": 1.9531985103560813e-06, "epoch": 1.616035242290749, "percentage": 80.8, "elapsed_time": "6:45:12", "remaining_time": "1:36:18"} +{"current_steps": 4587, "total_steps": 5676, "loss": 0.7766422629356384, "lr": 1.949740695250746e-06, "epoch": 1.6163876651982378, "percentage": 80.81, "elapsed_time": "6:45:17", "remaining_time": "1:36:13"} +{"current_steps": 4588, "total_steps": 5676, "loss": 0.3793888986110687, "lr": 1.9462856129579144e-06, "epoch": 1.6167400881057268, "percentage": 80.83, "elapsed_time": "6:45:22", "remaining_time": "1:36:07"} +{"current_steps": 4589, "total_steps": 5676, "loss": 
0.6129955053329468, "lr": 1.94283326465047e-06, "epoch": 1.6170925110132157, "percentage": 80.85, "elapsed_time": "6:45:28", "remaining_time": "1:36:02"} +{"current_steps": 4590, "total_steps": 5676, "loss": 0.7420347929000854, "lr": 1.9393836515003874e-06, "epoch": 1.6174449339207049, "percentage": 80.87, "elapsed_time": "6:45:35", "remaining_time": "1:35:57"} +{"current_steps": 4591, "total_steps": 5676, "loss": 0.49013108015060425, "lr": 1.9359367746786993e-06, "epoch": 1.6177973568281938, "percentage": 80.88, "elapsed_time": "6:45:40", "remaining_time": "1:35:52"} +{"current_steps": 4592, "total_steps": 5676, "loss": 0.5198413133621216, "lr": 1.932492635355513e-06, "epoch": 1.618149779735683, "percentage": 80.9, "elapsed_time": "6:45:46", "remaining_time": "1:35:47"} +{"current_steps": 4593, "total_steps": 5676, "loss": 0.6031092405319214, "lr": 1.929051234700007e-06, "epoch": 1.618502202643172, "percentage": 80.92, "elapsed_time": "6:45:50", "remaining_time": "1:35:41"} +{"current_steps": 4594, "total_steps": 5676, "loss": 0.6269406080245972, "lr": 1.9256125738804264e-06, "epoch": 1.6188546255506608, "percentage": 80.94, "elapsed_time": "6:45:54", "remaining_time": "1:35:36"} +{"current_steps": 4595, "total_steps": 5676, "loss": 0.4518774747848511, "lr": 1.922176654064096e-06, "epoch": 1.6192070484581498, "percentage": 80.95, "elapsed_time": "6:45:59", "remaining_time": "1:35:30"} +{"current_steps": 4596, "total_steps": 5676, "loss": 0.6199424862861633, "lr": 1.9187434764174027e-06, "epoch": 1.6195594713656387, "percentage": 80.97, "elapsed_time": "6:46:04", "remaining_time": "1:35:25"} +{"current_steps": 4597, "total_steps": 5676, "loss": 0.5155355930328369, "lr": 1.9153130421057955e-06, "epoch": 1.6199118942731277, "percentage": 80.99, "elapsed_time": "6:46:09", "remaining_time": "1:35:19"} +{"current_steps": 4598, "total_steps": 5676, "loss": 0.6188424229621887, "lr": 1.9118853522938087e-06, "epoch": 1.6202643171806166, "percentage": 81.01, "elapsed_time": 
"6:46:16", "remaining_time": "1:35:14"} +{"current_steps": 4599, "total_steps": 5676, "loss": 0.5807337164878845, "lr": 1.908460408145033e-06, "epoch": 1.6206167400881057, "percentage": 81.03, "elapsed_time": "6:46:20", "remaining_time": "1:35:09"} +{"current_steps": 4600, "total_steps": 5676, "loss": 0.5258378982543945, "lr": 1.9050382108221311e-06, "epoch": 1.6209691629955947, "percentage": 81.04, "elapsed_time": "6:46:26", "remaining_time": "1:35:04"} +{"current_steps": 4601, "total_steps": 5676, "loss": 0.6612311601638794, "lr": 1.9016187614868308e-06, "epoch": 1.6213215859030838, "percentage": 81.06, "elapsed_time": "6:46:36", "remaining_time": "1:35:00"} +{"current_steps": 4602, "total_steps": 5676, "loss": 0.611383855342865, "lr": 1.8982020612999285e-06, "epoch": 1.6216740088105728, "percentage": 81.08, "elapsed_time": "6:46:40", "remaining_time": "1:34:54"} +{"current_steps": 4603, "total_steps": 5676, "loss": 0.6111105680465698, "lr": 1.894788111421294e-06, "epoch": 1.6220264317180617, "percentage": 81.1, "elapsed_time": "6:46:45", "remaining_time": "1:34:49"} +{"current_steps": 4604, "total_steps": 5676, "loss": 0.7554557919502258, "lr": 1.8913769130098504e-06, "epoch": 1.6223788546255506, "percentage": 81.11, "elapsed_time": "6:46:52", "remaining_time": "1:34:44"} +{"current_steps": 4605, "total_steps": 5676, "loss": 0.597324013710022, "lr": 1.887968467223591e-06, "epoch": 1.6227312775330396, "percentage": 81.13, "elapsed_time": "6:46:56", "remaining_time": "1:34:38"} +{"current_steps": 4606, "total_steps": 5676, "loss": 0.6232750415802002, "lr": 1.8845627752195839e-06, "epoch": 1.6230837004405285, "percentage": 81.15, "elapsed_time": "6:47:01", "remaining_time": "1:34:33"} +{"current_steps": 4607, "total_steps": 5676, "loss": 0.45699936151504517, "lr": 1.8811598381539543e-06, "epoch": 1.6234361233480175, "percentage": 81.17, "elapsed_time": "6:47:06", "remaining_time": "1:34:27"} +{"current_steps": 4608, "total_steps": 5676, "loss": 0.5307953953742981, 
"lr": 1.87775965718189e-06, "epoch": 1.6237885462555066, "percentage": 81.18, "elapsed_time": "6:47:10", "remaining_time": "1:34:22"} +{"current_steps": 4609, "total_steps": 5676, "loss": 0.6013764142990112, "lr": 1.8743622334576495e-06, "epoch": 1.6241409691629956, "percentage": 81.2, "elapsed_time": "6:47:15", "remaining_time": "1:34:16"} +{"current_steps": 4610, "total_steps": 5676, "loss": 0.5143340826034546, "lr": 1.8709675681345485e-06, "epoch": 1.6244933920704847, "percentage": 81.22, "elapsed_time": "6:47:20", "remaining_time": "1:34:11"} +{"current_steps": 4611, "total_steps": 5676, "loss": 0.506861686706543, "lr": 1.8675756623649788e-06, "epoch": 1.6248458149779736, "percentage": 81.24, "elapsed_time": "6:47:26", "remaining_time": "1:34:06"} +{"current_steps": 4612, "total_steps": 5676, "loss": 0.6807849407196045, "lr": 1.8641865173003793e-06, "epoch": 1.6251982378854626, "percentage": 81.25, "elapsed_time": "6:47:30", "remaining_time": "1:34:00"} +{"current_steps": 4613, "total_steps": 5676, "loss": 0.592629075050354, "lr": 1.8608001340912573e-06, "epoch": 1.6255506607929515, "percentage": 81.27, "elapsed_time": "6:47:35", "remaining_time": "1:33:55"} +{"current_steps": 4614, "total_steps": 5676, "loss": 0.5669249296188354, "lr": 1.8574165138871925e-06, "epoch": 1.6259030837004405, "percentage": 81.29, "elapsed_time": "6:47:40", "remaining_time": "1:33:50"} +{"current_steps": 4615, "total_steps": 5676, "loss": 0.7123057246208191, "lr": 1.8540356578368135e-06, "epoch": 1.6262555066079294, "percentage": 81.31, "elapsed_time": "6:47:47", "remaining_time": "1:33:45"} +{"current_steps": 4616, "total_steps": 5676, "loss": 0.5844429731369019, "lr": 1.8506575670878168e-06, "epoch": 1.6266079295154185, "percentage": 81.32, "elapsed_time": "6:47:53", "remaining_time": "1:33:39"} +{"current_steps": 4617, "total_steps": 5676, "loss": 0.661457359790802, "lr": 1.8472822427869597e-06, "epoch": 1.6269603524229075, "percentage": 81.34, "elapsed_time": "6:47:59", 
"remaining_time": "1:33:34"} +{"current_steps": 4618, "total_steps": 5676, "loss": 0.6944575905799866, "lr": 1.8439096860800565e-06, "epoch": 1.6273127753303964, "percentage": 81.36, "elapsed_time": "6:48:04", "remaining_time": "1:33:29"} +{"current_steps": 4619, "total_steps": 5676, "loss": 0.5818712115287781, "lr": 1.8405398981119927e-06, "epoch": 1.6276651982378856, "percentage": 81.38, "elapsed_time": "6:48:10", "remaining_time": "1:33:24"} +{"current_steps": 4620, "total_steps": 5676, "loss": 0.6373921632766724, "lr": 1.8371728800266964e-06, "epoch": 1.6280176211453745, "percentage": 81.4, "elapsed_time": "6:48:14", "remaining_time": "1:33:18"} +{"current_steps": 4621, "total_steps": 5676, "loss": 0.4629862904548645, "lr": 1.8338086329671734e-06, "epoch": 1.6283700440528635, "percentage": 81.41, "elapsed_time": "6:48:19", "remaining_time": "1:33:13"} +{"current_steps": 4622, "total_steps": 5676, "loss": 0.6537790894508362, "lr": 1.8304471580754779e-06, "epoch": 1.6287224669603524, "percentage": 81.43, "elapsed_time": "6:48:24", "remaining_time": "1:33:07"} +{"current_steps": 4623, "total_steps": 5676, "loss": 0.527474582195282, "lr": 1.8270884564927272e-06, "epoch": 1.6290748898678413, "percentage": 81.45, "elapsed_time": "6:48:29", "remaining_time": "1:33:02"} +{"current_steps": 4624, "total_steps": 5676, "loss": 0.48941463232040405, "lr": 1.8237325293590934e-06, "epoch": 1.6294273127753303, "percentage": 81.47, "elapsed_time": "6:48:34", "remaining_time": "1:32:57"} +{"current_steps": 4625, "total_steps": 5676, "loss": 0.6276243925094604, "lr": 1.8203793778138123e-06, "epoch": 1.6297797356828194, "percentage": 81.48, "elapsed_time": "6:48:38", "remaining_time": "1:32:51"} +{"current_steps": 4626, "total_steps": 5676, "loss": 0.6339844465255737, "lr": 1.8170290029951708e-06, "epoch": 1.6301321585903084, "percentage": 81.5, "elapsed_time": "6:48:43", "remaining_time": "1:32:46"} +{"current_steps": 4627, "total_steps": 5676, "loss": 0.517188549041748, "lr": 
1.813681406040524e-06, "epoch": 1.6304845814977975, "percentage": 81.52, "elapsed_time": "6:48:50", "remaining_time": "1:32:41"} +{"current_steps": 4628, "total_steps": 5676, "loss": 0.576552152633667, "lr": 1.8103365880862667e-06, "epoch": 1.6308370044052865, "percentage": 81.54, "elapsed_time": "6:48:55", "remaining_time": "1:32:35"} +{"current_steps": 4629, "total_steps": 5676, "loss": 0.5703557729721069, "lr": 1.8069945502678688e-06, "epoch": 1.6311894273127754, "percentage": 81.55, "elapsed_time": "6:49:01", "remaining_time": "1:32:30"} +{"current_steps": 4630, "total_steps": 5676, "loss": 0.538072943687439, "lr": 1.8036552937198447e-06, "epoch": 1.6315418502202643, "percentage": 81.57, "elapsed_time": "6:49:06", "remaining_time": "1:32:25"} +{"current_steps": 4631, "total_steps": 5676, "loss": 0.4144761562347412, "lr": 1.8003188195757693e-06, "epoch": 1.6318942731277533, "percentage": 81.59, "elapsed_time": "6:49:12", "remaining_time": "1:32:20"} +{"current_steps": 4632, "total_steps": 5676, "loss": 0.5357951521873474, "lr": 1.7969851289682704e-06, "epoch": 1.6322466960352422, "percentage": 81.61, "elapsed_time": "6:49:17", "remaining_time": "1:32:14"} +{"current_steps": 4633, "total_steps": 5676, "loss": 0.6158766746520996, "lr": 1.7936542230290333e-06, "epoch": 1.6325991189427311, "percentage": 81.62, "elapsed_time": "6:49:21", "remaining_time": "1:32:09"} +{"current_steps": 4634, "total_steps": 5676, "loss": 0.7278525233268738, "lr": 1.790326102888794e-06, "epoch": 1.6329515418502203, "percentage": 81.64, "elapsed_time": "6:49:26", "remaining_time": "1:32:04"} +{"current_steps": 4635, "total_steps": 5676, "loss": 0.5113881230354309, "lr": 1.787000769677354e-06, "epoch": 1.6333039647577092, "percentage": 81.66, "elapsed_time": "6:49:32", "remaining_time": "1:31:58"} +{"current_steps": 4636, "total_steps": 5676, "loss": 0.6247432827949524, "lr": 1.7836782245235485e-06, "epoch": 1.6336563876651984, "percentage": 81.68, "elapsed_time": "6:49:37", 
"remaining_time": "1:31:53"} +{"current_steps": 4637, "total_steps": 5676, "loss": 0.513325572013855, "lr": 1.7803584685552877e-06, "epoch": 1.6340088105726873, "percentage": 81.69, "elapsed_time": "6:49:42", "remaining_time": "1:31:48"} +{"current_steps": 4638, "total_steps": 5676, "loss": 0.4980276823043823, "lr": 1.7770415028995213e-06, "epoch": 1.6343612334801763, "percentage": 81.71, "elapsed_time": "6:49:46", "remaining_time": "1:31:42"} +{"current_steps": 4639, "total_steps": 5676, "loss": 0.5832515954971313, "lr": 1.7737273286822565e-06, "epoch": 1.6347136563876652, "percentage": 81.73, "elapsed_time": "6:49:51", "remaining_time": "1:31:37"} +{"current_steps": 4640, "total_steps": 5676, "loss": 0.6030116081237793, "lr": 1.7704159470285532e-06, "epoch": 1.6350660792951541, "percentage": 81.75, "elapsed_time": "6:49:56", "remaining_time": "1:31:31"} +{"current_steps": 4641, "total_steps": 5676, "loss": 0.5494866371154785, "lr": 1.7671073590625188e-06, "epoch": 1.635418502202643, "percentage": 81.77, "elapsed_time": "6:50:01", "remaining_time": "1:31:26"} +{"current_steps": 4642, "total_steps": 5676, "loss": 0.617791473865509, "lr": 1.7638015659073216e-06, "epoch": 1.635770925110132, "percentage": 81.78, "elapsed_time": "6:50:08", "remaining_time": "1:31:21"} +{"current_steps": 4643, "total_steps": 5676, "loss": 0.5213589668273926, "lr": 1.760498568685175e-06, "epoch": 1.6361233480176212, "percentage": 81.8, "elapsed_time": "6:50:13", "remaining_time": "1:31:16"} +{"current_steps": 4644, "total_steps": 5676, "loss": 0.5114192962646484, "lr": 1.7571983685173367e-06, "epoch": 1.63647577092511, "percentage": 81.82, "elapsed_time": "6:50:18", "remaining_time": "1:31:10"} +{"current_steps": 4645, "total_steps": 5676, "loss": 0.6207156181335449, "lr": 1.7539009665241291e-06, "epoch": 1.6368281938325993, "percentage": 81.84, "elapsed_time": "6:50:23", "remaining_time": "1:31:05"} +{"current_steps": 4646, "total_steps": 5676, "loss": 0.5893350839614868, "lr": 
1.750606363824915e-06, "epoch": 1.6371806167400882, "percentage": 81.85, "elapsed_time": "6:50:30", "remaining_time": "1:31:00"} +{"current_steps": 4647, "total_steps": 5676, "loss": 0.6453898549079895, "lr": 1.7473145615381092e-06, "epoch": 1.6375330396475771, "percentage": 81.87, "elapsed_time": "6:50:35", "remaining_time": "1:30:55"} +{"current_steps": 4648, "total_steps": 5676, "loss": 0.5098680853843689, "lr": 1.7440255607811773e-06, "epoch": 1.637885462555066, "percentage": 81.89, "elapsed_time": "6:50:41", "remaining_time": "1:30:49"} +{"current_steps": 4649, "total_steps": 5676, "loss": 0.5841408967971802, "lr": 1.7407393626706305e-06, "epoch": 1.638237885462555, "percentage": 81.91, "elapsed_time": "6:50:45", "remaining_time": "1:30:44"} +{"current_steps": 4650, "total_steps": 5676, "loss": 0.5593127012252808, "lr": 1.7374559683220337e-06, "epoch": 1.638590308370044, "percentage": 81.92, "elapsed_time": "6:50:51", "remaining_time": "1:30:39"} +{"current_steps": 4651, "total_steps": 5676, "loss": 0.6885190606117249, "lr": 1.7341753788499983e-06, "epoch": 1.638942731277533, "percentage": 81.94, "elapsed_time": "6:50:55", "remaining_time": "1:30:33"} +{"current_steps": 4652, "total_steps": 5676, "loss": 0.6271092891693115, "lr": 1.730897595368175e-06, "epoch": 1.639295154185022, "percentage": 81.96, "elapsed_time": "6:51:01", "remaining_time": "1:30:28"} +{"current_steps": 4653, "total_steps": 5676, "loss": 0.6035536527633667, "lr": 1.7276226189892763e-06, "epoch": 1.639647577092511, "percentage": 81.98, "elapsed_time": "6:51:06", "remaining_time": "1:30:23"} +{"current_steps": 4654, "total_steps": 5676, "loss": 0.49980080127716064, "lr": 1.724350450825052e-06, "epoch": 1.6400000000000001, "percentage": 81.99, "elapsed_time": "6:51:12", "remaining_time": "1:30:18"} +{"current_steps": 4655, "total_steps": 5676, "loss": 0.6056489944458008, "lr": 1.721081091986303e-06, "epoch": 1.640352422907489, "percentage": 82.01, "elapsed_time": "6:51:17", "remaining_time": 
"1:30:12"} +{"current_steps": 4656, "total_steps": 5676, "loss": 0.5611459016799927, "lr": 1.717814543582873e-06, "epoch": 1.640704845814978, "percentage": 82.03, "elapsed_time": "6:51:22", "remaining_time": "1:30:07"} +{"current_steps": 4657, "total_steps": 5676, "loss": 0.5655262470245361, "lr": 1.7145508067236515e-06, "epoch": 1.641057268722467, "percentage": 82.05, "elapsed_time": "6:51:27", "remaining_time": "1:30:01"} +{"current_steps": 4658, "total_steps": 5676, "loss": 0.7793601751327515, "lr": 1.7112898825165814e-06, "epoch": 1.6414096916299559, "percentage": 82.06, "elapsed_time": "6:51:32", "remaining_time": "1:29:56"} +{"current_steps": 4659, "total_steps": 5676, "loss": 0.6587018370628357, "lr": 1.7080317720686434e-06, "epoch": 1.6417621145374448, "percentage": 82.08, "elapsed_time": "6:51:36", "remaining_time": "1:29:50"} +{"current_steps": 4660, "total_steps": 5676, "loss": 0.5546305775642395, "lr": 1.7047764764858598e-06, "epoch": 1.642114537444934, "percentage": 82.1, "elapsed_time": "6:51:40", "remaining_time": "1:29:45"} +{"current_steps": 4661, "total_steps": 5676, "loss": 0.6215736865997314, "lr": 1.7015239968733066e-06, "epoch": 1.642466960352423, "percentage": 82.12, "elapsed_time": "6:51:46", "remaining_time": "1:29:40"} +{"current_steps": 4662, "total_steps": 5676, "loss": 0.5772532224655151, "lr": 1.6982743343350983e-06, "epoch": 1.642819383259912, "percentage": 82.14, "elapsed_time": "6:51:52", "remaining_time": "1:29:35"} +{"current_steps": 4663, "total_steps": 5676, "loss": 0.567034900188446, "lr": 1.6950274899743947e-06, "epoch": 1.643171806167401, "percentage": 82.15, "elapsed_time": "6:51:57", "remaining_time": "1:29:29"} +{"current_steps": 4664, "total_steps": 5676, "loss": 0.5306716561317444, "lr": 1.6917834648933985e-06, "epoch": 1.64352422907489, "percentage": 82.17, "elapsed_time": "6:52:03", "remaining_time": "1:29:24"} +{"current_steps": 4665, "total_steps": 5676, "loss": 0.6691634654998779, "lr": 1.688542260193351e-06, 
"epoch": 1.6438766519823789, "percentage": 82.19, "elapsed_time": "6:52:08", "remaining_time": "1:29:19"} +{"current_steps": 4666, "total_steps": 5676, "loss": 0.6071977615356445, "lr": 1.6853038769745466e-06, "epoch": 1.6442290748898678, "percentage": 82.21, "elapsed_time": "6:52:12", "remaining_time": "1:29:13"} +{"current_steps": 4667, "total_steps": 5676, "loss": 0.743544340133667, "lr": 1.6820683163363161e-06, "epoch": 1.6445814977973567, "percentage": 82.22, "elapsed_time": "6:52:16", "remaining_time": "1:29:08"} +{"current_steps": 4668, "total_steps": 5676, "loss": 0.5745127201080322, "lr": 1.6788355793770238e-06, "epoch": 1.6449339207048457, "percentage": 82.24, "elapsed_time": "6:52:21", "remaining_time": "1:29:02"} +{"current_steps": 4669, "total_steps": 5676, "loss": 0.5153336524963379, "lr": 1.6756056671940902e-06, "epoch": 1.6452863436123348, "percentage": 82.26, "elapsed_time": "6:52:26", "remaining_time": "1:28:57"} +{"current_steps": 4670, "total_steps": 5676, "loss": 0.5780388116836548, "lr": 1.6723785808839666e-06, "epoch": 1.6456387665198238, "percentage": 82.28, "elapsed_time": "6:52:30", "remaining_time": "1:28:51"} +{"current_steps": 4671, "total_steps": 5676, "loss": 0.601921796798706, "lr": 1.6691543215421513e-06, "epoch": 1.645991189427313, "percentage": 82.29, "elapsed_time": "6:52:35", "remaining_time": "1:28:46"} +{"current_steps": 4672, "total_steps": 5676, "loss": 0.6636123657226562, "lr": 1.6659328902631766e-06, "epoch": 1.6463436123348019, "percentage": 82.31, "elapsed_time": "6:52:41", "remaining_time": "1:28:41"} +{"current_steps": 4673, "total_steps": 5676, "loss": 0.45225393772125244, "lr": 1.6627142881406188e-06, "epoch": 1.6466960352422908, "percentage": 82.33, "elapsed_time": "6:52:46", "remaining_time": "1:28:35"} +{"current_steps": 4674, "total_steps": 5676, "loss": 0.6406756043434143, "lr": 1.6594985162670984e-06, "epoch": 1.6470484581497797, "percentage": 82.35, "elapsed_time": "6:52:50", "remaining_time": "1:28:30"} 
+{"current_steps": 4675, "total_steps": 5676, "loss": 0.6735906600952148, "lr": 1.6562855757342632e-06, "epoch": 1.6474008810572687, "percentage": 82.36, "elapsed_time": "6:52:55", "remaining_time": "1:28:24"} +{"current_steps": 4676, "total_steps": 5676, "loss": 0.515188992023468, "lr": 1.6530754676328064e-06, "epoch": 1.6477533039647576, "percentage": 82.38, "elapsed_time": "6:52:59", "remaining_time": "1:28:19"} +{"current_steps": 4677, "total_steps": 5676, "loss": 0.5976129174232483, "lr": 1.6498681930524652e-06, "epoch": 1.6481057268722465, "percentage": 82.4, "elapsed_time": "6:53:04", "remaining_time": "1:28:13"} +{"current_steps": 4678, "total_steps": 5676, "loss": 0.7367427945137024, "lr": 1.6466637530820074e-06, "epoch": 1.6484581497797357, "percentage": 82.42, "elapsed_time": "6:53:08", "remaining_time": "1:28:08"} +{"current_steps": 4679, "total_steps": 5676, "loss": 0.5173717737197876, "lr": 1.6434621488092385e-06, "epoch": 1.6488105726872246, "percentage": 82.43, "elapsed_time": "6:53:14", "remaining_time": "1:28:03"} +{"current_steps": 4680, "total_steps": 5676, "loss": 0.7961066961288452, "lr": 1.6402633813210056e-06, "epoch": 1.6491629955947138, "percentage": 82.45, "elapsed_time": "6:53:19", "remaining_time": "1:27:57"} +{"current_steps": 4681, "total_steps": 5676, "loss": 0.8271595239639282, "lr": 1.637067451703187e-06, "epoch": 1.6495154185022027, "percentage": 82.47, "elapsed_time": "6:53:23", "remaining_time": "1:27:52"} +{"current_steps": 4682, "total_steps": 5676, "loss": 0.6818888783454895, "lr": 1.6338743610407103e-06, "epoch": 1.6498678414096917, "percentage": 82.49, "elapsed_time": "6:53:29", "remaining_time": "1:27:47"} +{"current_steps": 4683, "total_steps": 5676, "loss": 0.5168178677558899, "lr": 1.6306841104175219e-06, "epoch": 1.6502202643171806, "percentage": 82.51, "elapsed_time": "6:53:35", "remaining_time": "1:27:42"} +{"current_steps": 4684, "total_steps": 5676, "loss": 0.5792043209075928, "lr": 1.627496700916612e-06, "epoch": 
1.6505726872246695, "percentage": 82.52, "elapsed_time": "6:53:41", "remaining_time": "1:27:36"} +{"current_steps": 4685, "total_steps": 5676, "loss": 0.6099069118499756, "lr": 1.624312133620013e-06, "epoch": 1.6509251101321585, "percentage": 82.54, "elapsed_time": "6:53:46", "remaining_time": "1:27:31"} +{"current_steps": 4686, "total_steps": 5676, "loss": 0.4562867283821106, "lr": 1.6211304096087832e-06, "epoch": 1.6512775330396474, "percentage": 82.56, "elapsed_time": "6:53:52", "remaining_time": "1:27:26"} +{"current_steps": 4687, "total_steps": 5676, "loss": 0.48648780584335327, "lr": 1.61795152996302e-06, "epoch": 1.6516299559471366, "percentage": 82.58, "elapsed_time": "6:53:57", "remaining_time": "1:27:20"} +{"current_steps": 4688, "total_steps": 5676, "loss": 0.5351820588111877, "lr": 1.6147754957618533e-06, "epoch": 1.6519823788546255, "percentage": 82.59, "elapsed_time": "6:54:03", "remaining_time": "1:27:15"} +{"current_steps": 4689, "total_steps": 5676, "loss": 0.4646923542022705, "lr": 1.6116023080834442e-06, "epoch": 1.6523348017621147, "percentage": 82.61, "elapsed_time": "6:54:09", "remaining_time": "1:27:10"} +{"current_steps": 4690, "total_steps": 5676, "loss": 0.6257984638214111, "lr": 1.608431968005001e-06, "epoch": 1.6526872246696036, "percentage": 82.63, "elapsed_time": "6:54:16", "remaining_time": "1:27:05"} +{"current_steps": 4691, "total_steps": 5676, "loss": 0.5646224021911621, "lr": 1.605264476602747e-06, "epoch": 1.6530396475770925, "percentage": 82.65, "elapsed_time": "6:54:21", "remaining_time": "1:27:00"} +{"current_steps": 4692, "total_steps": 5676, "loss": 0.6074661612510681, "lr": 1.6020998349519457e-06, "epoch": 1.6533920704845815, "percentage": 82.66, "elapsed_time": "6:54:27", "remaining_time": "1:26:55"} +{"current_steps": 4693, "total_steps": 5676, "loss": 0.703096866607666, "lr": 1.598938044126901e-06, "epoch": 1.6537444933920704, "percentage": 82.68, "elapsed_time": "6:54:30", "remaining_time": "1:26:49"} +{"current_steps": 
4694, "total_steps": 5676, "loss": 0.6677542924880981, "lr": 1.5957791052009397e-06, "epoch": 1.6540969162995593, "percentage": 82.7, "elapsed_time": "6:54:36", "remaining_time": "1:26:44"} +{"current_steps": 4695, "total_steps": 5676, "loss": 0.755639910697937, "lr": 1.5926230192464232e-06, "epoch": 1.6544493392070485, "percentage": 82.72, "elapsed_time": "6:54:42", "remaining_time": "1:26:39"} +{"current_steps": 4696, "total_steps": 5676, "loss": 0.7008202075958252, "lr": 1.5894697873347442e-06, "epoch": 1.6548017621145374, "percentage": 82.73, "elapsed_time": "6:54:47", "remaining_time": "1:26:33"} +{"current_steps": 4697, "total_steps": 5676, "loss": 0.5049681067466736, "lr": 1.5863194105363244e-06, "epoch": 1.6551541850220264, "percentage": 82.75, "elapsed_time": "6:54:52", "remaining_time": "1:26:28"} +{"current_steps": 4698, "total_steps": 5676, "loss": 0.7415407299995422, "lr": 1.583171889920626e-06, "epoch": 1.6555066079295155, "percentage": 82.77, "elapsed_time": "6:54:57", "remaining_time": "1:26:23"} +{"current_steps": 4699, "total_steps": 5676, "loss": 0.7949470281600952, "lr": 1.5800272265561256e-06, "epoch": 1.6558590308370045, "percentage": 82.79, "elapsed_time": "6:55:03", "remaining_time": "1:26:17"} +{"current_steps": 4700, "total_steps": 5676, "loss": 0.5892510414123535, "lr": 1.5768854215103435e-06, "epoch": 1.6562114537444934, "percentage": 82.8, "elapsed_time": "6:55:07", "remaining_time": "1:26:12"} +{"current_steps": 4701, "total_steps": 5676, "loss": 0.5357394814491272, "lr": 1.5737464758498243e-06, "epoch": 1.6565638766519823, "percentage": 82.82, "elapsed_time": "6:55:17", "remaining_time": "1:26:08"} +{"current_steps": 4702, "total_steps": 5676, "loss": 0.6078016757965088, "lr": 1.5706103906401416e-06, "epoch": 1.6569162995594713, "percentage": 82.84, "elapsed_time": "6:55:24", "remaining_time": "1:26:02"} +{"current_steps": 4703, "total_steps": 5676, "loss": 0.5858899354934692, "lr": 1.5674771669459e-06, "epoch": 1.6572687224669602, 
"percentage": 82.86, "elapsed_time": "6:55:28", "remaining_time": "1:25:57"} +{"current_steps": 4704, "total_steps": 5676, "loss": 0.5783329010009766, "lr": 1.5643468058307287e-06, "epoch": 1.6576211453744494, "percentage": 82.88, "elapsed_time": "6:55:34", "remaining_time": "1:25:52"} +{"current_steps": 4705, "total_steps": 5676, "loss": 0.5412800312042236, "lr": 1.561219308357288e-06, "epoch": 1.6579735682819383, "percentage": 82.89, "elapsed_time": "6:55:38", "remaining_time": "1:25:46"} +{"current_steps": 4706, "total_steps": 5676, "loss": 0.5609365701675415, "lr": 1.5580946755872727e-06, "epoch": 1.6583259911894275, "percentage": 82.91, "elapsed_time": "6:55:44", "remaining_time": "1:25:41"} +{"current_steps": 4707, "total_steps": 5676, "loss": 0.45193177461624146, "lr": 1.554972908581388e-06, "epoch": 1.6586784140969164, "percentage": 82.93, "elapsed_time": "6:55:49", "remaining_time": "1:25:36"} +{"current_steps": 4708, "total_steps": 5676, "loss": 0.6402257680892944, "lr": 1.5518540083993838e-06, "epoch": 1.6590308370044053, "percentage": 82.95, "elapsed_time": "6:55:54", "remaining_time": "1:25:30"} +{"current_steps": 4709, "total_steps": 5676, "loss": 0.5956071615219116, "lr": 1.5487379761000276e-06, "epoch": 1.6593832599118943, "percentage": 82.96, "elapsed_time": "6:56:00", "remaining_time": "1:25:25"} +{"current_steps": 4710, "total_steps": 5676, "loss": 0.5975273847579956, "lr": 1.5456248127411156e-06, "epoch": 1.6597356828193832, "percentage": 82.98, "elapsed_time": "6:56:06", "remaining_time": "1:25:20"} +{"current_steps": 4711, "total_steps": 5676, "loss": 0.6914929151535034, "lr": 1.54251451937947e-06, "epoch": 1.6600881057268722, "percentage": 83.0, "elapsed_time": "6:56:11", "remaining_time": "1:25:15"} +{"current_steps": 4712, "total_steps": 5676, "loss": 0.5867592096328735, "lr": 1.5394070970709384e-06, "epoch": 1.660440528634361, "percentage": 83.02, "elapsed_time": "6:56:17", "remaining_time": "1:25:10"} +{"current_steps": 4713, 
"total_steps": 5676, "loss": 0.6448687314987183, "lr": 1.5363025468703917e-06, "epoch": 1.6607929515418502, "percentage": 83.03, "elapsed_time": "6:56:23", "remaining_time": "1:25:04"} +{"current_steps": 4714, "total_steps": 5676, "loss": 0.5870485305786133, "lr": 1.5332008698317348e-06, "epoch": 1.6611453744493392, "percentage": 83.05, "elapsed_time": "6:56:28", "remaining_time": "1:24:59"} +{"current_steps": 4715, "total_steps": 5676, "loss": 0.6336855888366699, "lr": 1.5301020670078803e-06, "epoch": 1.6614977973568283, "percentage": 83.07, "elapsed_time": "6:56:32", "remaining_time": "1:24:53"} +{"current_steps": 4716, "total_steps": 5676, "loss": 0.5924787521362305, "lr": 1.527006139450784e-06, "epoch": 1.6618502202643173, "percentage": 83.09, "elapsed_time": "6:56:38", "remaining_time": "1:24:48"} +{"current_steps": 4717, "total_steps": 5676, "loss": 0.5817830562591553, "lr": 1.523913088211415e-06, "epoch": 1.6622026431718062, "percentage": 83.1, "elapsed_time": "6:56:45", "remaining_time": "1:24:43"} +{"current_steps": 4718, "total_steps": 5676, "loss": 0.5836390852928162, "lr": 1.5208229143397657e-06, "epoch": 1.6625550660792952, "percentage": 83.12, "elapsed_time": "6:56:52", "remaining_time": "1:24:38"} +{"current_steps": 4719, "total_steps": 5676, "loss": 0.47110515832901, "lr": 1.5177356188848558e-06, "epoch": 1.662907488986784, "percentage": 83.14, "elapsed_time": "6:56:57", "remaining_time": "1:24:33"} +{"current_steps": 4720, "total_steps": 5676, "loss": 0.6376635432243347, "lr": 1.5146512028947225e-06, "epoch": 1.663259911894273, "percentage": 83.16, "elapsed_time": "6:57:01", "remaining_time": "1:24:27"} +{"current_steps": 4721, "total_steps": 5676, "loss": 0.6399784088134766, "lr": 1.5115696674164349e-06, "epoch": 1.663612334801762, "percentage": 83.17, "elapsed_time": "6:57:07", "remaining_time": "1:24:22"} +{"current_steps": 4722, "total_steps": 5676, "loss": 0.5891954898834229, "lr": 1.5084910134960773e-06, "epoch": 1.6639647577092511, 
"percentage": 83.19, "elapsed_time": "6:57:12", "remaining_time": "1:24:17"} +{"current_steps": 4723, "total_steps": 5676, "loss": 0.6358054876327515, "lr": 1.5054152421787505e-06, "epoch": 1.66431718061674, "percentage": 83.21, "elapsed_time": "6:57:19", "remaining_time": "1:24:12"} +{"current_steps": 4724, "total_steps": 5676, "loss": 0.5072367191314697, "lr": 1.5023423545085892e-06, "epoch": 1.6646696035242292, "percentage": 83.23, "elapsed_time": "6:57:25", "remaining_time": "1:24:07"} +{"current_steps": 4725, "total_steps": 5676, "loss": 0.5549881458282471, "lr": 1.4992723515287423e-06, "epoch": 1.6650220264317181, "percentage": 83.25, "elapsed_time": "6:57:29", "remaining_time": "1:24:01"} +{"current_steps": 4726, "total_steps": 5676, "loss": 0.4833364188671112, "lr": 1.4962052342813804e-06, "epoch": 1.665374449339207, "percentage": 83.26, "elapsed_time": "6:57:36", "remaining_time": "1:23:56"} +{"current_steps": 4727, "total_steps": 5676, "loss": 0.6183017492294312, "lr": 1.4931410038076922e-06, "epoch": 1.665726872246696, "percentage": 83.28, "elapsed_time": "6:57:42", "remaining_time": "1:23:51"} +{"current_steps": 4728, "total_steps": 5676, "loss": 0.4956335127353668, "lr": 1.4900796611478885e-06, "epoch": 1.666079295154185, "percentage": 83.3, "elapsed_time": "6:57:46", "remaining_time": "1:23:46"} +{"current_steps": 4729, "total_steps": 5676, "loss": 0.7345337271690369, "lr": 1.4870212073412027e-06, "epoch": 1.666431718061674, "percentage": 83.32, "elapsed_time": "6:57:51", "remaining_time": "1:23:40"} +{"current_steps": 4730, "total_steps": 5676, "loss": 0.5324833393096924, "lr": 1.4839656434258864e-06, "epoch": 1.6667841409691628, "percentage": 83.33, "elapsed_time": "6:57:58", "remaining_time": "1:23:35"} +{"current_steps": 4731, "total_steps": 5676, "loss": 0.5702322125434875, "lr": 1.4809129704392e-06, "epoch": 1.667136563876652, "percentage": 83.35, "elapsed_time": "6:58:04", "remaining_time": "1:23:30"} +{"current_steps": 4732, "total_steps": 
5676, "loss": 0.46188884973526, "lr": 1.4778631894174389e-06, "epoch": 1.667488986784141, "percentage": 83.37, "elapsed_time": "6:58:09", "remaining_time": "1:23:25"} +{"current_steps": 4733, "total_steps": 5676, "loss": 0.5505924224853516, "lr": 1.474816301395906e-06, "epoch": 1.66784140969163, "percentage": 83.39, "elapsed_time": "6:58:15", "remaining_time": "1:23:19"} +{"current_steps": 4734, "total_steps": 5676, "loss": 0.5889710187911987, "lr": 1.4717723074089251e-06, "epoch": 1.668193832599119, "percentage": 83.4, "elapsed_time": "6:58:20", "remaining_time": "1:23:14"} +{"current_steps": 4735, "total_steps": 5676, "loss": 0.5794551372528076, "lr": 1.4687312084898387e-06, "epoch": 1.668546255506608, "percentage": 83.42, "elapsed_time": "6:58:27", "remaining_time": "1:23:09"} +{"current_steps": 4736, "total_steps": 5676, "loss": 0.567146897315979, "lr": 1.4656930056710006e-06, "epoch": 1.668898678414097, "percentage": 83.44, "elapsed_time": "6:58:34", "remaining_time": "1:23:04"} +{"current_steps": 4737, "total_steps": 5676, "loss": 0.5330451130867004, "lr": 1.4626576999837938e-06, "epoch": 1.6692511013215858, "percentage": 83.46, "elapsed_time": "6:58:39", "remaining_time": "1:22:59"} +{"current_steps": 4738, "total_steps": 5676, "loss": 0.5631227493286133, "lr": 1.459625292458604e-06, "epoch": 1.6696035242290748, "percentage": 83.47, "elapsed_time": "6:58:43", "remaining_time": "1:22:53"} +{"current_steps": 4739, "total_steps": 5676, "loss": 0.5145374536514282, "lr": 1.456595784124839e-06, "epoch": 1.669955947136564, "percentage": 83.49, "elapsed_time": "6:58:49", "remaining_time": "1:22:48"} +{"current_steps": 4740, "total_steps": 5676, "loss": 0.59378582239151, "lr": 1.453569176010927e-06, "epoch": 1.6703083700440529, "percentage": 83.51, "elapsed_time": "6:58:54", "remaining_time": "1:22:43"} +{"current_steps": 4741, "total_steps": 5676, "loss": 0.5827980041503906, "lr": 1.4505454691443043e-06, "epoch": 1.6706607929515418, "percentage": 83.53, 
"elapsed_time": "6:58:58", "remaining_time": "1:22:37"} +{"current_steps": 4742, "total_steps": 5676, "loss": 0.5270858407020569, "lr": 1.4475246645514274e-06, "epoch": 1.671013215859031, "percentage": 83.54, "elapsed_time": "6:59:04", "remaining_time": "1:22:32"} +{"current_steps": 4743, "total_steps": 5676, "loss": 0.4708535373210907, "lr": 1.4445067632577625e-06, "epoch": 1.67136563876652, "percentage": 83.56, "elapsed_time": "6:59:10", "remaining_time": "1:22:27"} +{"current_steps": 4744, "total_steps": 5676, "loss": 0.5505239963531494, "lr": 1.4414917662877924e-06, "epoch": 1.6717180616740088, "percentage": 83.58, "elapsed_time": "6:59:15", "remaining_time": "1:22:22"} +{"current_steps": 4745, "total_steps": 5676, "loss": 0.5841302871704102, "lr": 1.4384796746650221e-06, "epoch": 1.6720704845814978, "percentage": 83.6, "elapsed_time": "6:59:20", "remaining_time": "1:22:16"} +{"current_steps": 4746, "total_steps": 5676, "loss": 0.627534031867981, "lr": 1.4354704894119554e-06, "epoch": 1.6724229074889867, "percentage": 83.62, "elapsed_time": "6:59:26", "remaining_time": "1:22:11"} +{"current_steps": 4747, "total_steps": 5676, "loss": 0.6160094738006592, "lr": 1.4324642115501165e-06, "epoch": 1.6727753303964756, "percentage": 83.63, "elapsed_time": "6:59:30", "remaining_time": "1:22:05"} +{"current_steps": 4748, "total_steps": 5676, "loss": 0.5420609712600708, "lr": 1.4294608421000489e-06, "epoch": 1.6731277533039648, "percentage": 83.65, "elapsed_time": "6:59:35", "remaining_time": "1:22:00"} +{"current_steps": 4749, "total_steps": 5676, "loss": 0.7729714512825012, "lr": 1.4264603820813006e-06, "epoch": 1.6734801762114537, "percentage": 83.67, "elapsed_time": "6:59:40", "remaining_time": "1:21:55"} +{"current_steps": 4750, "total_steps": 5676, "loss": 0.6458747386932373, "lr": 1.4234628325124345e-06, "epoch": 1.6738325991189429, "percentage": 83.69, "elapsed_time": "6:59:47", "remaining_time": "1:21:50"} +{"current_steps": 4751, "total_steps": 5676, "loss": 
0.5250670313835144, "lr": 1.4204681944110242e-06, "epoch": 1.6741850220264318, "percentage": 83.7, "elapsed_time": "6:59:53", "remaining_time": "1:21:45"} +{"current_steps": 4752, "total_steps": 5676, "loss": 0.4985695779323578, "lr": 1.4174764687936548e-06, "epoch": 1.6745374449339208, "percentage": 83.72, "elapsed_time": "6:59:58", "remaining_time": "1:21:39"} +{"current_steps": 4753, "total_steps": 5676, "loss": 0.6401104927062988, "lr": 1.4144876566759303e-06, "epoch": 1.6748898678414097, "percentage": 83.74, "elapsed_time": "7:00:02", "remaining_time": "1:21:34"} +{"current_steps": 4754, "total_steps": 5676, "loss": 0.5991432666778564, "lr": 1.4115017590724534e-06, "epoch": 1.6752422907488986, "percentage": 83.76, "elapsed_time": "7:00:08", "remaining_time": "1:21:29"} +{"current_steps": 4755, "total_steps": 5676, "loss": 0.7787071466445923, "lr": 1.4085187769968433e-06, "epoch": 1.6755947136563876, "percentage": 83.77, "elapsed_time": "7:00:14", "remaining_time": "1:21:23"} +{"current_steps": 4756, "total_steps": 5676, "loss": 0.6977101564407349, "lr": 1.4055387114617336e-06, "epoch": 1.6759471365638765, "percentage": 83.79, "elapsed_time": "7:00:20", "remaining_time": "1:21:18"} +{"current_steps": 4757, "total_steps": 5676, "loss": 0.541371762752533, "lr": 1.4025615634787616e-06, "epoch": 1.6762995594713657, "percentage": 83.81, "elapsed_time": "7:00:25", "remaining_time": "1:21:13"} +{"current_steps": 4758, "total_steps": 5676, "loss": 0.5548759698867798, "lr": 1.3995873340585765e-06, "epoch": 1.6766519823788546, "percentage": 83.83, "elapsed_time": "7:00:31", "remaining_time": "1:21:08"} +{"current_steps": 4759, "total_steps": 5676, "loss": 0.6022241115570068, "lr": 1.3966160242108373e-06, "epoch": 1.6770044052863438, "percentage": 83.84, "elapsed_time": "7:00:36", "remaining_time": "1:21:02"} +{"current_steps": 4760, "total_steps": 5676, "loss": 0.4965083599090576, "lr": 1.3936476349442074e-06, "epoch": 1.6773568281938327, "percentage": 83.86, 
"elapsed_time": "7:00:40", "remaining_time": "1:20:57"} +{"current_steps": 4761, "total_steps": 5676, "loss": 0.6712369918823242, "lr": 1.3906821672663706e-06, "epoch": 1.6777092511013216, "percentage": 83.88, "elapsed_time": "7:00:45", "remaining_time": "1:20:51"} +{"current_steps": 4762, "total_steps": 5676, "loss": 0.6236127614974976, "lr": 1.3877196221840038e-06, "epoch": 1.6780616740088106, "percentage": 83.9, "elapsed_time": "7:00:49", "remaining_time": "1:20:46"} +{"current_steps": 4763, "total_steps": 5676, "loss": 0.7144246101379395, "lr": 1.3847600007027996e-06, "epoch": 1.6784140969162995, "percentage": 83.91, "elapsed_time": "7:00:55", "remaining_time": "1:20:41"} +{"current_steps": 4764, "total_steps": 5676, "loss": 0.650489091873169, "lr": 1.3818033038274602e-06, "epoch": 1.6787665198237884, "percentage": 83.93, "elapsed_time": "7:00:59", "remaining_time": "1:20:35"} +{"current_steps": 4765, "total_steps": 5676, "loss": 0.6711791157722473, "lr": 1.3788495325616912e-06, "epoch": 1.6791189427312774, "percentage": 83.95, "elapsed_time": "7:01:04", "remaining_time": "1:20:30"} +{"current_steps": 4766, "total_steps": 5676, "loss": 0.49629515409469604, "lr": 1.375898687908206e-06, "epoch": 1.6794713656387665, "percentage": 83.97, "elapsed_time": "7:01:10", "remaining_time": "1:20:25"} +{"current_steps": 4767, "total_steps": 5676, "loss": 0.5843443870544434, "lr": 1.372950770868724e-06, "epoch": 1.6798237885462555, "percentage": 83.99, "elapsed_time": "7:01:14", "remaining_time": "1:20:19"} +{"current_steps": 4768, "total_steps": 5676, "loss": 0.6201150417327881, "lr": 1.3700057824439694e-06, "epoch": 1.6801762114537446, "percentage": 84.0, "elapsed_time": "7:01:19", "remaining_time": "1:20:14"} +{"current_steps": 4769, "total_steps": 5676, "loss": 0.6149473190307617, "lr": 1.3670637236336815e-06, "epoch": 1.6805286343612336, "percentage": 84.02, "elapsed_time": "7:01:24", "remaining_time": "1:20:08"} +{"current_steps": 4770, "total_steps": 5676, "loss": 
0.476188987493515, "lr": 1.3641245954365868e-06, "epoch": 1.6808810572687225, "percentage": 84.04, "elapsed_time": "7:01:29", "remaining_time": "1:20:03"} +{"current_steps": 4771, "total_steps": 5676, "loss": 0.4850924015045166, "lr": 1.361188398850436e-06, "epoch": 1.6812334801762114, "percentage": 84.06, "elapsed_time": "7:01:35", "remaining_time": "1:19:58"} +{"current_steps": 4772, "total_steps": 5676, "loss": 0.6008634567260742, "lr": 1.3582551348719741e-06, "epoch": 1.6815859030837004, "percentage": 84.07, "elapsed_time": "7:01:40", "remaining_time": "1:19:52"} +{"current_steps": 4773, "total_steps": 5676, "loss": 0.5383377075195312, "lr": 1.3553248044969525e-06, "epoch": 1.6819383259911893, "percentage": 84.09, "elapsed_time": "7:01:46", "remaining_time": "1:19:47"} +{"current_steps": 4774, "total_steps": 5676, "loss": 0.5711555480957031, "lr": 1.3523974087201274e-06, "epoch": 1.6822907488986785, "percentage": 84.11, "elapsed_time": "7:01:51", "remaining_time": "1:19:42"} +{"current_steps": 4775, "total_steps": 5676, "loss": 0.5267810821533203, "lr": 1.3494729485352586e-06, "epoch": 1.6826431718061674, "percentage": 84.13, "elapsed_time": "7:01:57", "remaining_time": "1:19:37"} +{"current_steps": 4776, "total_steps": 5676, "loss": 0.6203084588050842, "lr": 1.3465514249351076e-06, "epoch": 1.6829955947136563, "percentage": 84.14, "elapsed_time": "7:02:02", "remaining_time": "1:19:31"} +{"current_steps": 4777, "total_steps": 5676, "loss": 0.46672314405441284, "lr": 1.3436328389114473e-06, "epoch": 1.6833480176211455, "percentage": 84.16, "elapsed_time": "7:02:08", "remaining_time": "1:19:26"} +{"current_steps": 4778, "total_steps": 5676, "loss": 0.6375850439071655, "lr": 1.3407171914550366e-06, "epoch": 1.6837004405286344, "percentage": 84.18, "elapsed_time": "7:02:13", "remaining_time": "1:19:21"} +{"current_steps": 4779, "total_steps": 5676, "loss": 0.6162304282188416, "lr": 1.337804483555656e-06, "epoch": 1.6840528634361234, "percentage": 84.2, 
"elapsed_time": "7:02:20", "remaining_time": "1:19:16"} +{"current_steps": 4780, "total_steps": 5676, "loss": 0.7814363241195679, "lr": 1.3348947162020741e-06, "epoch": 1.6844052863436123, "percentage": 84.21, "elapsed_time": "7:02:25", "remaining_time": "1:19:11"} +{"current_steps": 4781, "total_steps": 5676, "loss": 0.47330981492996216, "lr": 1.3319878903820682e-06, "epoch": 1.6847577092511012, "percentage": 84.23, "elapsed_time": "7:02:31", "remaining_time": "1:19:05"} +{"current_steps": 4782, "total_steps": 5676, "loss": 0.759529173374176, "lr": 1.3290840070824163e-06, "epoch": 1.6851101321585902, "percentage": 84.25, "elapsed_time": "7:02:37", "remaining_time": "1:19:00"} +{"current_steps": 4783, "total_steps": 5676, "loss": 0.7727639675140381, "lr": 1.326183067288893e-06, "epoch": 1.6854625550660793, "percentage": 84.27, "elapsed_time": "7:02:42", "remaining_time": "1:18:55"} +{"current_steps": 4784, "total_steps": 5676, "loss": 0.6429058313369751, "lr": 1.3232850719862789e-06, "epoch": 1.6858149779735683, "percentage": 84.28, "elapsed_time": "7:02:47", "remaining_time": "1:18:49"} +{"current_steps": 4785, "total_steps": 5676, "loss": 0.5895540714263916, "lr": 1.3203900221583565e-06, "epoch": 1.6861674008810574, "percentage": 84.3, "elapsed_time": "7:02:52", "remaining_time": "1:18:44"} +{"current_steps": 4786, "total_steps": 5676, "loss": 0.5410366058349609, "lr": 1.317497918787899e-06, "epoch": 1.6865198237885464, "percentage": 84.32, "elapsed_time": "7:02:56", "remaining_time": "1:18:38"} +{"current_steps": 4787, "total_steps": 5676, "loss": 0.5144281387329102, "lr": 1.3146087628566894e-06, "epoch": 1.6868722466960353, "percentage": 84.34, "elapsed_time": "7:03:01", "remaining_time": "1:18:33"} +{"current_steps": 4788, "total_steps": 5676, "loss": 0.7025437355041504, "lr": 1.311722555345506e-06, "epoch": 1.6872246696035242, "percentage": 84.36, "elapsed_time": "7:03:06", "remaining_time": "1:18:28"} +{"current_steps": 4789, "total_steps": 5676, "loss": 
0.6000303030014038, "lr": 1.3088392972341256e-06, "epoch": 1.6875770925110132, "percentage": 84.37, "elapsed_time": "7:03:11", "remaining_time": "1:18:22"} +{"current_steps": 4790, "total_steps": 5676, "loss": 0.5732883214950562, "lr": 1.3059589895013248e-06, "epoch": 1.6879295154185021, "percentage": 84.39, "elapsed_time": "7:03:15", "remaining_time": "1:18:17"} +{"current_steps": 4791, "total_steps": 5676, "loss": 0.5908600091934204, "lr": 1.3030816331248785e-06, "epoch": 1.688281938325991, "percentage": 84.41, "elapsed_time": "7:03:21", "remaining_time": "1:18:12"} +{"current_steps": 4792, "total_steps": 5676, "loss": 0.5579310655593872, "lr": 1.3002072290815571e-06, "epoch": 1.6886343612334802, "percentage": 84.43, "elapsed_time": "7:03:27", "remaining_time": "1:18:07"} +{"current_steps": 4793, "total_steps": 5676, "loss": 0.6439195871353149, "lr": 1.2973357783471385e-06, "epoch": 1.6889867841409691, "percentage": 84.44, "elapsed_time": "7:03:33", "remaining_time": "1:18:01"} +{"current_steps": 4794, "total_steps": 5676, "loss": 0.6213329434394836, "lr": 1.2944672818963822e-06, "epoch": 1.6893392070484583, "percentage": 84.46, "elapsed_time": "7:03:39", "remaining_time": "1:17:56"} +{"current_steps": 4795, "total_steps": 5676, "loss": 0.5875385999679565, "lr": 1.291601740703059e-06, "epoch": 1.6896916299559472, "percentage": 84.48, "elapsed_time": "7:03:43", "remaining_time": "1:17:51"} +{"current_steps": 4796, "total_steps": 5676, "loss": 0.7071372270584106, "lr": 1.2887391557399287e-06, "epoch": 1.6900440528634362, "percentage": 84.5, "elapsed_time": "7:03:49", "remaining_time": "1:17:46"} +{"current_steps": 4797, "total_steps": 5676, "loss": 0.504144549369812, "lr": 1.2858795279787517e-06, "epoch": 1.690396475770925, "percentage": 84.51, "elapsed_time": "7:03:55", "remaining_time": "1:17:40"} +{"current_steps": 4798, "total_steps": 5676, "loss": 0.7081021070480347, "lr": 1.2830228583902816e-06, "epoch": 1.690748898678414, "percentage": 84.53, "elapsed_time": 
"7:04:02", "remaining_time": "1:17:35"} +{"current_steps": 4799, "total_steps": 5676, "loss": 0.5843057632446289, "lr": 1.2801691479442658e-06, "epoch": 1.691101321585903, "percentage": 84.55, "elapsed_time": "7:04:08", "remaining_time": "1:17:30"} +{"current_steps": 4800, "total_steps": 5676, "loss": 0.6437872052192688, "lr": 1.2773183976094571e-06, "epoch": 1.691453744493392, "percentage": 84.57, "elapsed_time": "7:04:12", "remaining_time": "1:17:25"} +{"current_steps": 4801, "total_steps": 5676, "loss": 0.6945379972457886, "lr": 1.2744706083535907e-06, "epoch": 1.691806167400881, "percentage": 84.58, "elapsed_time": "7:04:22", "remaining_time": "1:17:20"} +{"current_steps": 4802, "total_steps": 5676, "loss": 0.5827867984771729, "lr": 1.2716257811434019e-06, "epoch": 1.69215859030837, "percentage": 84.6, "elapsed_time": "7:04:28", "remaining_time": "1:17:15"} +{"current_steps": 4803, "total_steps": 5676, "loss": 0.5298784971237183, "lr": 1.2687839169446259e-06, "epoch": 1.6925110132158592, "percentage": 84.62, "elapsed_time": "7:04:33", "remaining_time": "1:17:10"} +{"current_steps": 4804, "total_steps": 5676, "loss": 0.6885675191879272, "lr": 1.2659450167219834e-06, "epoch": 1.692863436123348, "percentage": 84.64, "elapsed_time": "7:04:38", "remaining_time": "1:17:04"} +{"current_steps": 4805, "total_steps": 5676, "loss": 0.6902546882629395, "lr": 1.2631090814391945e-06, "epoch": 1.693215859030837, "percentage": 84.65, "elapsed_time": "7:04:43", "remaining_time": "1:16:59"} +{"current_steps": 4806, "total_steps": 5676, "loss": 0.5972022414207458, "lr": 1.2602761120589713e-06, "epoch": 1.693568281938326, "percentage": 84.67, "elapsed_time": "7:04:50", "remaining_time": "1:16:54"} +{"current_steps": 4807, "total_steps": 5676, "loss": 0.5527150630950928, "lr": 1.2574461095430145e-06, "epoch": 1.693920704845815, "percentage": 84.69, "elapsed_time": "7:04:55", "remaining_time": "1:16:49"} +{"current_steps": 4808, "total_steps": 5676, "loss": 0.5898724794387817, "lr": 
1.2546190748520294e-06, "epoch": 1.6942731277533039, "percentage": 84.71, "elapsed_time": "7:05:01", "remaining_time": "1:16:43"} +{"current_steps": 4809, "total_steps": 5676, "loss": 0.7023364901542664, "lr": 1.2517950089457e-06, "epoch": 1.6946255506607928, "percentage": 84.73, "elapsed_time": "7:05:07", "remaining_time": "1:16:38"} +{"current_steps": 4810, "total_steps": 5676, "loss": 0.6016935706138611, "lr": 1.2489739127827083e-06, "epoch": 1.694977973568282, "percentage": 84.74, "elapsed_time": "7:05:11", "remaining_time": "1:16:33"} +{"current_steps": 4811, "total_steps": 5676, "loss": 0.5724658966064453, "lr": 1.246155787320733e-06, "epoch": 1.6953303964757709, "percentage": 84.76, "elapsed_time": "7:05:17", "remaining_time": "1:16:27"} +{"current_steps": 4812, "total_steps": 5676, "loss": 0.5886126160621643, "lr": 1.2433406335164389e-06, "epoch": 1.69568281938326, "percentage": 84.78, "elapsed_time": "7:05:22", "remaining_time": "1:16:22"} +{"current_steps": 4813, "total_steps": 5676, "loss": 0.6656844615936279, "lr": 1.2405284523254823e-06, "epoch": 1.696035242290749, "percentage": 84.8, "elapsed_time": "7:05:28", "remaining_time": "1:16:17"} +{"current_steps": 4814, "total_steps": 5676, "loss": 0.6208533644676208, "lr": 1.237719244702511e-06, "epoch": 1.696387665198238, "percentage": 84.81, "elapsed_time": "7:05:32", "remaining_time": "1:16:11"} +{"current_steps": 4815, "total_steps": 5676, "loss": 0.6895248889923096, "lr": 1.234913011601162e-06, "epoch": 1.6967400881057269, "percentage": 84.83, "elapsed_time": "7:05:37", "remaining_time": "1:16:06"} +{"current_steps": 4816, "total_steps": 5676, "loss": 0.5525833368301392, "lr": 1.23210975397407e-06, "epoch": 1.6970925110132158, "percentage": 84.85, "elapsed_time": "7:05:43", "remaining_time": "1:16:01"} +{"current_steps": 4817, "total_steps": 5676, "loss": 0.5958225727081299, "lr": 1.2293094727728471e-06, "epoch": 1.6974449339207047, "percentage": 84.87, "elapsed_time": "7:05:50", "remaining_time": 
"1:15:56"} +{"current_steps": 4818, "total_steps": 5676, "loss": 0.7570905089378357, "lr": 1.226512168948103e-06, "epoch": 1.6977973568281939, "percentage": 84.88, "elapsed_time": "7:05:55", "remaining_time": "1:15:50"} +{"current_steps": 4819, "total_steps": 5676, "loss": 0.586568295955658, "lr": 1.2237178434494379e-06, "epoch": 1.6981497797356828, "percentage": 84.9, "elapsed_time": "7:05:59", "remaining_time": "1:15:45"} +{"current_steps": 4820, "total_steps": 5676, "loss": 0.574797511100769, "lr": 1.2209264972254365e-06, "epoch": 1.6985022026431718, "percentage": 84.92, "elapsed_time": "7:06:04", "remaining_time": "1:15:40"} +{"current_steps": 4821, "total_steps": 5676, "loss": 0.6277909278869629, "lr": 1.2181381312236751e-06, "epoch": 1.698854625550661, "percentage": 84.94, "elapsed_time": "7:06:09", "remaining_time": "1:15:34"} +{"current_steps": 4822, "total_steps": 5676, "loss": 0.6308181285858154, "lr": 1.2153527463907155e-06, "epoch": 1.6992070484581498, "percentage": 84.95, "elapsed_time": "7:06:14", "remaining_time": "1:15:29"} +{"current_steps": 4823, "total_steps": 5676, "loss": 0.5076426267623901, "lr": 1.2125703436721091e-06, "epoch": 1.6995594713656388, "percentage": 84.97, "elapsed_time": "7:06:20", "remaining_time": "1:15:24"} +{"current_steps": 4824, "total_steps": 5676, "loss": 0.7712167501449585, "lr": 1.2097909240123994e-06, "epoch": 1.6999118942731277, "percentage": 84.99, "elapsed_time": "7:06:25", "remaining_time": "1:15:18"} +{"current_steps": 4825, "total_steps": 5676, "loss": 0.5964892506599426, "lr": 1.2070144883551072e-06, "epoch": 1.7002643171806167, "percentage": 85.01, "elapsed_time": "7:06:30", "remaining_time": "1:15:13"} +{"current_steps": 4826, "total_steps": 5676, "loss": 0.7473628520965576, "lr": 1.2042410376427472e-06, "epoch": 1.7006167400881056, "percentage": 85.02, "elapsed_time": "7:06:35", "remaining_time": "1:15:08"} +{"current_steps": 4827, "total_steps": 5676, "loss": 0.6897937059402466, "lr": 1.2014705728168219e-06, 
"epoch": 1.7009691629955948, "percentage": 85.04, "elapsed_time": "7:06:42", "remaining_time": "1:15:03"} +{"current_steps": 4828, "total_steps": 5676, "loss": 0.5955557823181152, "lr": 1.198703094817817e-06, "epoch": 1.7013215859030837, "percentage": 85.06, "elapsed_time": "7:06:47", "remaining_time": "1:14:57"} +{"current_steps": 4829, "total_steps": 5676, "loss": 0.7051092386245728, "lr": 1.195938604585205e-06, "epoch": 1.7016740088105728, "percentage": 85.08, "elapsed_time": "7:06:52", "remaining_time": "1:14:52"} +{"current_steps": 4830, "total_steps": 5676, "loss": 0.584572434425354, "lr": 1.1931771030574446e-06, "epoch": 1.7020264317180618, "percentage": 85.1, "elapsed_time": "7:06:59", "remaining_time": "1:14:47"} +{"current_steps": 4831, "total_steps": 5676, "loss": 0.5691804885864258, "lr": 1.1904185911719768e-06, "epoch": 1.7023788546255507, "percentage": 85.11, "elapsed_time": "7:07:03", "remaining_time": "1:14:41"} +{"current_steps": 4832, "total_steps": 5676, "loss": 0.5539572834968567, "lr": 1.187663069865237e-06, "epoch": 1.7027312775330397, "percentage": 85.13, "elapsed_time": "7:07:08", "remaining_time": "1:14:36"} +{"current_steps": 4833, "total_steps": 5676, "loss": 0.6229352951049805, "lr": 1.1849105400726324e-06, "epoch": 1.7030837004405286, "percentage": 85.15, "elapsed_time": "7:07:14", "remaining_time": "1:14:31"} +{"current_steps": 4834, "total_steps": 5676, "loss": 0.7302919626235962, "lr": 1.1821610027285613e-06, "epoch": 1.7034361233480175, "percentage": 85.17, "elapsed_time": "7:07:18", "remaining_time": "1:14:25"} +{"current_steps": 4835, "total_steps": 5676, "loss": 0.6802065372467041, "lr": 1.1794144587664113e-06, "epoch": 1.7037885462555065, "percentage": 85.18, "elapsed_time": "7:07:25", "remaining_time": "1:14:20"} +{"current_steps": 4836, "total_steps": 5676, "loss": 0.6448635458946228, "lr": 1.1766709091185447e-06, "epoch": 1.7041409691629956, "percentage": 85.2, "elapsed_time": "7:07:29", "remaining_time": "1:14:15"} 
+{"current_steps": 4837, "total_steps": 5676, "loss": 0.6141834259033203, "lr": 1.1739303547163138e-06, "epoch": 1.7044933920704846, "percentage": 85.22, "elapsed_time": "7:07:35", "remaining_time": "1:14:10"} +{"current_steps": 4838, "total_steps": 5676, "loss": 0.5634737014770508, "lr": 1.1711927964900482e-06, "epoch": 1.7048458149779737, "percentage": 85.24, "elapsed_time": "7:07:40", "remaining_time": "1:14:04"} +{"current_steps": 4839, "total_steps": 5676, "loss": 0.6786668300628662, "lr": 1.1684582353690642e-06, "epoch": 1.7051982378854627, "percentage": 85.25, "elapsed_time": "7:07:45", "remaining_time": "1:13:59"} +{"current_steps": 4840, "total_steps": 5676, "loss": 0.6117962002754211, "lr": 1.1657266722816663e-06, "epoch": 1.7055506607929516, "percentage": 85.27, "elapsed_time": "7:07:52", "remaining_time": "1:13:54"} +{"current_steps": 4841, "total_steps": 5676, "loss": 0.6115782260894775, "lr": 1.1629981081551278e-06, "epoch": 1.7059030837004405, "percentage": 85.29, "elapsed_time": "7:07:58", "remaining_time": "1:13:49"} +{"current_steps": 4842, "total_steps": 5676, "loss": 0.6526266932487488, "lr": 1.1602725439157114e-06, "epoch": 1.7062555066079295, "percentage": 85.31, "elapsed_time": "7:08:04", "remaining_time": "1:13:43"} +{"current_steps": 4843, "total_steps": 5676, "loss": 0.5449249148368835, "lr": 1.1575499804886658e-06, "epoch": 1.7066079295154184, "percentage": 85.32, "elapsed_time": "7:08:10", "remaining_time": "1:13:38"} +{"current_steps": 4844, "total_steps": 5676, "loss": 0.5793930292129517, "lr": 1.1548304187982152e-06, "epoch": 1.7069603524229073, "percentage": 85.34, "elapsed_time": "7:08:15", "remaining_time": "1:13:33"} +{"current_steps": 4845, "total_steps": 5676, "loss": 0.5133150815963745, "lr": 1.152113859767565e-06, "epoch": 1.7073127753303965, "percentage": 85.36, "elapsed_time": "7:08:21", "remaining_time": "1:13:28"} +{"current_steps": 4846, "total_steps": 5676, "loss": 0.6771460771560669, "lr": 1.1494003043189028e-06, 
"epoch": 1.7076651982378854, "percentage": 85.38, "elapsed_time": "7:08:25", "remaining_time": "1:13:22"} +{"current_steps": 4847, "total_steps": 5676, "loss": 0.5680071115493774, "lr": 1.1466897533733945e-06, "epoch": 1.7080176211453746, "percentage": 85.39, "elapsed_time": "7:08:30", "remaining_time": "1:13:17"} +{"current_steps": 4848, "total_steps": 5676, "loss": 0.58878493309021, "lr": 1.1439822078511941e-06, "epoch": 1.7083700440528635, "percentage": 85.41, "elapsed_time": "7:08:36", "remaining_time": "1:13:12"} +{"current_steps": 4849, "total_steps": 5676, "loss": 0.6441792249679565, "lr": 1.14127766867142e-06, "epoch": 1.7087224669603525, "percentage": 85.43, "elapsed_time": "7:08:40", "remaining_time": "1:13:06"} +{"current_steps": 4850, "total_steps": 5676, "loss": 0.4910963773727417, "lr": 1.1385761367521865e-06, "epoch": 1.7090748898678414, "percentage": 85.45, "elapsed_time": "7:08:45", "remaining_time": "1:13:01"} +{"current_steps": 4851, "total_steps": 5676, "loss": 0.5878154635429382, "lr": 1.1358776130105765e-06, "epoch": 1.7094273127753303, "percentage": 85.47, "elapsed_time": "7:08:51", "remaining_time": "1:12:56"} +{"current_steps": 4852, "total_steps": 5676, "loss": 0.5835394859313965, "lr": 1.133182098362654e-06, "epoch": 1.7097797356828193, "percentage": 85.48, "elapsed_time": "7:08:56", "remaining_time": "1:12:50"} +{"current_steps": 4853, "total_steps": 5676, "loss": 0.6612577438354492, "lr": 1.130489593723465e-06, "epoch": 1.7101321585903082, "percentage": 85.5, "elapsed_time": "7:09:01", "remaining_time": "1:12:45"} +{"current_steps": 4854, "total_steps": 5676, "loss": 0.5892096757888794, "lr": 1.1278001000070282e-06, "epoch": 1.7104845814977974, "percentage": 85.52, "elapsed_time": "7:09:07", "remaining_time": "1:12:40"} +{"current_steps": 4855, "total_steps": 5676, "loss": 0.6196271181106567, "lr": 1.1251136181263433e-06, "epoch": 1.7108370044052863, "percentage": 85.54, "elapsed_time": "7:09:12", "remaining_time": "1:12:34"} 
+{"current_steps": 4856, "total_steps": 5676, "loss": 0.5365586280822754, "lr": 1.122430148993392e-06, "epoch": 1.7111894273127755, "percentage": 85.55, "elapsed_time": "7:09:18", "remaining_time": "1:12:29"} +{"current_steps": 4857, "total_steps": 5676, "loss": 0.6006353497505188, "lr": 1.119749693519121e-06, "epoch": 1.7115418502202644, "percentage": 85.57, "elapsed_time": "7:09:23", "remaining_time": "1:12:24"} +{"current_steps": 4858, "total_steps": 5676, "loss": 0.5645362138748169, "lr": 1.117072252613467e-06, "epoch": 1.7118942731277533, "percentage": 85.59, "elapsed_time": "7:09:29", "remaining_time": "1:12:19"} +{"current_steps": 4859, "total_steps": 5676, "loss": 0.46408799290657043, "lr": 1.1143978271853362e-06, "epoch": 1.7122466960352423, "percentage": 85.61, "elapsed_time": "7:09:36", "remaining_time": "1:12:14"} +{"current_steps": 4860, "total_steps": 5676, "loss": 0.7798513770103455, "lr": 1.1117264181426134e-06, "epoch": 1.7125991189427312, "percentage": 85.62, "elapsed_time": "7:09:42", "remaining_time": "1:12:08"} +{"current_steps": 4861, "total_steps": 5676, "loss": 0.739770770072937, "lr": 1.109058026392158e-06, "epoch": 1.7129515418502201, "percentage": 85.64, "elapsed_time": "7:09:48", "remaining_time": "1:12:03"} +{"current_steps": 4862, "total_steps": 5676, "loss": 0.567306637763977, "lr": 1.1063926528398062e-06, "epoch": 1.7133039647577093, "percentage": 85.66, "elapsed_time": "7:09:52", "remaining_time": "1:11:58"} +{"current_steps": 4863, "total_steps": 5676, "loss": 0.5730962753295898, "lr": 1.1037302983903686e-06, "epoch": 1.7136563876651982, "percentage": 85.68, "elapsed_time": "7:09:56", "remaining_time": "1:11:52"} +{"current_steps": 4864, "total_steps": 5676, "loss": 0.6311475038528442, "lr": 1.1010709639476335e-06, "epoch": 1.7140088105726872, "percentage": 85.69, "elapsed_time": "7:10:01", "remaining_time": "1:11:47"} +{"current_steps": 4865, "total_steps": 5676, "loss": 0.5867577791213989, "lr": 1.098414650414359e-06, "epoch": 
1.7143612334801763, "percentage": 85.71, "elapsed_time": "7:10:06", "remaining_time": "1:11:41"} +{"current_steps": 4866, "total_steps": 5676, "loss": 0.6291393637657166, "lr": 1.0957613586922844e-06, "epoch": 1.7147136563876653, "percentage": 85.73, "elapsed_time": "7:10:11", "remaining_time": "1:11:36"} +{"current_steps": 4867, "total_steps": 5676, "loss": 0.5811575651168823, "lr": 1.0931110896821184e-06, "epoch": 1.7150660792951542, "percentage": 85.75, "elapsed_time": "7:10:17", "remaining_time": "1:11:31"} +{"current_steps": 4868, "total_steps": 5676, "loss": 0.6340835690498352, "lr": 1.0904638442835459e-06, "epoch": 1.7154185022026431, "percentage": 85.76, "elapsed_time": "7:10:23", "remaining_time": "1:11:26"} +{"current_steps": 4869, "total_steps": 5676, "loss": 0.6543419361114502, "lr": 1.087819623395222e-06, "epoch": 1.715770925110132, "percentage": 85.78, "elapsed_time": "7:10:28", "remaining_time": "1:11:20"} +{"current_steps": 4870, "total_steps": 5676, "loss": 0.5669729709625244, "lr": 1.0851784279147793e-06, "epoch": 1.716123348017621, "percentage": 85.8, "elapsed_time": "7:10:34", "remaining_time": "1:11:15"} +{"current_steps": 4871, "total_steps": 5676, "loss": 0.5422554612159729, "lr": 1.08254025873882e-06, "epoch": 1.7164757709251102, "percentage": 85.82, "elapsed_time": "7:10:40", "remaining_time": "1:11:10"} +{"current_steps": 4872, "total_steps": 5676, "loss": 0.6154215335845947, "lr": 1.0799051167629215e-06, "epoch": 1.716828193832599, "percentage": 85.84, "elapsed_time": "7:10:45", "remaining_time": "1:11:05"} +{"current_steps": 4873, "total_steps": 5676, "loss": 0.6306319236755371, "lr": 1.0772730028816304e-06, "epoch": 1.7171806167400883, "percentage": 85.85, "elapsed_time": "7:10:50", "remaining_time": "1:10:59"} +{"current_steps": 4874, "total_steps": 5676, "loss": 0.6301003694534302, "lr": 1.0746439179884716e-06, "epoch": 1.7175330396475772, "percentage": 85.87, "elapsed_time": "7:10:56", "remaining_time": "1:10:54"} +{"current_steps": 
4875, "total_steps": 5676, "loss": 0.5730071067810059, "lr": 1.0720178629759347e-06, "epoch": 1.7178854625550661, "percentage": 85.89, "elapsed_time": "7:11:02", "remaining_time": "1:10:49"} +{"current_steps": 4876, "total_steps": 5676, "loss": 0.5330506563186646, "lr": 1.0693948387354836e-06, "epoch": 1.718237885462555, "percentage": 85.91, "elapsed_time": "7:11:07", "remaining_time": "1:10:43"} +{"current_steps": 4877, "total_steps": 5676, "loss": 0.7724611759185791, "lr": 1.0667748461575544e-06, "epoch": 1.718590308370044, "percentage": 85.92, "elapsed_time": "7:11:12", "remaining_time": "1:10:38"} +{"current_steps": 4878, "total_steps": 5676, "loss": 0.5415126085281372, "lr": 1.0641578861315517e-06, "epoch": 1.718942731277533, "percentage": 85.94, "elapsed_time": "7:11:17", "remaining_time": "1:10:33"} +{"current_steps": 4879, "total_steps": 5676, "loss": 0.4895828664302826, "lr": 1.0615439595458554e-06, "epoch": 1.7192951541850219, "percentage": 85.96, "elapsed_time": "7:11:22", "remaining_time": "1:10:27"} +{"current_steps": 4880, "total_steps": 5676, "loss": 0.6049074530601501, "lr": 1.0589330672878084e-06, "epoch": 1.719647577092511, "percentage": 85.98, "elapsed_time": "7:11:26", "remaining_time": "1:10:22"} +{"current_steps": 4881, "total_steps": 5676, "loss": 0.5733378529548645, "lr": 1.056325210243726e-06, "epoch": 1.72, "percentage": 85.99, "elapsed_time": "7:11:32", "remaining_time": "1:10:17"} +{"current_steps": 4882, "total_steps": 5676, "loss": 0.6034674644470215, "lr": 1.0537203892989e-06, "epoch": 1.7203524229074891, "percentage": 86.01, "elapsed_time": "7:11:37", "remaining_time": "1:10:11"} +{"current_steps": 4883, "total_steps": 5676, "loss": 0.5282200574874878, "lr": 1.0511186053375833e-06, "epoch": 1.720704845814978, "percentage": 86.03, "elapsed_time": "7:11:42", "remaining_time": "1:10:06"} +{"current_steps": 4884, "total_steps": 5676, "loss": 0.6331876516342163, "lr": 1.0485198592430001e-06, "epoch": 1.721057268722467, "percentage": 
86.05, "elapsed_time": "7:11:48", "remaining_time": "1:10:01"} +{"current_steps": 4885, "total_steps": 5676, "loss": 0.5194844007492065, "lr": 1.045924151897344e-06, "epoch": 1.721409691629956, "percentage": 86.06, "elapsed_time": "7:11:52", "remaining_time": "1:09:55"} +{"current_steps": 4886, "total_steps": 5676, "loss": 0.5496135354042053, "lr": 1.0433314841817755e-06, "epoch": 1.7217621145374449, "percentage": 86.08, "elapsed_time": "7:11:57", "remaining_time": "1:09:50"} +{"current_steps": 4887, "total_steps": 5676, "loss": 0.7871953248977661, "lr": 1.0407418569764304e-06, "epoch": 1.7221145374449338, "percentage": 86.1, "elapsed_time": "7:12:03", "remaining_time": "1:09:45"} +{"current_steps": 4888, "total_steps": 5676, "loss": 0.7199628353118896, "lr": 1.0381552711604004e-06, "epoch": 1.7224669603524227, "percentage": 86.12, "elapsed_time": "7:12:06", "remaining_time": "1:09:39"} +{"current_steps": 4889, "total_steps": 5676, "loss": 0.5876469612121582, "lr": 1.0355717276117506e-06, "epoch": 1.722819383259912, "percentage": 86.13, "elapsed_time": "7:12:13", "remaining_time": "1:09:34"} +{"current_steps": 4890, "total_steps": 5676, "loss": 0.5543426275253296, "lr": 1.0329912272075181e-06, "epoch": 1.7231718061674008, "percentage": 86.15, "elapsed_time": "7:12:17", "remaining_time": "1:09:29"} +{"current_steps": 4891, "total_steps": 5676, "loss": 0.5118540525436401, "lr": 1.0304137708236994e-06, "epoch": 1.72352422907489, "percentage": 86.17, "elapsed_time": "7:12:23", "remaining_time": "1:09:23"} +{"current_steps": 4892, "total_steps": 5676, "loss": 0.628477931022644, "lr": 1.0278393593352631e-06, "epoch": 1.723876651982379, "percentage": 86.19, "elapsed_time": "7:12:28", "remaining_time": "1:09:18"} +{"current_steps": 4893, "total_steps": 5676, "loss": 0.6119322776794434, "lr": 1.0252679936161392e-06, "epoch": 1.7242290748898679, "percentage": 86.21, "elapsed_time": "7:12:34", "remaining_time": "1:09:13"} +{"current_steps": 4894, "total_steps": 5676, "loss": 
0.7661763429641724, "lr": 1.0226996745392259e-06, "epoch": 1.7245814977973568, "percentage": 86.22, "elapsed_time": "7:12:40", "remaining_time": "1:09:08"} +{"current_steps": 4895, "total_steps": 5676, "loss": 0.6431440114974976, "lr": 1.0201344029763927e-06, "epoch": 1.7249339207048457, "percentage": 86.24, "elapsed_time": "7:12:44", "remaining_time": "1:09:02"} +{"current_steps": 4896, "total_steps": 5676, "loss": 0.7295387983322144, "lr": 1.0175721797984639e-06, "epoch": 1.7252863436123347, "percentage": 86.26, "elapsed_time": "7:12:50", "remaining_time": "1:08:57"} +{"current_steps": 4897, "total_steps": 5676, "loss": 0.58225017786026, "lr": 1.015013005875235e-06, "epoch": 1.7256387665198238, "percentage": 86.28, "elapsed_time": "7:12:57", "remaining_time": "1:08:52"} +{"current_steps": 4898, "total_steps": 5676, "loss": 0.5467473864555359, "lr": 1.0124568820754689e-06, "epoch": 1.7259911894273128, "percentage": 86.29, "elapsed_time": "7:13:01", "remaining_time": "1:08:46"} +{"current_steps": 4899, "total_steps": 5676, "loss": 0.7637814283370972, "lr": 1.00990380926689e-06, "epoch": 1.7263436123348017, "percentage": 86.31, "elapsed_time": "7:13:06", "remaining_time": "1:08:41"} +{"current_steps": 4900, "total_steps": 5676, "loss": 0.5354464650154114, "lr": 1.0073537883161821e-06, "epoch": 1.7266960352422909, "percentage": 86.33, "elapsed_time": "7:13:10", "remaining_time": "1:08:36"} +{"current_steps": 4901, "total_steps": 5676, "loss": 0.5213606357574463, "lr": 1.0048068200890037e-06, "epoch": 1.7270484581497798, "percentage": 86.35, "elapsed_time": "7:13:19", "remaining_time": "1:08:31"} +{"current_steps": 4902, "total_steps": 5676, "loss": 0.6073330640792847, "lr": 1.0022629054499678e-06, "epoch": 1.7274008810572687, "percentage": 86.36, "elapsed_time": "7:13:25", "remaining_time": "1:08:26"} +{"current_steps": 4903, "total_steps": 5676, "loss": 0.5711998343467712, "lr": 9.997220452626587e-07, "epoch": 1.7277533039647577, "percentage": 86.38, "elapsed_time": 
"7:13:29", "remaining_time": "1:08:20"} +{"current_steps": 4904, "total_steps": 5676, "loss": 0.6824701428413391, "lr": 9.971842403896137e-07, "epoch": 1.7281057268722466, "percentage": 86.4, "elapsed_time": "7:13:34", "remaining_time": "1:08:15"} +{"current_steps": 4905, "total_steps": 5676, "loss": 0.528059184551239, "lr": 9.9464949169234e-07, "epoch": 1.7284581497797356, "percentage": 86.42, "elapsed_time": "7:13:39", "remaining_time": "1:08:09"} +{"current_steps": 4906, "total_steps": 5676, "loss": 0.45617133378982544, "lr": 9.92117800031308e-07, "epoch": 1.7288105726872247, "percentage": 86.43, "elapsed_time": "7:13:43", "remaining_time": "1:08:04"} +{"current_steps": 4907, "total_steps": 5676, "loss": 0.6186379194259644, "lr": 9.895891662659485e-07, "epoch": 1.7291629955947136, "percentage": 86.45, "elapsed_time": "7:13:47", "remaining_time": "1:07:58"} +{"current_steps": 4908, "total_steps": 5676, "loss": 0.622776985168457, "lr": 9.870635912546511e-07, "epoch": 1.7295154185022028, "percentage": 86.47, "elapsed_time": "7:13:52", "remaining_time": "1:07:53"} +{"current_steps": 4909, "total_steps": 5676, "loss": 0.6322426199913025, "lr": 9.845410758547724e-07, "epoch": 1.7298678414096917, "percentage": 86.49, "elapsed_time": "7:13:56", "remaining_time": "1:07:48"} +{"current_steps": 4910, "total_steps": 5676, "loss": 0.565685510635376, "lr": 9.82021620922624e-07, "epoch": 1.7302202643171807, "percentage": 86.5, "elapsed_time": "7:14:01", "remaining_time": "1:07:42"} +{"current_steps": 4911, "total_steps": 5676, "loss": 0.670723557472229, "lr": 9.795052273134908e-07, "epoch": 1.7305726872246696, "percentage": 86.52, "elapsed_time": "7:14:07", "remaining_time": "1:07:37"} +{"current_steps": 4912, "total_steps": 5676, "loss": 0.627914309501648, "lr": 9.769918958816017e-07, "epoch": 1.7309251101321586, "percentage": 86.54, "elapsed_time": "7:14:13", "remaining_time": "1:07:32"} +{"current_steps": 4913, "total_steps": 5676, "loss": 0.613754391670227, "lr": 
9.74481627480156e-07, "epoch": 1.7312775330396475, "percentage": 86.56, "elapsed_time": "7:14:18", "remaining_time": "1:07:26"} +{"current_steps": 4914, "total_steps": 5676, "loss": 0.7128336429595947, "lr": 9.719744229613148e-07, "epoch": 1.7316299559471364, "percentage": 86.58, "elapsed_time": "7:14:25", "remaining_time": "1:07:21"} +{"current_steps": 4915, "total_steps": 5676, "loss": 0.692448079586029, "lr": 9.694702831761937e-07, "epoch": 1.7319823788546256, "percentage": 86.59, "elapsed_time": "7:14:31", "remaining_time": "1:07:16"} +{"current_steps": 4916, "total_steps": 5676, "loss": 0.5722585916519165, "lr": 9.669692089748717e-07, "epoch": 1.7323348017621145, "percentage": 86.61, "elapsed_time": "7:14:36", "remaining_time": "1:07:11"} +{"current_steps": 4917, "total_steps": 5676, "loss": 0.5267904996871948, "lr": 9.64471201206385e-07, "epoch": 1.7326872246696037, "percentage": 86.63, "elapsed_time": "7:14:42", "remaining_time": "1:07:06"} +{"current_steps": 4918, "total_steps": 5676, "loss": 0.6290950179100037, "lr": 9.619762607187277e-07, "epoch": 1.7330396475770926, "percentage": 86.65, "elapsed_time": "7:14:47", "remaining_time": "1:07:00"} +{"current_steps": 4919, "total_steps": 5676, "loss": 0.5137144327163696, "lr": 9.594843883588588e-07, "epoch": 1.7333920704845815, "percentage": 86.66, "elapsed_time": "7:14:52", "remaining_time": "1:06:55"} +{"current_steps": 4920, "total_steps": 5676, "loss": 0.6110765337944031, "lr": 9.569955849726875e-07, "epoch": 1.7337444933920705, "percentage": 86.68, "elapsed_time": "7:14:58", "remaining_time": "1:06:50"} +{"current_steps": 4921, "total_steps": 5676, "loss": 0.5097514390945435, "lr": 9.545098514050844e-07, "epoch": 1.7340969162995594, "percentage": 86.7, "elapsed_time": "7:15:03", "remaining_time": "1:06:44"} +{"current_steps": 4922, "total_steps": 5676, "loss": 0.7220968008041382, "lr": 9.520271884998822e-07, "epoch": 1.7344493392070484, "percentage": 86.72, "elapsed_time": "7:15:08", "remaining_time": 
"1:06:39"} +{"current_steps": 4923, "total_steps": 5676, "loss": 0.4790550470352173, "lr": 9.495475970998669e-07, "epoch": 1.7348017621145373, "percentage": 86.73, "elapsed_time": "7:15:13", "remaining_time": "1:06:34"} +{"current_steps": 4924, "total_steps": 5676, "loss": 0.5440540909767151, "lr": 9.470710780467818e-07, "epoch": 1.7351541850220265, "percentage": 86.75, "elapsed_time": "7:15:18", "remaining_time": "1:06:28"} +{"current_steps": 4925, "total_steps": 5676, "loss": 0.6351054310798645, "lr": 9.445976321813277e-07, "epoch": 1.7355066079295154, "percentage": 86.77, "elapsed_time": "7:15:23", "remaining_time": "1:06:23"} +{"current_steps": 4926, "total_steps": 5676, "loss": 0.597430944442749, "lr": 9.421272603431619e-07, "epoch": 1.7358590308370045, "percentage": 86.79, "elapsed_time": "7:15:28", "remaining_time": "1:06:18"} +{"current_steps": 4927, "total_steps": 5676, "loss": 0.5826110243797302, "lr": 9.396599633709013e-07, "epoch": 1.7362114537444935, "percentage": 86.8, "elapsed_time": "7:15:33", "remaining_time": "1:06:12"} +{"current_steps": 4928, "total_steps": 5676, "loss": 0.61531662940979, "lr": 9.371957421021116e-07, "epoch": 1.7365638766519824, "percentage": 86.82, "elapsed_time": "7:15:38", "remaining_time": "1:06:07"} +{"current_steps": 4929, "total_steps": 5676, "loss": 0.5286549925804138, "lr": 9.347345973733257e-07, "epoch": 1.7369162995594714, "percentage": 86.84, "elapsed_time": "7:15:44", "remaining_time": "1:06:02"} +{"current_steps": 4930, "total_steps": 5676, "loss": 0.6923980712890625, "lr": 9.322765300200209e-07, "epoch": 1.7372687224669603, "percentage": 86.86, "elapsed_time": "7:15:49", "remaining_time": "1:05:56"} +{"current_steps": 4931, "total_steps": 5676, "loss": 0.5408697128295898, "lr": 9.298215408766376e-07, "epoch": 1.7376211453744492, "percentage": 86.87, "elapsed_time": "7:15:55", "remaining_time": "1:05:51"} +{"current_steps": 4932, "total_steps": 5676, "loss": 0.6360228061676025, "lr": 9.273696307765656e-07, "epoch": 
1.7379735682819382, "percentage": 86.89, "elapsed_time": "7:16:00", "remaining_time": "1:05:46"} +{"current_steps": 4933, "total_steps": 5676, "loss": 0.46559634804725647, "lr": 9.249208005521538e-07, "epoch": 1.7383259911894273, "percentage": 86.91, "elapsed_time": "7:16:05", "remaining_time": "1:05:40"} +{"current_steps": 4934, "total_steps": 5676, "loss": 0.6065478324890137, "lr": 9.224750510347036e-07, "epoch": 1.7386784140969163, "percentage": 86.93, "elapsed_time": "7:16:11", "remaining_time": "1:05:35"} +{"current_steps": 4935, "total_steps": 5676, "loss": 0.6777745485305786, "lr": 9.2003238305447e-07, "epoch": 1.7390308370044054, "percentage": 86.95, "elapsed_time": "7:16:16", "remaining_time": "1:05:30"} +{"current_steps": 4936, "total_steps": 5676, "loss": 0.568982720375061, "lr": 9.175927974406607e-07, "epoch": 1.7393832599118944, "percentage": 86.96, "elapsed_time": "7:16:21", "remaining_time": "1:05:25"} +{"current_steps": 4937, "total_steps": 5676, "loss": 0.6014461517333984, "lr": 9.151562950214443e-07, "epoch": 1.7397356828193833, "percentage": 86.98, "elapsed_time": "7:16:27", "remaining_time": "1:05:19"} +{"current_steps": 4938, "total_steps": 5676, "loss": 0.6312133073806763, "lr": 9.127228766239349e-07, "epoch": 1.7400881057268722, "percentage": 87.0, "elapsed_time": "7:16:33", "remaining_time": "1:05:14"} +{"current_steps": 4939, "total_steps": 5676, "loss": 0.5440298318862915, "lr": 9.102925430742015e-07, "epoch": 1.7404405286343612, "percentage": 87.02, "elapsed_time": "7:16:38", "remaining_time": "1:05:09"} +{"current_steps": 4940, "total_steps": 5676, "loss": 0.6599005460739136, "lr": 9.078652951972688e-07, "epoch": 1.74079295154185, "percentage": 87.03, "elapsed_time": "7:16:44", "remaining_time": "1:05:04"} +{"current_steps": 4941, "total_steps": 5676, "loss": 0.6719228625297546, "lr": 9.054411338171099e-07, "epoch": 1.7411453744493393, "percentage": 87.05, "elapsed_time": "7:16:51", "remaining_time": "1:04:59"} +{"current_steps": 4942, 
"total_steps": 5676, "loss": 0.5771356821060181, "lr": 9.030200597566529e-07, "epoch": 1.7414977973568282, "percentage": 87.07, "elapsed_time": "7:16:56", "remaining_time": "1:04:53"} +{"current_steps": 4943, "total_steps": 5676, "loss": 0.5066591501235962, "lr": 9.006020738377764e-07, "epoch": 1.7418502202643171, "percentage": 87.09, "elapsed_time": "7:17:02", "remaining_time": "1:04:48"} +{"current_steps": 4944, "total_steps": 5676, "loss": 0.5091663002967834, "lr": 8.981871768813111e-07, "epoch": 1.7422026431718063, "percentage": 87.1, "elapsed_time": "7:17:07", "remaining_time": "1:04:43"} +{"current_steps": 4945, "total_steps": 5676, "loss": 0.6594514846801758, "lr": 8.957753697070415e-07, "epoch": 1.7425550660792952, "percentage": 87.12, "elapsed_time": "7:17:13", "remaining_time": "1:04:37"} +{"current_steps": 4946, "total_steps": 5676, "loss": 0.5485379695892334, "lr": 8.933666531337004e-07, "epoch": 1.7429074889867842, "percentage": 87.14, "elapsed_time": "7:17:17", "remaining_time": "1:04:32"} +{"current_steps": 4947, "total_steps": 5676, "loss": 0.6079416871070862, "lr": 8.909610279789716e-07, "epoch": 1.743259911894273, "percentage": 87.16, "elapsed_time": "7:17:24", "remaining_time": "1:04:27"} +{"current_steps": 4948, "total_steps": 5676, "loss": 0.4980606436729431, "lr": 8.885584950594894e-07, "epoch": 1.743612334801762, "percentage": 87.17, "elapsed_time": "7:17:30", "remaining_time": "1:04:22"} +{"current_steps": 4949, "total_steps": 5676, "loss": 0.47701022028923035, "lr": 8.861590551908405e-07, "epoch": 1.743964757709251, "percentage": 87.19, "elapsed_time": "7:17:37", "remaining_time": "1:04:17"} +{"current_steps": 4950, "total_steps": 5676, "loss": 0.5041281580924988, "lr": 8.837627091875578e-07, "epoch": 1.7443171806167401, "percentage": 87.21, "elapsed_time": "7:17:42", "remaining_time": "1:04:11"} +{"current_steps": 4951, "total_steps": 5676, "loss": 0.5477255582809448, "lr": 8.813694578631283e-07, "epoch": 1.744669603524229, "percentage": 
87.23, "elapsed_time": "7:17:47", "remaining_time": "1:04:06"} +{"current_steps": 4952, "total_steps": 5676, "loss": 0.6377973556518555, "lr": 8.78979302029983e-07, "epoch": 1.7450220264317182, "percentage": 87.24, "elapsed_time": "7:17:52", "remaining_time": "1:04:01"} +{"current_steps": 4953, "total_steps": 5676, "loss": 0.6688166856765747, "lr": 8.76592242499511e-07, "epoch": 1.7453744493392072, "percentage": 87.26, "elapsed_time": "7:17:57", "remaining_time": "1:03:55"} +{"current_steps": 4954, "total_steps": 5676, "loss": 0.6236848831176758, "lr": 8.742082800820406e-07, "epoch": 1.745726872246696, "percentage": 87.28, "elapsed_time": "7:18:00", "remaining_time": "1:03:50"} +{"current_steps": 4955, "total_steps": 5676, "loss": 0.653768002986908, "lr": 8.718274155868545e-07, "epoch": 1.746079295154185, "percentage": 87.3, "elapsed_time": "7:18:05", "remaining_time": "1:03:44"} +{"current_steps": 4956, "total_steps": 5676, "loss": 0.5647604465484619, "lr": 8.694496498221805e-07, "epoch": 1.746431718061674, "percentage": 87.32, "elapsed_time": "7:18:11", "remaining_time": "1:03:39"} +{"current_steps": 4957, "total_steps": 5676, "loss": 0.4960663914680481, "lr": 8.670749835951964e-07, "epoch": 1.746784140969163, "percentage": 87.33, "elapsed_time": "7:18:16", "remaining_time": "1:03:34"} +{"current_steps": 4958, "total_steps": 5676, "loss": 0.6271536350250244, "lr": 8.647034177120317e-07, "epoch": 1.7471365638766518, "percentage": 87.35, "elapsed_time": "7:18:21", "remaining_time": "1:03:28"} +{"current_steps": 4959, "total_steps": 5676, "loss": 0.6323459148406982, "lr": 8.623349529777525e-07, "epoch": 1.747488986784141, "percentage": 87.37, "elapsed_time": "7:18:27", "remaining_time": "1:03:23"} +{"current_steps": 4960, "total_steps": 5676, "loss": 0.6084197163581848, "lr": 8.599695901963811e-07, "epoch": 1.74784140969163, "percentage": 87.39, "elapsed_time": "7:18:31", "remaining_time": "1:03:18"} +{"current_steps": 4961, "total_steps": 5676, "loss": 
0.48974379897117615, "lr": 8.576073301708876e-07, "epoch": 1.748193832599119, "percentage": 87.4, "elapsed_time": "7:18:36", "remaining_time": "1:03:12"} +{"current_steps": 4962, "total_steps": 5676, "loss": 0.5985081195831299, "lr": 8.552481737031859e-07, "epoch": 1.748546255506608, "percentage": 87.42, "elapsed_time": "7:18:42", "remaining_time": "1:03:07"} +{"current_steps": 4963, "total_steps": 5676, "loss": 0.507872998714447, "lr": 8.528921215941299e-07, "epoch": 1.748898678414097, "percentage": 87.44, "elapsed_time": "7:18:48", "remaining_time": "1:03:02"} +{"current_steps": 4964, "total_steps": 5676, "loss": 0.5772356986999512, "lr": 8.50539174643531e-07, "epoch": 1.749251101321586, "percentage": 87.46, "elapsed_time": "7:18:54", "remaining_time": "1:02:57"} +{"current_steps": 4965, "total_steps": 5676, "loss": 0.675100564956665, "lr": 8.48189333650139e-07, "epoch": 1.7496035242290748, "percentage": 87.47, "elapsed_time": "7:19:00", "remaining_time": "1:02:51"} +{"current_steps": 4966, "total_steps": 5676, "loss": 0.5571645498275757, "lr": 8.458425994116582e-07, "epoch": 1.7499559471365638, "percentage": 87.49, "elapsed_time": "7:19:05", "remaining_time": "1:02:46"} +{"current_steps": 4967, "total_steps": 5676, "loss": 0.5842185020446777, "lr": 8.434989727247233e-07, "epoch": 1.7503083700440527, "percentage": 87.51, "elapsed_time": "7:19:10", "remaining_time": "1:02:41"} +{"current_steps": 4968, "total_steps": 5676, "loss": 0.5693016648292542, "lr": 8.41158454384925e-07, "epoch": 1.7506607929515419, "percentage": 87.53, "elapsed_time": "7:19:17", "remaining_time": "1:02:36"} +{"current_steps": 4969, "total_steps": 5676, "loss": 0.5791449546813965, "lr": 8.388210451868006e-07, "epoch": 1.7510132158590308, "percentage": 87.54, "elapsed_time": "7:19:22", "remaining_time": "1:02:30"} +{"current_steps": 4970, "total_steps": 5676, "loss": 0.4873960018157959, "lr": 8.364867459238257e-07, "epoch": 1.75136563876652, "percentage": 87.56, "elapsed_time": "7:19:27", 
"remaining_time": "1:02:25"} +{"current_steps": 4971, "total_steps": 5676, "loss": 0.609403669834137, "lr": 8.341555573884175e-07, "epoch": 1.751718061674009, "percentage": 87.58, "elapsed_time": "7:19:34", "remaining_time": "1:02:20"} +{"current_steps": 4972, "total_steps": 5676, "loss": 0.5676242113113403, "lr": 8.318274803719483e-07, "epoch": 1.7520704845814978, "percentage": 87.6, "elapsed_time": "7:19:38", "remaining_time": "1:02:15"} +{"current_steps": 4973, "total_steps": 5676, "loss": 0.7692728638648987, "lr": 8.29502515664723e-07, "epoch": 1.7524229074889868, "percentage": 87.61, "elapsed_time": "7:19:44", "remaining_time": "1:02:09"} +{"current_steps": 4974, "total_steps": 5676, "loss": 0.7940253019332886, "lr": 8.27180664056001e-07, "epoch": 1.7527753303964757, "percentage": 87.63, "elapsed_time": "7:19:48", "remaining_time": "1:02:04"} +{"current_steps": 4975, "total_steps": 5676, "loss": 0.5784735083580017, "lr": 8.24861926333973e-07, "epoch": 1.7531277533039646, "percentage": 87.65, "elapsed_time": "7:19:54", "remaining_time": "1:01:59"} +{"current_steps": 4976, "total_steps": 5676, "loss": 0.6493539810180664, "lr": 8.225463032857783e-07, "epoch": 1.7534801762114536, "percentage": 87.67, "elapsed_time": "7:19:58", "remaining_time": "1:01:53"} +{"current_steps": 4977, "total_steps": 5676, "loss": 0.615519106388092, "lr": 8.202337956975026e-07, "epoch": 1.7538325991189427, "percentage": 87.68, "elapsed_time": "7:20:05", "remaining_time": "1:01:48"} +{"current_steps": 4978, "total_steps": 5676, "loss": 0.5369104146957397, "lr": 8.179244043541678e-07, "epoch": 1.7541850220264317, "percentage": 87.7, "elapsed_time": "7:20:11", "remaining_time": "1:01:43"} +{"current_steps": 4979, "total_steps": 5676, "loss": 0.5527158975601196, "lr": 8.156181300397414e-07, "epoch": 1.7545374449339208, "percentage": 87.72, "elapsed_time": "7:20:15", "remaining_time": "1:01:37"} +{"current_steps": 4980, "total_steps": 5676, "loss": 0.5870147943496704, "lr": 
8.133149735371316e-07, "epoch": 1.7548898678414098, "percentage": 87.74, "elapsed_time": "7:20:21", "remaining_time": "1:01:32"} +{"current_steps": 4981, "total_steps": 5676, "loss": 0.7235025763511658, "lr": 8.110149356281848e-07, "epoch": 1.7552422907488987, "percentage": 87.76, "elapsed_time": "7:20:27", "remaining_time": "1:01:27"} +{"current_steps": 4982, "total_steps": 5676, "loss": 0.5630521774291992, "lr": 8.087180170937004e-07, "epoch": 1.7555947136563876, "percentage": 87.77, "elapsed_time": "7:20:32", "remaining_time": "1:01:22"} +{"current_steps": 4983, "total_steps": 5676, "loss": 0.5005021691322327, "lr": 8.06424218713403e-07, "epoch": 1.7559471365638766, "percentage": 87.79, "elapsed_time": "7:20:37", "remaining_time": "1:01:16"} +{"current_steps": 4984, "total_steps": 5676, "loss": 0.7267229557037354, "lr": 8.041335412659679e-07, "epoch": 1.7562995594713655, "percentage": 87.81, "elapsed_time": "7:20:42", "remaining_time": "1:01:11"} +{"current_steps": 4985, "total_steps": 5676, "loss": 0.6494802236557007, "lr": 8.018459855290107e-07, "epoch": 1.7566519823788547, "percentage": 87.83, "elapsed_time": "7:20:46", "remaining_time": "1:01:05"} +{"current_steps": 4986, "total_steps": 5676, "loss": 0.5637267827987671, "lr": 7.995615522790845e-07, "epoch": 1.7570044052863436, "percentage": 87.84, "elapsed_time": "7:20:52", "remaining_time": "1:01:00"} +{"current_steps": 4987, "total_steps": 5676, "loss": 0.5143958330154419, "lr": 7.972802422916826e-07, "epoch": 1.7573568281938328, "percentage": 87.86, "elapsed_time": "7:20:58", "remaining_time": "1:00:55"} +{"current_steps": 4988, "total_steps": 5676, "loss": 0.607841968536377, "lr": 7.950020563412398e-07, "epoch": 1.7577092511013217, "percentage": 87.88, "elapsed_time": "7:21:04", "remaining_time": "1:00:50"} +{"current_steps": 4989, "total_steps": 5676, "loss": 0.6066895723342896, "lr": 7.927269952011285e-07, "epoch": 1.7580616740088106, "percentage": 87.9, "elapsed_time": "7:21:09", "remaining_time": 
"1:00:44"} +{"current_steps": 4990, "total_steps": 5676, "loss": 0.6686232686042786, "lr": 7.904550596436611e-07, "epoch": 1.7584140969162996, "percentage": 87.91, "elapsed_time": "7:21:15", "remaining_time": "1:00:39"} +{"current_steps": 4991, "total_steps": 5676, "loss": 0.589708685874939, "lr": 7.881862504400884e-07, "epoch": 1.7587665198237885, "percentage": 87.93, "elapsed_time": "7:21:22", "remaining_time": "1:00:34"} +{"current_steps": 4992, "total_steps": 5676, "loss": 0.7008450031280518, "lr": 7.859205683606008e-07, "epoch": 1.7591189427312774, "percentage": 87.95, "elapsed_time": "7:21:26", "remaining_time": "1:00:29"} +{"current_steps": 4993, "total_steps": 5676, "loss": 0.5754648447036743, "lr": 7.836580141743289e-07, "epoch": 1.7594713656387664, "percentage": 87.97, "elapsed_time": "7:21:32", "remaining_time": "1:00:23"} +{"current_steps": 4994, "total_steps": 5676, "loss": 0.5756049156188965, "lr": 7.81398588649338e-07, "epoch": 1.7598237885462555, "percentage": 87.98, "elapsed_time": "7:21:37", "remaining_time": "1:00:18"} +{"current_steps": 4995, "total_steps": 5676, "loss": 0.6143715381622314, "lr": 7.791422925526326e-07, "epoch": 1.7601762114537445, "percentage": 88.0, "elapsed_time": "7:21:43", "remaining_time": "1:00:13"} +{"current_steps": 4996, "total_steps": 5676, "loss": 0.700069010257721, "lr": 7.768891266501544e-07, "epoch": 1.7605286343612336, "percentage": 88.02, "elapsed_time": "7:21:49", "remaining_time": "1:00:08"} +{"current_steps": 4997, "total_steps": 5676, "loss": 0.5200002193450928, "lr": 7.746390917067847e-07, "epoch": 1.7608810572687226, "percentage": 88.04, "elapsed_time": "7:21:53", "remaining_time": "1:00:02"} +{"current_steps": 4998, "total_steps": 5676, "loss": 0.6963525414466858, "lr": 7.723921884863395e-07, "epoch": 1.7612334801762115, "percentage": 88.05, "elapsed_time": "7:21:58", "remaining_time": "0:59:57"} +{"current_steps": 4999, "total_steps": 5676, "loss": 0.6329556703567505, "lr": 7.701484177515717e-07, "epoch": 
1.7615859030837004, "percentage": 88.07, "elapsed_time": "7:22:04", "remaining_time": "0:59:52"} +{"current_steps": 5000, "total_steps": 5676, "loss": 0.6980677247047424, "lr": 7.67907780264171e-07, "epoch": 1.7619383259911894, "percentage": 88.09, "elapsed_time": "7:22:09", "remaining_time": "0:59:46"} +{"current_steps": 5001, "total_steps": 5676, "loss": 0.5244314670562744, "lr": 7.656702767847679e-07, "epoch": 1.7622907488986783, "percentage": 88.11, "elapsed_time": "7:22:20", "remaining_time": "0:59:42"} +{"current_steps": 5002, "total_steps": 5676, "loss": 0.6679523587226868, "lr": 7.634359080729215e-07, "epoch": 1.7626431718061673, "percentage": 88.13, "elapsed_time": "7:22:24", "remaining_time": "0:59:36"} +{"current_steps": 5003, "total_steps": 5676, "loss": 0.6168316602706909, "lr": 7.612046748871327e-07, "epoch": 1.7629955947136564, "percentage": 88.14, "elapsed_time": "7:22:31", "remaining_time": "0:59:31"} +{"current_steps": 5004, "total_steps": 5676, "loss": 0.5892738699913025, "lr": 7.589765779848346e-07, "epoch": 1.7633480176211453, "percentage": 88.16, "elapsed_time": "7:22:36", "remaining_time": "0:59:26"} +{"current_steps": 5005, "total_steps": 5676, "loss": 0.6714082956314087, "lr": 7.567516181223966e-07, "epoch": 1.7637004405286345, "percentage": 88.18, "elapsed_time": "7:22:41", "remaining_time": "0:59:20"} +{"current_steps": 5006, "total_steps": 5676, "loss": 0.6327016353607178, "lr": 7.545297960551245e-07, "epoch": 1.7640528634361234, "percentage": 88.2, "elapsed_time": "7:22:45", "remaining_time": "0:59:15"} +{"current_steps": 5007, "total_steps": 5676, "loss": 0.5114126205444336, "lr": 7.52311112537254e-07, "epoch": 1.7644052863436124, "percentage": 88.21, "elapsed_time": "7:22:51", "remaining_time": "0:59:10"} +{"current_steps": 5008, "total_steps": 5676, "loss": 0.5701695084571838, "lr": 7.500955683219646e-07, "epoch": 1.7647577092511013, "percentage": 88.23, "elapsed_time": "7:22:56", "remaining_time": "0:59:04"} +{"current_steps": 5009, 
"total_steps": 5676, "loss": 0.5966283082962036, "lr": 7.478831641613616e-07, "epoch": 1.7651101321585903, "percentage": 88.25, "elapsed_time": "7:23:01", "remaining_time": "0:58:59"} +{"current_steps": 5010, "total_steps": 5676, "loss": 0.6219101548194885, "lr": 7.456739008064883e-07, "epoch": 1.7654625550660792, "percentage": 88.27, "elapsed_time": "7:23:07", "remaining_time": "0:58:54"} +{"current_steps": 5011, "total_steps": 5676, "loss": 0.6516324877738953, "lr": 7.434677790073197e-07, "epoch": 1.7658149779735681, "percentage": 88.28, "elapsed_time": "7:23:13", "remaining_time": "0:58:49"} +{"current_steps": 5012, "total_steps": 5676, "loss": 0.4623621106147766, "lr": 7.412647995127664e-07, "epoch": 1.7661674008810573, "percentage": 88.3, "elapsed_time": "7:23:19", "remaining_time": "0:58:43"} +{"current_steps": 5013, "total_steps": 5676, "loss": 0.5661109685897827, "lr": 7.390649630706703e-07, "epoch": 1.7665198237885462, "percentage": 88.32, "elapsed_time": "7:23:26", "remaining_time": "0:58:38"} +{"current_steps": 5014, "total_steps": 5676, "loss": 0.47063148021698, "lr": 7.368682704278096e-07, "epoch": 1.7668722466960354, "percentage": 88.34, "elapsed_time": "7:23:30", "remaining_time": "0:58:33"} +{"current_steps": 5015, "total_steps": 5676, "loss": 0.5684597492218018, "lr": 7.346747223298889e-07, "epoch": 1.7672246696035243, "percentage": 88.35, "elapsed_time": "7:23:36", "remaining_time": "0:58:28"} +{"current_steps": 5016, "total_steps": 5676, "loss": 0.5614477396011353, "lr": 7.324843195215548e-07, "epoch": 1.7675770925110132, "percentage": 88.37, "elapsed_time": "7:23:42", "remaining_time": "0:58:22"} +{"current_steps": 5017, "total_steps": 5676, "loss": 0.508664608001709, "lr": 7.302970627463779e-07, "epoch": 1.7679295154185022, "percentage": 88.39, "elapsed_time": "7:23:47", "remaining_time": "0:58:17"} +{"current_steps": 5018, "total_steps": 5676, "loss": 0.5348209142684937, "lr": 7.281129527468645e-07, "epoch": 1.7682819383259911, "percentage": 
88.41, "elapsed_time": "7:23:52", "remaining_time": "0:58:12"} +{"current_steps": 5019, "total_steps": 5676, "loss": 0.6441121101379395, "lr": 7.259319902644513e-07, "epoch": 1.76863436123348, "percentage": 88.42, "elapsed_time": "7:23:57", "remaining_time": "0:58:06"} +{"current_steps": 5020, "total_steps": 5676, "loss": 0.6454842686653137, "lr": 7.237541760395083e-07, "epoch": 1.7689867841409692, "percentage": 88.44, "elapsed_time": "7:24:02", "remaining_time": "0:58:01"} +{"current_steps": 5021, "total_steps": 5676, "loss": 0.4822286367416382, "lr": 7.215795108113343e-07, "epoch": 1.7693392070484582, "percentage": 88.46, "elapsed_time": "7:24:07", "remaining_time": "0:57:56"} +{"current_steps": 5022, "total_steps": 5676, "loss": 0.6078327894210815, "lr": 7.19407995318162e-07, "epoch": 1.769691629955947, "percentage": 88.48, "elapsed_time": "7:24:12", "remaining_time": "0:57:50"} +{"current_steps": 5023, "total_steps": 5676, "loss": 0.6394459009170532, "lr": 7.172396302971507e-07, "epoch": 1.7700440528634362, "percentage": 88.5, "elapsed_time": "7:24:16", "remaining_time": "0:57:45"} +{"current_steps": 5024, "total_steps": 5676, "loss": 0.646416425704956, "lr": 7.150744164843959e-07, "epoch": 1.7703964757709252, "percentage": 88.51, "elapsed_time": "7:24:21", "remaining_time": "0:57:40"} +{"current_steps": 5025, "total_steps": 5676, "loss": 0.6265356540679932, "lr": 7.129123546149208e-07, "epoch": 1.7707488986784141, "percentage": 88.53, "elapsed_time": "7:24:27", "remaining_time": "0:57:34"} +{"current_steps": 5026, "total_steps": 5676, "loss": 0.5074717998504639, "lr": 7.107534454226728e-07, "epoch": 1.771101321585903, "percentage": 88.55, "elapsed_time": "7:24:32", "remaining_time": "0:57:29"} +{"current_steps": 5027, "total_steps": 5676, "loss": 0.7036402821540833, "lr": 7.0859768964054e-07, "epoch": 1.771453744493392, "percentage": 88.57, "elapsed_time": "7:24:38", "remaining_time": "0:57:24"} +{"current_steps": 5028, "total_steps": 5676, "loss": 
0.6098893880844116, "lr": 7.064450880003327e-07, "epoch": 1.771806167400881, "percentage": 88.58, "elapsed_time": "7:24:42", "remaining_time": "0:57:18"} +{"current_steps": 5029, "total_steps": 5676, "loss": 0.582880973815918, "lr": 7.042956412327917e-07, "epoch": 1.77215859030837, "percentage": 88.6, "elapsed_time": "7:24:47", "remaining_time": "0:57:13"} +{"current_steps": 5030, "total_steps": 5676, "loss": 0.6003242135047913, "lr": 7.021493500675869e-07, "epoch": 1.772511013215859, "percentage": 88.62, "elapsed_time": "7:24:52", "remaining_time": "0:57:08"} +{"current_steps": 5031, "total_steps": 5676, "loss": 0.4999944865703583, "lr": 7.000062152333165e-07, "epoch": 1.7728634361233482, "percentage": 88.64, "elapsed_time": "7:24:57", "remaining_time": "0:57:02"} +{"current_steps": 5032, "total_steps": 5676, "loss": 0.5569149255752563, "lr": 6.978662374575107e-07, "epoch": 1.7732158590308371, "percentage": 88.65, "elapsed_time": "7:25:02", "remaining_time": "0:56:57"} +{"current_steps": 5033, "total_steps": 5676, "loss": 0.5600287914276123, "lr": 6.957294174666263e-07, "epoch": 1.773568281938326, "percentage": 88.67, "elapsed_time": "7:25:07", "remaining_time": "0:56:52"} +{"current_steps": 5034, "total_steps": 5676, "loss": 0.5412951707839966, "lr": 6.935957559860418e-07, "epoch": 1.773920704845815, "percentage": 88.69, "elapsed_time": "7:25:12", "remaining_time": "0:56:46"} +{"current_steps": 5035, "total_steps": 5676, "loss": 0.5881151556968689, "lr": 6.914652537400735e-07, "epoch": 1.774273127753304, "percentage": 88.71, "elapsed_time": "7:25:17", "remaining_time": "0:56:41"} +{"current_steps": 5036, "total_steps": 5676, "loss": 0.5975406169891357, "lr": 6.893379114519572e-07, "epoch": 1.7746255506607929, "percentage": 88.72, "elapsed_time": "7:25:22", "remaining_time": "0:56:36"} +{"current_steps": 5037, "total_steps": 5676, "loss": 0.6266802549362183, "lr": 6.872137298438653e-07, "epoch": 1.7749779735682818, "percentage": 88.74, "elapsed_time": "7:25:28", 
"remaining_time": "0:56:30"} +{"current_steps": 5038, "total_steps": 5676, "loss": 0.6825709939002991, "lr": 6.850927096368854e-07, "epoch": 1.775330396475771, "percentage": 88.76, "elapsed_time": "7:25:33", "remaining_time": "0:56:25"} +{"current_steps": 5039, "total_steps": 5676, "loss": 0.6035742163658142, "lr": 6.829748515510381e-07, "epoch": 1.77568281938326, "percentage": 88.78, "elapsed_time": "7:25:39", "remaining_time": "0:56:20"} +{"current_steps": 5040, "total_steps": 5676, "loss": 0.6665611267089844, "lr": 6.808601563052742e-07, "epoch": 1.776035242290749, "percentage": 88.79, "elapsed_time": "7:25:45", "remaining_time": "0:56:15"} +{"current_steps": 5041, "total_steps": 5676, "loss": 0.8202367424964905, "lr": 6.787486246174657e-07, "epoch": 1.776387665198238, "percentage": 88.81, "elapsed_time": "7:25:50", "remaining_time": "0:56:09"} +{"current_steps": 5042, "total_steps": 5676, "loss": 0.6516656875610352, "lr": 6.766402572044084e-07, "epoch": 1.776740088105727, "percentage": 88.83, "elapsed_time": "7:25:56", "remaining_time": "0:56:04"} +{"current_steps": 5043, "total_steps": 5676, "loss": 0.663591742515564, "lr": 6.745350547818307e-07, "epoch": 1.7770925110132159, "percentage": 88.85, "elapsed_time": "7:26:02", "remaining_time": "0:55:59"} +{"current_steps": 5044, "total_steps": 5676, "loss": 0.6025142669677734, "lr": 6.724330180643824e-07, "epoch": 1.7774449339207048, "percentage": 88.87, "elapsed_time": "7:26:07", "remaining_time": "0:55:53"} +{"current_steps": 5045, "total_steps": 5676, "loss": 0.5704027414321899, "lr": 6.703341477656422e-07, "epoch": 1.7777973568281937, "percentage": 88.88, "elapsed_time": "7:26:11", "remaining_time": "0:55:48"} +{"current_steps": 5046, "total_steps": 5676, "loss": 0.6518473625183105, "lr": 6.682384445981071e-07, "epoch": 1.7781497797356827, "percentage": 88.9, "elapsed_time": "7:26:16", "remaining_time": "0:55:43"} +{"current_steps": 5047, "total_steps": 5676, "loss": 0.5547574758529663, "lr": 
6.661459092732037e-07, "epoch": 1.7785022026431718, "percentage": 88.92, "elapsed_time": "7:26:22", "remaining_time": "0:55:37"} +{"current_steps": 5048, "total_steps": 5676, "loss": 0.6248831748962402, "lr": 6.640565425012846e-07, "epoch": 1.7788546255506608, "percentage": 88.94, "elapsed_time": "7:26:27", "remaining_time": "0:55:32"} +{"current_steps": 5049, "total_steps": 5676, "loss": 0.5899701118469238, "lr": 6.619703449916259e-07, "epoch": 1.77920704845815, "percentage": 88.95, "elapsed_time": "7:26:33", "remaining_time": "0:55:27"} +{"current_steps": 5050, "total_steps": 5676, "loss": 0.41864174604415894, "lr": 6.598873174524223e-07, "epoch": 1.7795594713656389, "percentage": 88.97, "elapsed_time": "7:26:38", "remaining_time": "0:55:21"} +{"current_steps": 5051, "total_steps": 5676, "loss": 0.7473436594009399, "lr": 6.578074605908002e-07, "epoch": 1.7799118942731278, "percentage": 88.99, "elapsed_time": "7:26:43", "remaining_time": "0:55:16"} +{"current_steps": 5052, "total_steps": 5676, "loss": 0.49480879306793213, "lr": 6.557307751128051e-07, "epoch": 1.7802643171806167, "percentage": 89.01, "elapsed_time": "7:26:49", "remaining_time": "0:55:11"} +{"current_steps": 5053, "total_steps": 5676, "loss": 0.5619323253631592, "lr": 6.536572617234082e-07, "epoch": 1.7806167400881057, "percentage": 89.02, "elapsed_time": "7:26:53", "remaining_time": "0:55:05"} +{"current_steps": 5054, "total_steps": 5676, "loss": 0.5271984338760376, "lr": 6.515869211265013e-07, "epoch": 1.7809691629955946, "percentage": 89.04, "elapsed_time": "7:26:58", "remaining_time": "0:55:00"} +{"current_steps": 5055, "total_steps": 5676, "loss": 0.6544383764266968, "lr": 6.495197540248999e-07, "epoch": 1.7813215859030835, "percentage": 89.06, "elapsed_time": "7:27:03", "remaining_time": "0:54:55"} +{"current_steps": 5056, "total_steps": 5676, "loss": 0.6525388956069946, "lr": 6.474557611203458e-07, "epoch": 1.7816740088105727, "percentage": 89.08, "elapsed_time": "7:27:08", "remaining_time": 
"0:54:49"} +{"current_steps": 5057, "total_steps": 5676, "loss": 0.5509910583496094, "lr": 6.453949431134987e-07, "epoch": 1.7820264317180616, "percentage": 89.09, "elapsed_time": "7:27:13", "remaining_time": "0:54:44"} +{"current_steps": 5058, "total_steps": 5676, "loss": 0.5030776262283325, "lr": 6.433373007039412e-07, "epoch": 1.7823788546255508, "percentage": 89.11, "elapsed_time": "7:27:19", "remaining_time": "0:54:39"} +{"current_steps": 5059, "total_steps": 5676, "loss": 0.6743696331977844, "lr": 6.412828345901811e-07, "epoch": 1.7827312775330397, "percentage": 89.13, "elapsed_time": "7:27:24", "remaining_time": "0:54:33"} +{"current_steps": 5060, "total_steps": 5676, "loss": 0.5395437479019165, "lr": 6.392315454696452e-07, "epoch": 1.7830837004405287, "percentage": 89.15, "elapsed_time": "7:27:28", "remaining_time": "0:54:28"} +{"current_steps": 5061, "total_steps": 5676, "loss": 0.5773402452468872, "lr": 6.371834340386807e-07, "epoch": 1.7834361233480176, "percentage": 89.16, "elapsed_time": "7:27:35", "remaining_time": "0:54:23"} +{"current_steps": 5062, "total_steps": 5676, "loss": 0.6014268398284912, "lr": 6.351385009925582e-07, "epoch": 1.7837885462555065, "percentage": 89.18, "elapsed_time": "7:27:41", "remaining_time": "0:54:18"} +{"current_steps": 5063, "total_steps": 5676, "loss": 0.5519139170646667, "lr": 6.33096747025469e-07, "epoch": 1.7841409691629955, "percentage": 89.2, "elapsed_time": "7:27:47", "remaining_time": "0:54:12"} +{"current_steps": 5064, "total_steps": 5676, "loss": 0.5407502055168152, "lr": 6.310581728305254e-07, "epoch": 1.7844933920704846, "percentage": 89.22, "elapsed_time": "7:27:51", "remaining_time": "0:54:07"} +{"current_steps": 5065, "total_steps": 5676, "loss": 0.61688232421875, "lr": 6.290227790997605e-07, "epoch": 1.7848458149779736, "percentage": 89.24, "elapsed_time": "7:27:57", "remaining_time": "0:54:02"} +{"current_steps": 5066, "total_steps": 5676, "loss": 0.5212849974632263, "lr": 6.269905665241271e-07, "epoch": 
1.7851982378854625, "percentage": 89.25, "elapsed_time": "7:28:02", "remaining_time": "0:53:56"} +{"current_steps": 5067, "total_steps": 5676, "loss": 0.6827710866928101, "lr": 6.249615357934968e-07, "epoch": 1.7855506607929517, "percentage": 89.27, "elapsed_time": "7:28:07", "remaining_time": "0:53:51"} +{"current_steps": 5068, "total_steps": 5676, "loss": 0.6907633543014526, "lr": 6.22935687596663e-07, "epoch": 1.7859030837004406, "percentage": 89.29, "elapsed_time": "7:28:12", "remaining_time": "0:53:46"} +{"current_steps": 5069, "total_steps": 5676, "loss": 0.5707769989967346, "lr": 6.209130226213378e-07, "epoch": 1.7862555066079295, "percentage": 89.31, "elapsed_time": "7:28:19", "remaining_time": "0:53:41"} +{"current_steps": 5070, "total_steps": 5676, "loss": 0.6062690019607544, "lr": 6.188935415541541e-07, "epoch": 1.7866079295154185, "percentage": 89.32, "elapsed_time": "7:28:24", "remaining_time": "0:53:35"} +{"current_steps": 5071, "total_steps": 5676, "loss": 0.5291163921356201, "lr": 6.168772450806604e-07, "epoch": 1.7869603524229074, "percentage": 89.34, "elapsed_time": "7:28:29", "remaining_time": "0:53:30"} +{"current_steps": 5072, "total_steps": 5676, "loss": 0.6324198246002197, "lr": 6.148641338853301e-07, "epoch": 1.7873127753303963, "percentage": 89.36, "elapsed_time": "7:28:34", "remaining_time": "0:53:25"} +{"current_steps": 5073, "total_steps": 5676, "loss": 0.5516111850738525, "lr": 6.128542086515499e-07, "epoch": 1.7876651982378855, "percentage": 89.38, "elapsed_time": "7:28:39", "remaining_time": "0:53:19"} +{"current_steps": 5074, "total_steps": 5676, "loss": 0.6384079456329346, "lr": 6.108474700616263e-07, "epoch": 1.7880176211453744, "percentage": 89.39, "elapsed_time": "7:28:45", "remaining_time": "0:53:14"} +{"current_steps": 5075, "total_steps": 5676, "loss": 0.5699876546859741, "lr": 6.088439187967865e-07, "epoch": 1.7883700440528636, "percentage": 89.41, "elapsed_time": "7:28:51", "remaining_time": "0:53:09"} +{"current_steps": 
5076, "total_steps": 5676, "loss": 0.6068697571754456, "lr": 6.06843555537171e-07, "epoch": 1.7887224669603525, "percentage": 89.43, "elapsed_time": "7:28:56", "remaining_time": "0:53:03"} +{"current_steps": 5077, "total_steps": 5676, "loss": 0.6254304647445679, "lr": 6.048463809618444e-07, "epoch": 1.7890748898678415, "percentage": 89.45, "elapsed_time": "7:29:00", "remaining_time": "0:52:58"} +{"current_steps": 5078, "total_steps": 5676, "loss": 0.6779477596282959, "lr": 6.02852395748782e-07, "epoch": 1.7894273127753304, "percentage": 89.46, "elapsed_time": "7:29:06", "remaining_time": "0:52:53"} +{"current_steps": 5079, "total_steps": 5676, "loss": 0.6139817833900452, "lr": 6.008616005748802e-07, "epoch": 1.7897797356828193, "percentage": 89.48, "elapsed_time": "7:29:13", "remaining_time": "0:52:48"} +{"current_steps": 5080, "total_steps": 5676, "loss": 0.553310215473175, "lr": 5.988739961159539e-07, "epoch": 1.7901321585903083, "percentage": 89.5, "elapsed_time": "7:29:19", "remaining_time": "0:52:42"} +{"current_steps": 5081, "total_steps": 5676, "loss": 0.6093542575836182, "lr": 5.968895830467325e-07, "epoch": 1.7904845814977972, "percentage": 89.52, "elapsed_time": "7:29:24", "remaining_time": "0:52:37"} +{"current_steps": 5082, "total_steps": 5676, "loss": 0.6224432587623596, "lr": 5.949083620408614e-07, "epoch": 1.7908370044052864, "percentage": 89.53, "elapsed_time": "7:29:28", "remaining_time": "0:52:32"} +{"current_steps": 5083, "total_steps": 5676, "loss": 0.6155597567558289, "lr": 5.929303337709047e-07, "epoch": 1.7911894273127753, "percentage": 89.55, "elapsed_time": "7:29:33", "remaining_time": "0:52:26"} +{"current_steps": 5084, "total_steps": 5676, "loss": 0.5742098093032837, "lr": 5.909554989083411e-07, "epoch": 1.7915418502202645, "percentage": 89.57, "elapsed_time": "7:29:38", "remaining_time": "0:52:21"} +{"current_steps": 5085, "total_steps": 5676, "loss": 0.7427949905395508, "lr": 5.889838581235641e-07, "epoch": 1.7918942731277534, 
"percentage": 89.59, "elapsed_time": "7:29:44", "remaining_time": "0:52:16"} +{"current_steps": 5086, "total_steps": 5676, "loss": 0.48208528757095337, "lr": 5.870154120858851e-07, "epoch": 1.7922466960352423, "percentage": 89.61, "elapsed_time": "7:29:48", "remaining_time": "0:52:10"} +{"current_steps": 5087, "total_steps": 5676, "loss": 0.48402148485183716, "lr": 5.850501614635318e-07, "epoch": 1.7925991189427313, "percentage": 89.62, "elapsed_time": "7:29:53", "remaining_time": "0:52:05"} +{"current_steps": 5088, "total_steps": 5676, "loss": 0.6808921694755554, "lr": 5.83088106923646e-07, "epoch": 1.7929515418502202, "percentage": 89.64, "elapsed_time": "7:29:58", "remaining_time": "0:52:00"} +{"current_steps": 5089, "total_steps": 5676, "loss": 0.48358428478240967, "lr": 5.811292491322795e-07, "epoch": 1.7933039647577091, "percentage": 89.66, "elapsed_time": "7:30:05", "remaining_time": "0:51:54"} +{"current_steps": 5090, "total_steps": 5676, "loss": 0.6492827534675598, "lr": 5.791735887544081e-07, "epoch": 1.793656387665198, "percentage": 89.68, "elapsed_time": "7:30:10", "remaining_time": "0:51:49"} +{"current_steps": 5091, "total_steps": 5676, "loss": 0.5453791618347168, "lr": 5.772211264539162e-07, "epoch": 1.7940088105726872, "percentage": 89.69, "elapsed_time": "7:30:16", "remaining_time": "0:51:44"} +{"current_steps": 5092, "total_steps": 5676, "loss": 0.5901151895523071, "lr": 5.75271862893605e-07, "epoch": 1.7943612334801762, "percentage": 89.71, "elapsed_time": "7:30:22", "remaining_time": "0:51:39"} +{"current_steps": 5093, "total_steps": 5676, "loss": 0.616302490234375, "lr": 5.73325798735187e-07, "epoch": 1.7947136563876653, "percentage": 89.73, "elapsed_time": "7:30:27", "remaining_time": "0:51:33"} +{"current_steps": 5094, "total_steps": 5676, "loss": 0.616886556148529, "lr": 5.713829346392907e-07, "epoch": 1.7950660792951543, "percentage": 89.75, "elapsed_time": "7:30:32", "remaining_time": "0:51:28"} +{"current_steps": 5095, "total_steps": 
5676, "loss": 0.5552375316619873, "lr": 5.694432712654597e-07, "epoch": 1.7954185022026432, "percentage": 89.76, "elapsed_time": "7:30:36", "remaining_time": "0:51:23"} +{"current_steps": 5096, "total_steps": 5676, "loss": 0.5956143736839294, "lr": 5.675068092721491e-07, "epoch": 1.7957709251101321, "percentage": 89.78, "elapsed_time": "7:30:42", "remaining_time": "0:51:17"} +{"current_steps": 5097, "total_steps": 5676, "loss": 0.5870288610458374, "lr": 5.655735493167247e-07, "epoch": 1.796123348017621, "percentage": 89.8, "elapsed_time": "7:30:47", "remaining_time": "0:51:12"} +{"current_steps": 5098, "total_steps": 5676, "loss": 0.5325669646263123, "lr": 5.636434920554701e-07, "epoch": 1.79647577092511, "percentage": 89.82, "elapsed_time": "7:30:51", "remaining_time": "0:51:07"} +{"current_steps": 5099, "total_steps": 5676, "loss": 0.5931425094604492, "lr": 5.617166381435813e-07, "epoch": 1.7968281938325992, "percentage": 89.83, "elapsed_time": "7:30:57", "remaining_time": "0:51:01"} +{"current_steps": 5100, "total_steps": 5676, "loss": 0.5755603313446045, "lr": 5.597929882351627e-07, "epoch": 1.797180616740088, "percentage": 89.85, "elapsed_time": "7:31:03", "remaining_time": "0:50:56"} +{"current_steps": 5101, "total_steps": 5676, "loss": 0.5780980587005615, "lr": 5.578725429832344e-07, "epoch": 1.797533039647577, "percentage": 89.87, "elapsed_time": "7:31:13", "remaining_time": "0:50:51"} +{"current_steps": 5102, "total_steps": 5676, "loss": 0.5863890647888184, "lr": 5.559553030397258e-07, "epoch": 1.7978854625550662, "percentage": 89.89, "elapsed_time": "7:31:17", "remaining_time": "0:50:46"} +{"current_steps": 5103, "total_steps": 5676, "loss": 0.5577390789985657, "lr": 5.540412690554842e-07, "epoch": 1.7982378854625551, "percentage": 89.9, "elapsed_time": "7:31:23", "remaining_time": "0:50:41"} +{"current_steps": 5104, "total_steps": 5676, "loss": 0.5994857549667358, "lr": 5.521304416802642e-07, "epoch": 1.798590308370044, "percentage": 89.92, 
"elapsed_time": "7:31:30", "remaining_time": "0:50:35"} +{"current_steps": 5105, "total_steps": 5676, "loss": 0.6065348982810974, "lr": 5.502228215627281e-07, "epoch": 1.798942731277533, "percentage": 89.94, "elapsed_time": "7:31:35", "remaining_time": "0:50:30"} +{"current_steps": 5106, "total_steps": 5676, "loss": 0.5390498638153076, "lr": 5.483184093504568e-07, "epoch": 1.799295154185022, "percentage": 89.96, "elapsed_time": "7:31:40", "remaining_time": "0:50:25"} +{"current_steps": 5107, "total_steps": 5676, "loss": 0.5826783180236816, "lr": 5.464172056899364e-07, "epoch": 1.7996475770925109, "percentage": 89.98, "elapsed_time": "7:31:45", "remaining_time": "0:50:20"} +{"current_steps": 5108, "total_steps": 5676, "loss": 0.5429874658584595, "lr": 5.445192112265718e-07, "epoch": 1.8, "percentage": 89.99, "elapsed_time": "7:31:52", "remaining_time": "0:50:14"} +{"current_steps": 5109, "total_steps": 5676, "loss": 0.5591466426849365, "lr": 5.426244266046676e-07, "epoch": 1.800352422907489, "percentage": 90.01, "elapsed_time": "7:31:58", "remaining_time": "0:50:09"} +{"current_steps": 5110, "total_steps": 5676, "loss": 0.5351911187171936, "lr": 5.407328524674449e-07, "epoch": 1.8007048458149781, "percentage": 90.03, "elapsed_time": "7:32:05", "remaining_time": "0:50:04"} +{"current_steps": 5111, "total_steps": 5676, "loss": 0.6095720529556274, "lr": 5.388444894570378e-07, "epoch": 1.801057268722467, "percentage": 90.05, "elapsed_time": "7:32:10", "remaining_time": "0:49:59"} +{"current_steps": 5112, "total_steps": 5676, "loss": 0.6278849840164185, "lr": 5.369593382144844e-07, "epoch": 1.801409691629956, "percentage": 90.06, "elapsed_time": "7:32:14", "remaining_time": "0:49:53"} +{"current_steps": 5113, "total_steps": 5676, "loss": 0.6787056922912598, "lr": 5.350773993797332e-07, "epoch": 1.801762114537445, "percentage": 90.08, "elapsed_time": "7:32:21", "remaining_time": "0:49:48"} +{"current_steps": 5114, "total_steps": 5676, "loss": 0.6054684519767761, "lr": 
5.331986735916461e-07, "epoch": 1.8021145374449339, "percentage": 90.1, "elapsed_time": "7:32:27", "remaining_time": "0:49:43"} +{"current_steps": 5115, "total_steps": 5676, "loss": 0.5039973855018616, "lr": 5.31323161487991e-07, "epoch": 1.8024669603524228, "percentage": 90.12, "elapsed_time": "7:32:33", "remaining_time": "0:49:38"} +{"current_steps": 5116, "total_steps": 5676, "loss": 0.6306504011154175, "lr": 5.294508637054474e-07, "epoch": 1.8028193832599118, "percentage": 90.13, "elapsed_time": "7:32:38", "remaining_time": "0:49:32"} +{"current_steps": 5117, "total_steps": 5676, "loss": 0.5654761791229248, "lr": 5.275817808796013e-07, "epoch": 1.803171806167401, "percentage": 90.15, "elapsed_time": "7:32:43", "remaining_time": "0:49:27"} +{"current_steps": 5118, "total_steps": 5676, "loss": 0.5801905989646912, "lr": 5.257159136449452e-07, "epoch": 1.8035242290748899, "percentage": 90.17, "elapsed_time": "7:32:48", "remaining_time": "0:49:22"} +{"current_steps": 5119, "total_steps": 5676, "loss": 0.6565619707107544, "lr": 5.238532626348891e-07, "epoch": 1.803876651982379, "percentage": 90.19, "elapsed_time": "7:32:55", "remaining_time": "0:49:16"} +{"current_steps": 5120, "total_steps": 5676, "loss": 0.5923253297805786, "lr": 5.219938284817416e-07, "epoch": 1.804229074889868, "percentage": 90.2, "elapsed_time": "7:33:00", "remaining_time": "0:49:11"} +{"current_steps": 5121, "total_steps": 5676, "loss": 0.6697949171066284, "lr": 5.2013761181672e-07, "epoch": 1.8045814977973569, "percentage": 90.22, "elapsed_time": "7:33:05", "remaining_time": "0:49:06"} +{"current_steps": 5122, "total_steps": 5676, "loss": 0.5146230459213257, "lr": 5.182846132699571e-07, "epoch": 1.8049339207048458, "percentage": 90.24, "elapsed_time": "7:33:09", "remaining_time": "0:49:00"} +{"current_steps": 5123, "total_steps": 5676, "loss": 0.5928882360458374, "lr": 5.16434833470485e-07, "epoch": 1.8052863436123348, "percentage": 90.26, "elapsed_time": "7:33:16", "remaining_time": 
"0:48:55"} +{"current_steps": 5124, "total_steps": 5676, "loss": 0.6114771366119385, "lr": 5.145882730462481e-07, "epoch": 1.8056387665198237, "percentage": 90.27, "elapsed_time": "7:33:22", "remaining_time": "0:48:50"} +{"current_steps": 5125, "total_steps": 5676, "loss": 0.6624642014503479, "lr": 5.127449326240952e-07, "epoch": 1.8059911894273126, "percentage": 90.29, "elapsed_time": "7:33:26", "remaining_time": "0:48:45"} +{"current_steps": 5126, "total_steps": 5676, "loss": 0.6277980208396912, "lr": 5.109048128297822e-07, "epoch": 1.8063436123348018, "percentage": 90.31, "elapsed_time": "7:33:31", "remaining_time": "0:48:39"} +{"current_steps": 5127, "total_steps": 5676, "loss": 0.6470246911048889, "lr": 5.090679142879751e-07, "epoch": 1.8066960352422907, "percentage": 90.33, "elapsed_time": "7:33:37", "remaining_time": "0:48:34"} +{"current_steps": 5128, "total_steps": 5676, "loss": 0.6418337821960449, "lr": 5.072342376222438e-07, "epoch": 1.8070484581497799, "percentage": 90.35, "elapsed_time": "7:33:42", "remaining_time": "0:48:29"} +{"current_steps": 5129, "total_steps": 5676, "loss": 0.6013847589492798, "lr": 5.054037834550596e-07, "epoch": 1.8074008810572688, "percentage": 90.36, "elapsed_time": "7:33:48", "remaining_time": "0:48:23"} +{"current_steps": 5130, "total_steps": 5676, "loss": 0.5354605913162231, "lr": 5.035765524078095e-07, "epoch": 1.8077533039647578, "percentage": 90.38, "elapsed_time": "7:33:52", "remaining_time": "0:48:18"} +{"current_steps": 5131, "total_steps": 5676, "loss": 0.6017459034919739, "lr": 5.01752545100781e-07, "epoch": 1.8081057268722467, "percentage": 90.4, "elapsed_time": "7:33:56", "remaining_time": "0:48:13"} +{"current_steps": 5132, "total_steps": 5676, "loss": 0.5929696559906006, "lr": 4.999317621531663e-07, "epoch": 1.8084581497797356, "percentage": 90.42, "elapsed_time": "7:34:02", "remaining_time": "0:48:07"} +{"current_steps": 5133, "total_steps": 5676, "loss": 0.6444251537322998, "lr": 4.981142041830645e-07, 
"epoch": 1.8088105726872246, "percentage": 90.43, "elapsed_time": "7:34:09", "remaining_time": "0:48:02"} +{"current_steps": 5134, "total_steps": 5676, "loss": 0.5854116678237915, "lr": 4.962998718074807e-07, "epoch": 1.8091629955947135, "percentage": 90.45, "elapsed_time": "7:34:15", "remaining_time": "0:47:57"} +{"current_steps": 5135, "total_steps": 5676, "loss": 0.5145394206047058, "lr": 4.944887656423248e-07, "epoch": 1.8095154185022027, "percentage": 90.47, "elapsed_time": "7:34:22", "remaining_time": "0:47:52"} +{"current_steps": 5136, "total_steps": 5676, "loss": 0.5733104348182678, "lr": 4.926808863024102e-07, "epoch": 1.8098678414096916, "percentage": 90.49, "elapsed_time": "7:34:26", "remaining_time": "0:47:46"} +{"current_steps": 5137, "total_steps": 5676, "loss": 0.5925072431564331, "lr": 4.908762344014573e-07, "epoch": 1.8102202643171807, "percentage": 90.5, "elapsed_time": "7:34:30", "remaining_time": "0:47:41"} +{"current_steps": 5138, "total_steps": 5676, "loss": 0.5346912145614624, "lr": 4.890748105520859e-07, "epoch": 1.8105726872246697, "percentage": 90.52, "elapsed_time": "7:34:36", "remaining_time": "0:47:36"} +{"current_steps": 5139, "total_steps": 5676, "loss": 0.6206755638122559, "lr": 4.87276615365827e-07, "epoch": 1.8109251101321586, "percentage": 90.54, "elapsed_time": "7:34:41", "remaining_time": "0:47:30"} +{"current_steps": 5140, "total_steps": 5676, "loss": 0.5998660326004028, "lr": 4.854816494531089e-07, "epoch": 1.8112775330396476, "percentage": 90.56, "elapsed_time": "7:34:46", "remaining_time": "0:47:25"} +{"current_steps": 5141, "total_steps": 5676, "loss": 0.44545644521713257, "lr": 4.836899134232687e-07, "epoch": 1.8116299559471365, "percentage": 90.57, "elapsed_time": "7:34:51", "remaining_time": "0:47:20"} +{"current_steps": 5142, "total_steps": 5676, "loss": 0.701204776763916, "lr": 4.81901407884543e-07, "epoch": 1.8119823788546254, "percentage": 90.59, "elapsed_time": "7:34:57", "remaining_time": "0:47:14"} 
+{"current_steps": 5143, "total_steps": 5676, "loss": 0.6103897094726562, "lr": 4.801161334440762e-07, "epoch": 1.8123348017621146, "percentage": 90.61, "elapsed_time": "7:35:02", "remaining_time": "0:47:09"} +{"current_steps": 5144, "total_steps": 5676, "loss": 0.5864719152450562, "lr": 4.783340907079126e-07, "epoch": 1.8126872246696035, "percentage": 90.63, "elapsed_time": "7:35:08", "remaining_time": "0:47:04"} +{"current_steps": 5145, "total_steps": 5676, "loss": 0.46949082612991333, "lr": 4.7655528028099916e-07, "epoch": 1.8130396475770925, "percentage": 90.64, "elapsed_time": "7:35:14", "remaining_time": "0:46:59"} +{"current_steps": 5146, "total_steps": 5676, "loss": 0.6371885538101196, "lr": 4.7477970276718855e-07, "epoch": 1.8133920704845816, "percentage": 90.66, "elapsed_time": "7:35:19", "remaining_time": "0:46:53"} +{"current_steps": 5147, "total_steps": 5676, "loss": 0.6819220781326294, "lr": 4.730073587692319e-07, "epoch": 1.8137444933920706, "percentage": 90.68, "elapsed_time": "7:35:24", "remaining_time": "0:46:48"} +{"current_steps": 5148, "total_steps": 5676, "loss": 0.5230735540390015, "lr": 4.712382488887868e-07, "epoch": 1.8140969162995595, "percentage": 90.7, "elapsed_time": "7:35:28", "remaining_time": "0:46:42"} +{"current_steps": 5149, "total_steps": 5676, "loss": 0.5194997787475586, "lr": 4.6947237372640954e-07, "epoch": 1.8144493392070484, "percentage": 90.72, "elapsed_time": "7:35:35", "remaining_time": "0:46:37"} +{"current_steps": 5150, "total_steps": 5676, "loss": 0.6025055050849915, "lr": 4.677097338815595e-07, "epoch": 1.8148017621145374, "percentage": 90.73, "elapsed_time": "7:35:40", "remaining_time": "0:46:32"} +{"current_steps": 5151, "total_steps": 5676, "loss": 0.649467945098877, "lr": 4.6595032995260135e-07, "epoch": 1.8151541850220263, "percentage": 90.75, "elapsed_time": "7:35:44", "remaining_time": "0:46:27"} +{"current_steps": 5152, "total_steps": 5676, "loss": 0.5216347575187683, "lr": 4.641941625367918e-07, "epoch": 
1.8155066079295155, "percentage": 90.77, "elapsed_time": "7:35:50", "remaining_time": "0:46:21"} +{"current_steps": 5153, "total_steps": 5676, "loss": 0.5135647058486938, "lr": 4.6244123223030177e-07, "epoch": 1.8158590308370044, "percentage": 90.79, "elapsed_time": "7:35:54", "remaining_time": "0:46:16"} +{"current_steps": 5154, "total_steps": 5676, "loss": 0.5526058673858643, "lr": 4.6069153962819193e-07, "epoch": 1.8162114537444936, "percentage": 90.8, "elapsed_time": "7:35:59", "remaining_time": "0:46:10"} +{"current_steps": 5155, "total_steps": 5676, "loss": 0.5897486209869385, "lr": 4.589450853244315e-07, "epoch": 1.8165638766519825, "percentage": 90.82, "elapsed_time": "7:36:04", "remaining_time": "0:46:05"} +{"current_steps": 5156, "total_steps": 5676, "loss": 0.5698407888412476, "lr": 4.5720186991188517e-07, "epoch": 1.8169162995594714, "percentage": 90.84, "elapsed_time": "7:36:10", "remaining_time": "0:46:00"} +{"current_steps": 5157, "total_steps": 5676, "loss": 0.579573392868042, "lr": 4.5546189398232075e-07, "epoch": 1.8172687224669604, "percentage": 90.86, "elapsed_time": "7:36:16", "remaining_time": "0:45:55"} +{"current_steps": 5158, "total_steps": 5676, "loss": 0.41852182149887085, "lr": 4.5372515812640573e-07, "epoch": 1.8176211453744493, "percentage": 90.87, "elapsed_time": "7:36:20", "remaining_time": "0:45:49"} +{"current_steps": 5159, "total_steps": 5676, "loss": 0.6081204414367676, "lr": 4.519916629337107e-07, "epoch": 1.8179735682819382, "percentage": 90.89, "elapsed_time": "7:36:26", "remaining_time": "0:45:44"} +{"current_steps": 5160, "total_steps": 5676, "loss": 0.5725652575492859, "lr": 4.502614089926982e-07, "epoch": 1.8183259911894272, "percentage": 90.91, "elapsed_time": "7:36:31", "remaining_time": "0:45:39"} +{"current_steps": 5161, "total_steps": 5676, "loss": 0.5109303593635559, "lr": 4.4853439689073965e-07, "epoch": 1.8186784140969163, "percentage": 90.93, "elapsed_time": "7:36:37", "remaining_time": "0:45:33"} 
+{"current_steps": 5162, "total_steps": 5676, "loss": 0.5647833347320557, "lr": 4.468106272141004e-07, "epoch": 1.8190308370044053, "percentage": 90.94, "elapsed_time": "7:36:43", "remaining_time": "0:45:28"} +{"current_steps": 5163, "total_steps": 5676, "loss": 0.6074738502502441, "lr": 4.450901005479469e-07, "epoch": 1.8193832599118944, "percentage": 90.96, "elapsed_time": "7:36:48", "remaining_time": "0:45:23"} +{"current_steps": 5164, "total_steps": 5676, "loss": 0.647289514541626, "lr": 4.433728174763452e-07, "epoch": 1.8197356828193834, "percentage": 90.98, "elapsed_time": "7:36:55", "remaining_time": "0:45:18"} +{"current_steps": 5165, "total_steps": 5676, "loss": 0.5817590951919556, "lr": 4.416587785822568e-07, "epoch": 1.8200881057268723, "percentage": 91.0, "elapsed_time": "7:37:00", "remaining_time": "0:45:12"} +{"current_steps": 5166, "total_steps": 5676, "loss": 0.6483672857284546, "lr": 4.399479844475485e-07, "epoch": 1.8204405286343612, "percentage": 91.01, "elapsed_time": "7:37:05", "remaining_time": "0:45:07"} +{"current_steps": 5167, "total_steps": 5676, "loss": 0.5439441204071045, "lr": 4.382404356529801e-07, "epoch": 1.8207929515418502, "percentage": 91.03, "elapsed_time": "7:37:09", "remaining_time": "0:45:02"} +{"current_steps": 5168, "total_steps": 5676, "loss": 0.5835710167884827, "lr": 4.3653613277820804e-07, "epoch": 1.821145374449339, "percentage": 91.05, "elapsed_time": "7:37:14", "remaining_time": "0:44:56"} +{"current_steps": 5169, "total_steps": 5676, "loss": 0.7024152874946594, "lr": 4.3483507640179503e-07, "epoch": 1.821497797356828, "percentage": 91.07, "elapsed_time": "7:37:19", "remaining_time": "0:44:51"} +{"current_steps": 5170, "total_steps": 5676, "loss": 0.5223513841629028, "lr": 4.331372671011935e-07, "epoch": 1.8218502202643172, "percentage": 91.09, "elapsed_time": "7:37:24", "remaining_time": "0:44:46"} +{"current_steps": 5171, "total_steps": 5676, "loss": 0.5975688099861145, "lr": 4.3144270545275814e-07, "epoch": 
1.8222026431718061, "percentage": 91.1, "elapsed_time": "7:37:29", "remaining_time": "0:44:40"} +{"current_steps": 5172, "total_steps": 5676, "loss": 0.5459109544754028, "lr": 4.2975139203173977e-07, "epoch": 1.8225550660792953, "percentage": 91.12, "elapsed_time": "7:37:36", "remaining_time": "0:44:35"} +{"current_steps": 5173, "total_steps": 5676, "loss": 0.6155862808227539, "lr": 4.2806332741228586e-07, "epoch": 1.8229074889867842, "percentage": 91.14, "elapsed_time": "7:37:40", "remaining_time": "0:44:30"} +{"current_steps": 5174, "total_steps": 5676, "loss": 0.6505374908447266, "lr": 4.263785121674435e-07, "epoch": 1.8232599118942732, "percentage": 91.16, "elapsed_time": "7:37:44", "remaining_time": "0:44:24"} +{"current_steps": 5175, "total_steps": 5676, "loss": 0.5243734121322632, "lr": 4.246969468691553e-07, "epoch": 1.823612334801762, "percentage": 91.17, "elapsed_time": "7:37:49", "remaining_time": "0:44:19"} +{"current_steps": 5176, "total_steps": 5676, "loss": 0.6931817531585693, "lr": 4.2301863208825676e-07, "epoch": 1.823964757709251, "percentage": 91.19, "elapsed_time": "7:37:55", "remaining_time": "0:44:14"} +{"current_steps": 5177, "total_steps": 5676, "loss": 0.5312765836715698, "lr": 4.2134356839448665e-07, "epoch": 1.82431718061674, "percentage": 91.21, "elapsed_time": "7:37:59", "remaining_time": "0:44:08"} +{"current_steps": 5178, "total_steps": 5676, "loss": 0.598992109298706, "lr": 4.1967175635647674e-07, "epoch": 1.824669603524229, "percentage": 91.23, "elapsed_time": "7:38:05", "remaining_time": "0:44:03"} +{"current_steps": 5179, "total_steps": 5676, "loss": 0.5844708681106567, "lr": 4.1800319654175413e-07, "epoch": 1.825022026431718, "percentage": 91.24, "elapsed_time": "7:38:11", "remaining_time": "0:43:58"} +{"current_steps": 5180, "total_steps": 5676, "loss": 0.5884612798690796, "lr": 4.1633788951674357e-07, "epoch": 1.825374449339207, "percentage": 91.26, "elapsed_time": "7:38:16", "remaining_time": "0:43:52"} +{"current_steps": 
5181, "total_steps": 5676, "loss": 0.6038404107093811, "lr": 4.1467583584676395e-07, "epoch": 1.8257268722466962, "percentage": 91.28, "elapsed_time": "7:38:21", "remaining_time": "0:43:47"} +{"current_steps": 5182, "total_steps": 5676, "loss": 0.6511296033859253, "lr": 4.130170360960317e-07, "epoch": 1.826079295154185, "percentage": 91.3, "elapsed_time": "7:38:27", "remaining_time": "0:43:42"} +{"current_steps": 5183, "total_steps": 5676, "loss": 0.5884404182434082, "lr": 4.113614908276609e-07, "epoch": 1.826431718061674, "percentage": 91.31, "elapsed_time": "7:38:34", "remaining_time": "0:43:37"} +{"current_steps": 5184, "total_steps": 5676, "loss": 0.5549901723861694, "lr": 4.097092006036507e-07, "epoch": 1.826784140969163, "percentage": 91.33, "elapsed_time": "7:38:40", "remaining_time": "0:43:31"} +{"current_steps": 5185, "total_steps": 5676, "loss": 0.561951756477356, "lr": 4.0806016598490707e-07, "epoch": 1.827136563876652, "percentage": 91.35, "elapsed_time": "7:38:44", "remaining_time": "0:43:26"} +{"current_steps": 5186, "total_steps": 5676, "loss": 0.6412413120269775, "lr": 4.064143875312254e-07, "epoch": 1.8274889867841408, "percentage": 91.37, "elapsed_time": "7:38:50", "remaining_time": "0:43:21"} +{"current_steps": 5187, "total_steps": 5676, "loss": 0.6295674443244934, "lr": 4.0477186580129447e-07, "epoch": 1.82784140969163, "percentage": 91.38, "elapsed_time": "7:38:56", "remaining_time": "0:43:15"} +{"current_steps": 5188, "total_steps": 5676, "loss": 0.6700723767280579, "lr": 4.031326013527015e-07, "epoch": 1.828193832599119, "percentage": 91.4, "elapsed_time": "7:39:01", "remaining_time": "0:43:10"} +{"current_steps": 5189, "total_steps": 5676, "loss": 0.5758254528045654, "lr": 4.014965947419236e-07, "epoch": 1.8285462555066079, "percentage": 91.42, "elapsed_time": "7:39:06", "remaining_time": "0:43:05"} +{"current_steps": 5190, "total_steps": 5676, "loss": 0.6663509607315063, "lr": 3.9986384652433654e-07, "epoch": 1.828898678414097, 
"percentage": 91.44, "elapsed_time": "7:39:12", "remaining_time": "0:43:00"} +{"current_steps": 5191, "total_steps": 5676, "loss": 0.6459337472915649, "lr": 3.982343572542069e-07, "epoch": 1.829251101321586, "percentage": 91.46, "elapsed_time": "7:39:17", "remaining_time": "0:42:54"} +{"current_steps": 5192, "total_steps": 5676, "loss": 0.6411766409873962, "lr": 3.9660812748469336e-07, "epoch": 1.829603524229075, "percentage": 91.47, "elapsed_time": "7:39:21", "remaining_time": "0:42:49"} +{"current_steps": 5193, "total_steps": 5676, "loss": 0.711888313293457, "lr": 3.9498515776785207e-07, "epoch": 1.8299559471365638, "percentage": 91.49, "elapsed_time": "7:39:26", "remaining_time": "0:42:43"} +{"current_steps": 5194, "total_steps": 5676, "loss": 0.63288813829422, "lr": 3.933654486546312e-07, "epoch": 1.8303083700440528, "percentage": 91.51, "elapsed_time": "7:39:31", "remaining_time": "0:42:38"} +{"current_steps": 5195, "total_steps": 5676, "loss": 0.6330822706222534, "lr": 3.9174900069486985e-07, "epoch": 1.8306607929515417, "percentage": 91.53, "elapsed_time": "7:39:36", "remaining_time": "0:42:33"} +{"current_steps": 5196, "total_steps": 5676, "loss": 0.7242149114608765, "lr": 3.901358144373035e-07, "epoch": 1.8310132158590309, "percentage": 91.54, "elapsed_time": "7:39:40", "remaining_time": "0:42:27"} +{"current_steps": 5197, "total_steps": 5676, "loss": 0.6741703748703003, "lr": 3.885258904295575e-07, "epoch": 1.8313656387665198, "percentage": 91.56, "elapsed_time": "7:39:45", "remaining_time": "0:42:22"} +{"current_steps": 5198, "total_steps": 5676, "loss": 0.625057578086853, "lr": 3.8691922921815226e-07, "epoch": 1.831718061674009, "percentage": 91.58, "elapsed_time": "7:39:48", "remaining_time": "0:42:17"} +{"current_steps": 5199, "total_steps": 5676, "loss": 0.673669159412384, "lr": 3.853158313484995e-07, "epoch": 1.832070484581498, "percentage": 91.6, "elapsed_time": "7:39:54", "remaining_time": "0:42:11"} +{"current_steps": 5200, "total_steps": 5676, 
"loss": 0.5981203317642212, "lr": 3.837156973648992e-07, "epoch": 1.8324229074889868, "percentage": 91.61, "elapsed_time": "7:40:00", "remaining_time": "0:42:06"} +{"current_steps": 5201, "total_steps": 5676, "loss": 0.6577199697494507, "lr": 3.821188278105514e-07, "epoch": 1.8327753303964758, "percentage": 91.63, "elapsed_time": "7:40:09", "remaining_time": "0:42:01"} +{"current_steps": 5202, "total_steps": 5676, "loss": 0.6951043605804443, "lr": 3.805252232275414e-07, "epoch": 1.8331277533039647, "percentage": 91.65, "elapsed_time": "7:40:15", "remaining_time": "0:41:56"} +{"current_steps": 5203, "total_steps": 5676, "loss": 0.572435200214386, "lr": 3.7893488415684964e-07, "epoch": 1.8334801762114536, "percentage": 91.67, "elapsed_time": "7:40:20", "remaining_time": "0:41:50"} +{"current_steps": 5204, "total_steps": 5676, "loss": 0.5849496126174927, "lr": 3.773478111383455e-07, "epoch": 1.8338325991189426, "percentage": 91.68, "elapsed_time": "7:40:25", "remaining_time": "0:41:45"} +{"current_steps": 5205, "total_steps": 5676, "loss": 0.5380967855453491, "lr": 3.7576400471079023e-07, "epoch": 1.8341850220264317, "percentage": 91.7, "elapsed_time": "7:40:29", "remaining_time": "0:41:40"} +{"current_steps": 5206, "total_steps": 5676, "loss": 0.5681222677230835, "lr": 3.7418346541183923e-07, "epoch": 1.8345374449339207, "percentage": 91.72, "elapsed_time": "7:40:34", "remaining_time": "0:41:34"} +{"current_steps": 5207, "total_steps": 5676, "loss": 0.5012099146842957, "lr": 3.7260619377803677e-07, "epoch": 1.8348898678414098, "percentage": 91.74, "elapsed_time": "7:40:39", "remaining_time": "0:41:29"} +{"current_steps": 5208, "total_steps": 5676, "loss": 0.6175205707550049, "lr": 3.710321903448133e-07, "epoch": 1.8352422907488988, "percentage": 91.75, "elapsed_time": "7:40:45", "remaining_time": "0:41:24"} +{"current_steps": 5209, "total_steps": 5676, "loss": 0.6190954446792603, "lr": 3.6946145564649817e-07, "epoch": 1.8355947136563877, "percentage": 91.77, 
"elapsed_time": "7:40:51", "remaining_time": "0:41:18"} +{"current_steps": 5210, "total_steps": 5676, "loss": 0.6820691823959351, "lr": 3.678939902163048e-07, "epoch": 1.8359471365638766, "percentage": 91.79, "elapsed_time": "7:40:56", "remaining_time": "0:41:13"} +{"current_steps": 5211, "total_steps": 5676, "loss": 0.5309683084487915, "lr": 3.6632979458633867e-07, "epoch": 1.8362995594713656, "percentage": 91.81, "elapsed_time": "7:41:02", "remaining_time": "0:41:08"} +{"current_steps": 5212, "total_steps": 5676, "loss": 0.5110820531845093, "lr": 3.6476886928759726e-07, "epoch": 1.8366519823788545, "percentage": 91.83, "elapsed_time": "7:41:07", "remaining_time": "0:41:03"} +{"current_steps": 5213, "total_steps": 5676, "loss": 0.6226333975791931, "lr": 3.6321121484996447e-07, "epoch": 1.8370044052863435, "percentage": 91.84, "elapsed_time": "7:41:12", "remaining_time": "0:40:57"} +{"current_steps": 5214, "total_steps": 5676, "loss": 0.6287777423858643, "lr": 3.6165683180221735e-07, "epoch": 1.8373568281938326, "percentage": 91.86, "elapsed_time": "7:41:18", "remaining_time": "0:40:52"} +{"current_steps": 5215, "total_steps": 5676, "loss": 0.7033661603927612, "lr": 3.601057206720182e-07, "epoch": 1.8377092511013216, "percentage": 91.88, "elapsed_time": "7:41:23", "remaining_time": "0:40:47"} +{"current_steps": 5216, "total_steps": 5676, "loss": 0.5841168165206909, "lr": 3.5855788198592257e-07, "epoch": 1.8380616740088107, "percentage": 91.9, "elapsed_time": "7:41:29", "remaining_time": "0:40:41"} +{"current_steps": 5217, "total_steps": 5676, "loss": 0.6797176599502563, "lr": 3.570133162693734e-07, "epoch": 1.8384140969162996, "percentage": 91.91, "elapsed_time": "7:41:35", "remaining_time": "0:40:36"} +{"current_steps": 5218, "total_steps": 5676, "loss": 0.4317880868911743, "lr": 3.5547202404670246e-07, "epoch": 1.8387665198237886, "percentage": 91.93, "elapsed_time": "7:41:41", "remaining_time": "0:40:31"} +{"current_steps": 5219, "total_steps": 5676, "loss": 
0.4757443368434906, "lr": 3.5393400584113004e-07, "epoch": 1.8391189427312775, "percentage": 91.95, "elapsed_time": "7:41:46", "remaining_time": "0:40:26"} +{"current_steps": 5220, "total_steps": 5676, "loss": 0.6341856718063354, "lr": 3.5239926217476627e-07, "epoch": 1.8394713656387665, "percentage": 91.97, "elapsed_time": "7:41:51", "remaining_time": "0:40:20"} +{"current_steps": 5221, "total_steps": 5676, "loss": 0.5401504039764404, "lr": 3.5086779356860777e-07, "epoch": 1.8398237885462554, "percentage": 91.98, "elapsed_time": "7:41:57", "remaining_time": "0:40:15"} +{"current_steps": 5222, "total_steps": 5676, "loss": 0.507185697555542, "lr": 3.4933960054254314e-07, "epoch": 1.8401762114537445, "percentage": 92.0, "elapsed_time": "7:42:01", "remaining_time": "0:40:10"} +{"current_steps": 5223, "total_steps": 5676, "loss": 0.544599175453186, "lr": 3.478146836153418e-07, "epoch": 1.8405286343612335, "percentage": 92.02, "elapsed_time": "7:42:07", "remaining_time": "0:40:04"} +{"current_steps": 5224, "total_steps": 5676, "loss": 0.5231183767318726, "lr": 3.4629304330466964e-07, "epoch": 1.8408810572687224, "percentage": 92.04, "elapsed_time": "7:42:11", "remaining_time": "0:39:59"} +{"current_steps": 5225, "total_steps": 5676, "loss": 0.5505118370056152, "lr": 3.447746801270746e-07, "epoch": 1.8412334801762116, "percentage": 92.05, "elapsed_time": "7:42:18", "remaining_time": "0:39:54"} +{"current_steps": 5226, "total_steps": 5676, "loss": 0.6056097149848938, "lr": 3.432595945979944e-07, "epoch": 1.8415859030837005, "percentage": 92.07, "elapsed_time": "7:42:22", "remaining_time": "0:39:48"} +{"current_steps": 5227, "total_steps": 5676, "loss": 0.6292518377304077, "lr": 3.4174778723175204e-07, "epoch": 1.8419383259911895, "percentage": 92.09, "elapsed_time": "7:42:26", "remaining_time": "0:39:43"} +{"current_steps": 5228, "total_steps": 5676, "loss": 0.6821235418319702, "lr": 3.4023925854156035e-07, "epoch": 1.8422907488986784, "percentage": 92.11, "elapsed_time": 
"7:42:32", "remaining_time": "0:39:38"} +{"current_steps": 5229, "total_steps": 5676, "loss": 0.6663388013839722, "lr": 3.3873400903951636e-07, "epoch": 1.8426431718061673, "percentage": 92.12, "elapsed_time": "7:42:37", "remaining_time": "0:39:32"} +{"current_steps": 5230, "total_steps": 5676, "loss": 0.5283368825912476, "lr": 3.3723203923660795e-07, "epoch": 1.8429955947136563, "percentage": 92.14, "elapsed_time": "7:42:43", "remaining_time": "0:39:27"} +{"current_steps": 5231, "total_steps": 5676, "loss": 0.6193508505821228, "lr": 3.35733349642704e-07, "epoch": 1.8433480176211454, "percentage": 92.16, "elapsed_time": "7:42:48", "remaining_time": "0:39:22"} +{"current_steps": 5232, "total_steps": 5676, "loss": 0.5790667533874512, "lr": 3.3423794076656635e-07, "epoch": 1.8437004405286344, "percentage": 92.18, "elapsed_time": "7:42:53", "remaining_time": "0:39:16"} +{"current_steps": 5233, "total_steps": 5676, "loss": 0.5774649381637573, "lr": 3.3274581311583786e-07, "epoch": 1.8440528634361235, "percentage": 92.2, "elapsed_time": "7:42:59", "remaining_time": "0:39:11"} +{"current_steps": 5234, "total_steps": 5676, "loss": 0.7818938493728638, "lr": 3.312569671970489e-07, "epoch": 1.8444052863436124, "percentage": 92.21, "elapsed_time": "7:43:05", "remaining_time": "0:39:06"} +{"current_steps": 5235, "total_steps": 5676, "loss": 0.7140024900436401, "lr": 3.297714035156174e-07, "epoch": 1.8447577092511014, "percentage": 92.23, "elapsed_time": "7:43:11", "remaining_time": "0:39:01"} +{"current_steps": 5236, "total_steps": 5676, "loss": 0.526549220085144, "lr": 3.2828912257584664e-07, "epoch": 1.8451101321585903, "percentage": 92.25, "elapsed_time": "7:43:16", "remaining_time": "0:38:55"} +{"current_steps": 5237, "total_steps": 5676, "loss": 0.5497986078262329, "lr": 3.268101248809219e-07, "epoch": 1.8454625550660793, "percentage": 92.27, "elapsed_time": "7:43:22", "remaining_time": "0:38:50"} +{"current_steps": 5238, "total_steps": 5676, "loss": 0.587260901927948, 
"lr": 3.2533441093292153e-07, "epoch": 1.8458149779735682, "percentage": 92.28, "elapsed_time": "7:43:28", "remaining_time": "0:38:45"} +{"current_steps": 5239, "total_steps": 5676, "loss": 0.6064329147338867, "lr": 3.238619812327992e-07, "epoch": 1.8461674008810571, "percentage": 92.3, "elapsed_time": "7:43:34", "remaining_time": "0:38:40"} +{"current_steps": 5240, "total_steps": 5676, "loss": 0.5427783727645874, "lr": 3.22392836280403e-07, "epoch": 1.8465198237885463, "percentage": 92.32, "elapsed_time": "7:43:39", "remaining_time": "0:38:34"} +{"current_steps": 5241, "total_steps": 5676, "loss": 0.6315155029296875, "lr": 3.209269765744605e-07, "epoch": 1.8468722466960352, "percentage": 92.34, "elapsed_time": "7:43:43", "remaining_time": "0:38:29"} +{"current_steps": 5242, "total_steps": 5676, "loss": 0.47614991664886475, "lr": 3.194644026125848e-07, "epoch": 1.8472246696035244, "percentage": 92.35, "elapsed_time": "7:43:48", "remaining_time": "0:38:24"} +{"current_steps": 5243, "total_steps": 5676, "loss": 0.4671345353126526, "lr": 3.1800511489127553e-07, "epoch": 1.8475770925110133, "percentage": 92.37, "elapsed_time": "7:43:53", "remaining_time": "0:38:18"} +{"current_steps": 5244, "total_steps": 5676, "loss": 0.5751510262489319, "lr": 3.1654911390591404e-07, "epoch": 1.8479295154185023, "percentage": 92.39, "elapsed_time": "7:43:59", "remaining_time": "0:38:13"} +{"current_steps": 5245, "total_steps": 5676, "loss": 0.41024816036224365, "lr": 3.1509640015076946e-07, "epoch": 1.8482819383259912, "percentage": 92.41, "elapsed_time": "7:44:04", "remaining_time": "0:38:08"} +{"current_steps": 5246, "total_steps": 5676, "loss": 0.5401195287704468, "lr": 3.136469741189918e-07, "epoch": 1.8486343612334801, "percentage": 92.42, "elapsed_time": "7:44:10", "remaining_time": "0:38:02"} +{"current_steps": 5247, "total_steps": 5676, "loss": 0.526515007019043, "lr": 3.1220083630261413e-07, "epoch": 1.848986784140969, "percentage": 92.44, "elapsed_time": "7:44:14", 
"remaining_time": "0:37:57"} +{"current_steps": 5248, "total_steps": 5676, "loss": 0.5476140975952148, "lr": 3.1075798719255813e-07, "epoch": 1.849339207048458, "percentage": 92.46, "elapsed_time": "7:44:20", "remaining_time": "0:37:52"} +{"current_steps": 5249, "total_steps": 5676, "loss": 0.5542911291122437, "lr": 3.093184272786254e-07, "epoch": 1.8496916299559472, "percentage": 92.48, "elapsed_time": "7:44:26", "remaining_time": "0:37:46"} +{"current_steps": 5250, "total_steps": 5676, "loss": 0.5147569179534912, "lr": 3.078821570495005e-07, "epoch": 1.850044052863436, "percentage": 92.49, "elapsed_time": "7:44:30", "remaining_time": "0:37:41"} +{"current_steps": 5251, "total_steps": 5676, "loss": 0.5774611830711365, "lr": 3.0644917699275355e-07, "epoch": 1.8503964757709253, "percentage": 92.51, "elapsed_time": "7:44:35", "remaining_time": "0:37:36"} +{"current_steps": 5252, "total_steps": 5676, "loss": 0.6516300439834595, "lr": 3.0501948759483646e-07, "epoch": 1.8507488986784142, "percentage": 92.53, "elapsed_time": "7:44:40", "remaining_time": "0:37:30"} +{"current_steps": 5253, "total_steps": 5676, "loss": 0.7598013877868652, "lr": 3.0359308934108435e-07, "epoch": 1.8511013215859031, "percentage": 92.55, "elapsed_time": "7:44:46", "remaining_time": "0:37:25"} +{"current_steps": 5254, "total_steps": 5676, "loss": 0.5605336427688599, "lr": 3.0216998271571653e-07, "epoch": 1.851453744493392, "percentage": 92.57, "elapsed_time": "7:44:50", "remaining_time": "0:37:20"} +{"current_steps": 5255, "total_steps": 5676, "loss": 0.6549514532089233, "lr": 3.007501682018288e-07, "epoch": 1.851806167400881, "percentage": 92.58, "elapsed_time": "7:44:56", "remaining_time": "0:37:14"} +{"current_steps": 5256, "total_steps": 5676, "loss": 0.5390901565551758, "lr": 2.993336462814089e-07, "epoch": 1.85215859030837, "percentage": 92.6, "elapsed_time": "7:45:01", "remaining_time": "0:37:09"} +{"current_steps": 5257, "total_steps": 5676, "loss": 0.5039275884628296, "lr": 
2.979204174353201e-07, "epoch": 1.8525110132158589, "percentage": 92.62, "elapsed_time": "7:45:05", "remaining_time": "0:37:04"} +{"current_steps": 5258, "total_steps": 5676, "loss": 0.4715292453765869, "lr": 2.9651048214330956e-07, "epoch": 1.852863436123348, "percentage": 92.64, "elapsed_time": "7:45:10", "remaining_time": "0:36:58"} +{"current_steps": 5259, "total_steps": 5676, "loss": 0.4593687653541565, "lr": 2.951038408840068e-07, "epoch": 1.853215859030837, "percentage": 92.65, "elapsed_time": "7:45:17", "remaining_time": "0:36:53"} +{"current_steps": 5260, "total_steps": 5676, "loss": 0.8451346158981323, "lr": 2.9370049413492084e-07, "epoch": 1.8535682819383261, "percentage": 92.67, "elapsed_time": "7:45:22", "remaining_time": "0:36:48"} +{"current_steps": 5261, "total_steps": 5676, "loss": 0.5567130446434021, "lr": 2.923004423724474e-07, "epoch": 1.853920704845815, "percentage": 92.69, "elapsed_time": "7:45:28", "remaining_time": "0:36:43"} +{"current_steps": 5262, "total_steps": 5676, "loss": 0.4740293622016907, "lr": 2.909036860718595e-07, "epoch": 1.854273127753304, "percentage": 92.71, "elapsed_time": "7:45:34", "remaining_time": "0:36:37"} +{"current_steps": 5263, "total_steps": 5676, "loss": 0.5279378294944763, "lr": 2.895102257073101e-07, "epoch": 1.854625550660793, "percentage": 92.72, "elapsed_time": "7:45:41", "remaining_time": "0:36:32"} +{"current_steps": 5264, "total_steps": 5676, "loss": 0.5977471470832825, "lr": 2.881200617518387e-07, "epoch": 1.8549779735682819, "percentage": 92.74, "elapsed_time": "7:45:46", "remaining_time": "0:36:27"} +{"current_steps": 5265, "total_steps": 5676, "loss": 0.5385996699333191, "lr": 2.8673319467736104e-07, "epoch": 1.8553303964757708, "percentage": 92.76, "elapsed_time": "7:45:51", "remaining_time": "0:36:21"} +{"current_steps": 5266, "total_steps": 5676, "loss": 0.5702279806137085, "lr": 2.85349624954675e-07, "epoch": 1.85568281938326, "percentage": 92.78, "elapsed_time": "7:45:56", "remaining_time": 
"0:36:16"} +{"current_steps": 5267, "total_steps": 5676, "loss": 0.584097146987915, "lr": 2.839693530534604e-07, "epoch": 1.856035242290749, "percentage": 92.79, "elapsed_time": "7:46:02", "remaining_time": "0:36:11"} +{"current_steps": 5268, "total_steps": 5676, "loss": 0.6205782890319824, "lr": 2.825923794422758e-07, "epoch": 1.8563876651982378, "percentage": 92.81, "elapsed_time": "7:46:08", "remaining_time": "0:36:06"} +{"current_steps": 5269, "total_steps": 5676, "loss": 0.5626852512359619, "lr": 2.8121870458856284e-07, "epoch": 1.856740088105727, "percentage": 92.83, "elapsed_time": "7:46:12", "remaining_time": "0:36:00"} +{"current_steps": 5270, "total_steps": 5676, "loss": 0.6052513122558594, "lr": 2.798483289586396e-07, "epoch": 1.857092511013216, "percentage": 92.85, "elapsed_time": "7:46:17", "remaining_time": "0:35:55"} +{"current_steps": 5271, "total_steps": 5676, "loss": 0.5074095726013184, "lr": 2.7848125301770504e-07, "epoch": 1.8574449339207049, "percentage": 92.86, "elapsed_time": "7:46:21", "remaining_time": "0:35:49"} +{"current_steps": 5272, "total_steps": 5676, "loss": 0.8006119728088379, "lr": 2.7711747722984127e-07, "epoch": 1.8577973568281938, "percentage": 92.88, "elapsed_time": "7:46:25", "remaining_time": "0:35:44"} +{"current_steps": 5273, "total_steps": 5676, "loss": 0.6437188982963562, "lr": 2.7575700205800694e-07, "epoch": 1.8581497797356827, "percentage": 92.9, "elapsed_time": "7:46:29", "remaining_time": "0:35:39"} +{"current_steps": 5274, "total_steps": 5676, "loss": 0.6610177755355835, "lr": 2.743998279640403e-07, "epoch": 1.8585022026431717, "percentage": 92.92, "elapsed_time": "7:46:34", "remaining_time": "0:35:33"} +{"current_steps": 5275, "total_steps": 5676, "loss": 0.6041977405548096, "lr": 2.7304595540865953e-07, "epoch": 1.8588546255506608, "percentage": 92.94, "elapsed_time": "7:46:38", "remaining_time": "0:35:28"} +{"current_steps": 5276, "total_steps": 5676, "loss": 0.5684002041816711, "lr": 2.716953848514625e-07, 
"epoch": 1.8592070484581498, "percentage": 92.95, "elapsed_time": "7:46:42", "remaining_time": "0:35:23"} +{"current_steps": 5277, "total_steps": 5676, "loss": 0.7256498336791992, "lr": 2.703481167509281e-07, "epoch": 1.859559471365639, "percentage": 92.97, "elapsed_time": "7:46:47", "remaining_time": "0:35:17"} +{"current_steps": 5278, "total_steps": 5676, "loss": 0.7264266014099121, "lr": 2.690041515644093e-07, "epoch": 1.8599118942731279, "percentage": 92.99, "elapsed_time": "7:46:52", "remaining_time": "0:35:12"} +{"current_steps": 5279, "total_steps": 5676, "loss": 0.5427879095077515, "lr": 2.6766348974813895e-07, "epoch": 1.8602643171806168, "percentage": 93.01, "elapsed_time": "7:46:56", "remaining_time": "0:35:06"} +{"current_steps": 5280, "total_steps": 5676, "loss": 0.5970745086669922, "lr": 2.663261317572341e-07, "epoch": 1.8606167400881057, "percentage": 93.02, "elapsed_time": "7:47:03", "remaining_time": "0:35:01"} +{"current_steps": 5281, "total_steps": 5676, "loss": 0.5796299576759338, "lr": 2.6499207804568495e-07, "epoch": 1.8609691629955947, "percentage": 93.04, "elapsed_time": "7:47:08", "remaining_time": "0:34:56"} +{"current_steps": 5282, "total_steps": 5676, "loss": 0.4900246262550354, "lr": 2.6366132906635923e-07, "epoch": 1.8613215859030836, "percentage": 93.06, "elapsed_time": "7:47:13", "remaining_time": "0:34:51"} +{"current_steps": 5283, "total_steps": 5676, "loss": 0.6052582263946533, "lr": 2.6233388527100777e-07, "epoch": 1.8616740088105725, "percentage": 93.08, "elapsed_time": "7:47:19", "remaining_time": "0:34:45"} +{"current_steps": 5284, "total_steps": 5676, "loss": 0.6908484697341919, "lr": 2.610097471102524e-07, "epoch": 1.8620264317180617, "percentage": 93.09, "elapsed_time": "7:47:23", "remaining_time": "0:34:40"} +{"current_steps": 5285, "total_steps": 5676, "loss": 0.6353795528411865, "lr": 2.596889150336024e-07, "epoch": 1.8623788546255506, "percentage": 93.11, "elapsed_time": "7:47:27", "remaining_time": "0:34:35"} 
+{"current_steps": 5286, "total_steps": 5676, "loss": 0.803575873374939, "lr": 2.5837138948943354e-07, "epoch": 1.8627312775330398, "percentage": 93.13, "elapsed_time": "7:47:33", "remaining_time": "0:34:29"} +{"current_steps": 5287, "total_steps": 5676, "loss": 0.5551957488059998, "lr": 2.5705717092500694e-07, "epoch": 1.8630837004405287, "percentage": 93.15, "elapsed_time": "7:47:39", "remaining_time": "0:34:24"} +{"current_steps": 5288, "total_steps": 5676, "loss": 0.6247879266738892, "lr": 2.5574625978646017e-07, "epoch": 1.8634361233480177, "percentage": 93.16, "elapsed_time": "7:47:43", "remaining_time": "0:34:19"} +{"current_steps": 5289, "total_steps": 5676, "loss": 0.6029977798461914, "lr": 2.544386565188062e-07, "epoch": 1.8637885462555066, "percentage": 93.18, "elapsed_time": "7:47:49", "remaining_time": "0:34:13"} +{"current_steps": 5290, "total_steps": 5676, "loss": 0.611297070980072, "lr": 2.531343615659343e-07, "epoch": 1.8641409691629955, "percentage": 93.2, "elapsed_time": "7:47:55", "remaining_time": "0:34:08"} +{"current_steps": 5291, "total_steps": 5676, "loss": 0.5290260314941406, "lr": 2.518333753706137e-07, "epoch": 1.8644933920704845, "percentage": 93.22, "elapsed_time": "7:48:00", "remaining_time": "0:34:03"} +{"current_steps": 5292, "total_steps": 5676, "loss": 0.5988795757293701, "lr": 2.5053569837448664e-07, "epoch": 1.8648458149779734, "percentage": 93.23, "elapsed_time": "7:48:06", "remaining_time": "0:33:58"} +{"current_steps": 5293, "total_steps": 5676, "loss": 0.671028733253479, "lr": 2.4924133101807636e-07, "epoch": 1.8651982378854626, "percentage": 93.25, "elapsed_time": "7:48:11", "remaining_time": "0:33:52"} +{"current_steps": 5294, "total_steps": 5676, "loss": 0.5741167664527893, "lr": 2.4795027374077905e-07, "epoch": 1.8655506607929515, "percentage": 93.27, "elapsed_time": "7:48:16", "remaining_time": "0:33:47"} +{"current_steps": 5295, "total_steps": 5676, "loss": 0.47447216510772705, "lr": 2.4666252698086867e-07, "epoch": 
1.8659030837004407, "percentage": 93.29, "elapsed_time": "7:48:20", "remaining_time": "0:33:41"} +{"current_steps": 5296, "total_steps": 5676, "loss": 0.6535651087760925, "lr": 2.453780911754955e-07, "epoch": 1.8662555066079296, "percentage": 93.31, "elapsed_time": "7:48:26", "remaining_time": "0:33:36"} +{"current_steps": 5297, "total_steps": 5676, "loss": 0.5928847193717957, "lr": 2.4409696676068517e-07, "epoch": 1.8666079295154185, "percentage": 93.32, "elapsed_time": "7:48:30", "remaining_time": "0:33:31"} +{"current_steps": 5298, "total_steps": 5676, "loss": 0.5928774476051331, "lr": 2.428191541713387e-07, "epoch": 1.8669603524229075, "percentage": 93.34, "elapsed_time": "7:48:35", "remaining_time": "0:33:26"} +{"current_steps": 5299, "total_steps": 5676, "loss": 0.5798670053482056, "lr": 2.415446538412358e-07, "epoch": 1.8673127753303964, "percentage": 93.36, "elapsed_time": "7:48:41", "remaining_time": "0:33:20"} +{"current_steps": 5300, "total_steps": 5676, "loss": 0.6222843527793884, "lr": 2.4027346620302707e-07, "epoch": 1.8676651982378853, "percentage": 93.38, "elapsed_time": "7:48:45", "remaining_time": "0:33:15"} +{"current_steps": 5301, "total_steps": 5676, "loss": 0.5501612424850464, "lr": 2.39005591688245e-07, "epoch": 1.8680176211453743, "percentage": 93.39, "elapsed_time": "7:48:55", "remaining_time": "0:33:10"} +{"current_steps": 5302, "total_steps": 5676, "loss": 0.5266422033309937, "lr": 2.377410307272887e-07, "epoch": 1.8683700440528634, "percentage": 93.41, "elapsed_time": "7:49:00", "remaining_time": "0:33:05"} +{"current_steps": 5303, "total_steps": 5676, "loss": 0.7145729064941406, "lr": 2.3647978374944037e-07, "epoch": 1.8687224669603524, "percentage": 93.43, "elapsed_time": "7:49:05", "remaining_time": "0:32:59"} +{"current_steps": 5304, "total_steps": 5676, "loss": 0.6505781412124634, "lr": 2.3522185118285411e-07, "epoch": 1.8690748898678415, "percentage": 93.45, "elapsed_time": "7:49:09", "remaining_time": "0:32:54"} +{"current_steps": 
5305, "total_steps": 5676, "loss": 0.6278528571128845, "lr": 2.3396723345455728e-07, "epoch": 1.8694273127753305, "percentage": 93.46, "elapsed_time": "7:49:14", "remaining_time": "0:32:48"} +{"current_steps": 5306, "total_steps": 5676, "loss": 0.5650503039360046, "lr": 2.3271593099045475e-07, "epoch": 1.8697797356828194, "percentage": 93.48, "elapsed_time": "7:49:20", "remaining_time": "0:32:43"} +{"current_steps": 5307, "total_steps": 5676, "loss": 0.6267939209938049, "lr": 2.314679442153256e-07, "epoch": 1.8701321585903083, "percentage": 93.5, "elapsed_time": "7:49:26", "remaining_time": "0:32:38"} +{"current_steps": 5308, "total_steps": 5676, "loss": 0.45913875102996826, "lr": 2.302232735528187e-07, "epoch": 1.8704845814977973, "percentage": 93.52, "elapsed_time": "7:49:31", "remaining_time": "0:32:33"} +{"current_steps": 5309, "total_steps": 5676, "loss": 0.6122059226036072, "lr": 2.289819194254661e-07, "epoch": 1.8708370044052862, "percentage": 93.53, "elapsed_time": "7:49:36", "remaining_time": "0:32:27"} +{"current_steps": 5310, "total_steps": 5676, "loss": 0.6479405164718628, "lr": 2.2774388225466514e-07, "epoch": 1.8711894273127754, "percentage": 93.55, "elapsed_time": "7:49:41", "remaining_time": "0:32:22"} +{"current_steps": 5311, "total_steps": 5676, "loss": 0.5013849139213562, "lr": 2.26509162460693e-07, "epoch": 1.8715418502202643, "percentage": 93.57, "elapsed_time": "7:49:46", "remaining_time": "0:32:17"} +{"current_steps": 5312, "total_steps": 5676, "loss": 0.6431373357772827, "lr": 2.2527776046269767e-07, "epoch": 1.8718942731277532, "percentage": 93.59, "elapsed_time": "7:49:51", "remaining_time": "0:32:11"} +{"current_steps": 5313, "total_steps": 5676, "loss": 0.6447317004203796, "lr": 2.2404967667870147e-07, "epoch": 1.8722466960352424, "percentage": 93.6, "elapsed_time": "7:49:56", "remaining_time": "0:32:06"} +{"current_steps": 5314, "total_steps": 5676, "loss": 0.5784682631492615, "lr": 2.2282491152560203e-07, "epoch": 1.8725991189427313, 
"percentage": 93.62, "elapsed_time": "7:50:01", "remaining_time": "0:32:01"} +{"current_steps": 5315, "total_steps": 5676, "loss": 0.560835599899292, "lr": 2.2160346541916677e-07, "epoch": 1.8729515418502203, "percentage": 93.64, "elapsed_time": "7:50:07", "remaining_time": "0:31:55"} +{"current_steps": 5316, "total_steps": 5676, "loss": 0.5930913686752319, "lr": 2.2038533877404066e-07, "epoch": 1.8733039647577092, "percentage": 93.66, "elapsed_time": "7:50:13", "remaining_time": "0:31:50"} +{"current_steps": 5317, "total_steps": 5676, "loss": 0.7221095561981201, "lr": 2.1917053200374073e-07, "epoch": 1.8736563876651982, "percentage": 93.68, "elapsed_time": "7:50:17", "remaining_time": "0:31:45"} +{"current_steps": 5318, "total_steps": 5676, "loss": 0.6307567358016968, "lr": 2.179590455206515e-07, "epoch": 1.874008810572687, "percentage": 93.69, "elapsed_time": "7:50:22", "remaining_time": "0:31:39"} +{"current_steps": 5319, "total_steps": 5676, "loss": 0.6158597469329834, "lr": 2.167508797360396e-07, "epoch": 1.8743612334801762, "percentage": 93.71, "elapsed_time": "7:50:28", "remaining_time": "0:31:34"} +{"current_steps": 5320, "total_steps": 5676, "loss": 0.5778557062149048, "lr": 2.1554603506003802e-07, "epoch": 1.8747136563876652, "percentage": 93.73, "elapsed_time": "7:50:34", "remaining_time": "0:31:29"} +{"current_steps": 5321, "total_steps": 5676, "loss": 0.5213632583618164, "lr": 2.1434451190165294e-07, "epoch": 1.8750660792951543, "percentage": 93.75, "elapsed_time": "7:50:40", "remaining_time": "0:31:24"} +{"current_steps": 5322, "total_steps": 5676, "loss": 0.6633203029632568, "lr": 2.131463106687659e-07, "epoch": 1.8754185022026433, "percentage": 93.76, "elapsed_time": "7:50:47", "remaining_time": "0:31:18"} +{"current_steps": 5323, "total_steps": 5676, "loss": 0.6586780548095703, "lr": 2.1195143176812817e-07, "epoch": 1.8757709251101322, "percentage": 93.78, "elapsed_time": "7:50:52", "remaining_time": "0:31:13"} +{"current_steps": 5324, 
"total_steps": 5676, "loss": 0.4946047067642212, "lr": 2.1075987560536305e-07, "epoch": 1.8761233480176212, "percentage": 93.8, "elapsed_time": "7:50:57", "remaining_time": "0:31:08"} +{"current_steps": 5325, "total_steps": 5676, "loss": 0.5689302682876587, "lr": 2.0957164258497031e-07, "epoch": 1.87647577092511, "percentage": 93.82, "elapsed_time": "7:51:04", "remaining_time": "0:31:03"} +{"current_steps": 5326, "total_steps": 5676, "loss": 0.5761843323707581, "lr": 2.0838673311031287e-07, "epoch": 1.876828193832599, "percentage": 93.83, "elapsed_time": "7:51:09", "remaining_time": "0:30:57"} +{"current_steps": 5327, "total_steps": 5676, "loss": 0.5714447498321533, "lr": 2.0720514758363343e-07, "epoch": 1.877180616740088, "percentage": 93.85, "elapsed_time": "7:51:15", "remaining_time": "0:30:52"} +{"current_steps": 5328, "total_steps": 5676, "loss": 0.4566301107406616, "lr": 2.0602688640604441e-07, "epoch": 1.8775330396475771, "percentage": 93.87, "elapsed_time": "7:51:20", "remaining_time": "0:30:47"} +{"current_steps": 5329, "total_steps": 5676, "loss": 0.6326137781143188, "lr": 2.04851949977527e-07, "epoch": 1.877885462555066, "percentage": 93.89, "elapsed_time": "7:51:26", "remaining_time": "0:30:41"} +{"current_steps": 5330, "total_steps": 5676, "loss": 0.6342206001281738, "lr": 2.036803386969355e-07, "epoch": 1.8782378854625552, "percentage": 93.9, "elapsed_time": "7:51:31", "remaining_time": "0:30:36"} +{"current_steps": 5331, "total_steps": 5676, "loss": 0.5525872707366943, "lr": 2.0251205296199616e-07, "epoch": 1.8785903083700441, "percentage": 93.92, "elapsed_time": "7:51:36", "remaining_time": "0:30:31"} +{"current_steps": 5332, "total_steps": 5676, "loss": 0.4932950735092163, "lr": 2.0134709316930733e-07, "epoch": 1.878942731277533, "percentage": 93.94, "elapsed_time": "7:51:41", "remaining_time": "0:30:25"} +{"current_steps": 5333, "total_steps": 5676, "loss": 0.6526485681533813, "lr": 2.001854597143349e-07, "epoch": 1.879295154185022, "percentage": 
93.96, "elapsed_time": "7:51:47", "remaining_time": "0:30:20"} +{"current_steps": 5334, "total_steps": 5676, "loss": 0.6256940960884094, "lr": 1.990271529914156e-07, "epoch": 1.879647577092511, "percentage": 93.97, "elapsed_time": "7:51:51", "remaining_time": "0:30:15"} +{"current_steps": 5335, "total_steps": 5676, "loss": 0.6406987905502319, "lr": 1.9787217339376053e-07, "epoch": 1.88, "percentage": 93.99, "elapsed_time": "7:51:55", "remaining_time": "0:30:09"} +{"current_steps": 5336, "total_steps": 5676, "loss": 0.6141321659088135, "lr": 1.9672052131345043e-07, "epoch": 1.8803524229074888, "percentage": 94.01, "elapsed_time": "7:52:00", "remaining_time": "0:30:04"} +{"current_steps": 5337, "total_steps": 5676, "loss": 0.558428943157196, "lr": 1.955721971414326e-07, "epoch": 1.880704845814978, "percentage": 94.03, "elapsed_time": "7:52:06", "remaining_time": "0:29:59"} +{"current_steps": 5338, "total_steps": 5676, "loss": 0.5995065569877625, "lr": 1.9442720126752968e-07, "epoch": 1.881057268722467, "percentage": 94.05, "elapsed_time": "7:52:12", "remaining_time": "0:29:53"} +{"current_steps": 5339, "total_steps": 5676, "loss": 0.5109822750091553, "lr": 1.932855340804296e-07, "epoch": 1.881409691629956, "percentage": 94.06, "elapsed_time": "7:52:19", "remaining_time": "0:29:48"} +{"current_steps": 5340, "total_steps": 5676, "loss": 0.6695220470428467, "lr": 1.921471959676957e-07, "epoch": 1.881762114537445, "percentage": 94.08, "elapsed_time": "7:52:24", "remaining_time": "0:29:43"} +{"current_steps": 5341, "total_steps": 5676, "loss": 0.6982283592224121, "lr": 1.9101218731575777e-07, "epoch": 1.882114537444934, "percentage": 94.1, "elapsed_time": "7:52:29", "remaining_time": "0:29:38"} +{"current_steps": 5342, "total_steps": 5676, "loss": 0.6475410461425781, "lr": 1.8988050850991314e-07, "epoch": 1.882466960352423, "percentage": 94.12, "elapsed_time": "7:52:34", "remaining_time": "0:29:32"} +{"current_steps": 5343, "total_steps": 5676, "loss": 0.57706218957901, 
"lr": 1.8875215993433448e-07, "epoch": 1.8828193832599118, "percentage": 94.13, "elapsed_time": "7:52:41", "remaining_time": "0:29:27"} +{"current_steps": 5344, "total_steps": 5676, "loss": 0.5243045091629028, "lr": 1.8762714197205988e-07, "epoch": 1.8831718061674008, "percentage": 94.15, "elapsed_time": "7:52:46", "remaining_time": "0:29:22"} +{"current_steps": 5345, "total_steps": 5676, "loss": 0.6208887100219727, "lr": 1.865054550049994e-07, "epoch": 1.88352422907489, "percentage": 94.17, "elapsed_time": "7:52:50", "remaining_time": "0:29:16"} +{"current_steps": 5346, "total_steps": 5676, "loss": 0.5572443008422852, "lr": 1.853870994139284e-07, "epoch": 1.8838766519823789, "percentage": 94.19, "elapsed_time": "7:52:58", "remaining_time": "0:29:11"} +{"current_steps": 5347, "total_steps": 5676, "loss": 0.5673031806945801, "lr": 1.8427207557849436e-07, "epoch": 1.8842290748898678, "percentage": 94.2, "elapsed_time": "7:53:03", "remaining_time": "0:29:06"} +{"current_steps": 5348, "total_steps": 5676, "loss": 0.5085422992706299, "lr": 1.8316038387721558e-07, "epoch": 1.884581497797357, "percentage": 94.22, "elapsed_time": "7:53:10", "remaining_time": "0:29:01"} +{"current_steps": 5349, "total_steps": 5676, "loss": 0.5480824708938599, "lr": 1.8205202468747463e-07, "epoch": 1.8849339207048459, "percentage": 94.24, "elapsed_time": "7:53:14", "remaining_time": "0:28:55"} +{"current_steps": 5350, "total_steps": 5676, "loss": 0.6236293911933899, "lr": 1.8094699838552387e-07, "epoch": 1.8852863436123348, "percentage": 94.26, "elapsed_time": "7:53:19", "remaining_time": "0:28:50"} +{"current_steps": 5351, "total_steps": 5676, "loss": 0.541741132736206, "lr": 1.798453053464888e-07, "epoch": 1.8856387665198238, "percentage": 94.27, "elapsed_time": "7:53:24", "remaining_time": "0:28:45"} +{"current_steps": 5352, "total_steps": 5676, "loss": 0.5309538245201111, "lr": 1.7874694594435692e-07, "epoch": 1.8859911894273127, "percentage": 94.29, "elapsed_time": "7:53:28", 
"remaining_time": "0:28:39"} +{"current_steps": 5353, "total_steps": 5676, "loss": 0.5886228084564209, "lr": 1.7765192055198888e-07, "epoch": 1.8863436123348016, "percentage": 94.31, "elapsed_time": "7:53:34", "remaining_time": "0:28:34"} +{"current_steps": 5354, "total_steps": 5676, "loss": 0.6216265559196472, "lr": 1.7656022954111064e-07, "epoch": 1.8866960352422908, "percentage": 94.33, "elapsed_time": "7:53:39", "remaining_time": "0:28:29"} +{"current_steps": 5355, "total_steps": 5676, "loss": 0.5393999814987183, "lr": 1.7547187328231575e-07, "epoch": 1.8870484581497797, "percentage": 94.34, "elapsed_time": "7:53:44", "remaining_time": "0:28:23"} +{"current_steps": 5356, "total_steps": 5676, "loss": 0.583373486995697, "lr": 1.74386852145072e-07, "epoch": 1.8874008810572689, "percentage": 94.36, "elapsed_time": "7:53:49", "remaining_time": "0:28:18"} +{"current_steps": 5357, "total_steps": 5676, "loss": 0.6403313875198364, "lr": 1.73305166497707e-07, "epoch": 1.8877533039647578, "percentage": 94.38, "elapsed_time": "7:53:53", "remaining_time": "0:28:13"} +{"current_steps": 5358, "total_steps": 5676, "loss": 0.5780963897705078, "lr": 1.7222681670741814e-07, "epoch": 1.8881057268722468, "percentage": 94.4, "elapsed_time": "7:53:59", "remaining_time": "0:28:07"} +{"current_steps": 5359, "total_steps": 5676, "loss": 0.6046397686004639, "lr": 1.711518031402748e-07, "epoch": 1.8884581497797357, "percentage": 94.42, "elapsed_time": "7:54:05", "remaining_time": "0:28:02"} +{"current_steps": 5360, "total_steps": 5676, "loss": 0.5582219362258911, "lr": 1.700801261612084e-07, "epoch": 1.8888105726872246, "percentage": 94.43, "elapsed_time": "7:54:13", "remaining_time": "0:27:57"} +{"current_steps": 5361, "total_steps": 5676, "loss": 0.4880410432815552, "lr": 1.6901178613402125e-07, "epoch": 1.8891629955947136, "percentage": 94.45, "elapsed_time": "7:54:18", "remaining_time": "0:27:52"} +{"current_steps": 5362, "total_steps": 5676, "loss": 0.7417550086975098, "lr": 
1.6794678342138105e-07, "epoch": 1.8895154185022025, "percentage": 94.47, "elapsed_time": "7:54:23", "remaining_time": "0:27:46"} +{"current_steps": 5363, "total_steps": 5676, "loss": 0.4616948962211609, "lr": 1.668851183848219e-07, "epoch": 1.8898678414096917, "percentage": 94.49, "elapsed_time": "7:54:29", "remaining_time": "0:27:41"} +{"current_steps": 5364, "total_steps": 5676, "loss": 0.5595716834068298, "lr": 1.658267913847489e-07, "epoch": 1.8902202643171806, "percentage": 94.5, "elapsed_time": "7:54:36", "remaining_time": "0:27:36"} +{"current_steps": 5365, "total_steps": 5676, "loss": 0.72450852394104, "lr": 1.6477180278042793e-07, "epoch": 1.8905726872246698, "percentage": 94.52, "elapsed_time": "7:54:41", "remaining_time": "0:27:31"} +{"current_steps": 5366, "total_steps": 5676, "loss": 0.6261592507362366, "lr": 1.637201529299959e-07, "epoch": 1.8909251101321587, "percentage": 94.54, "elapsed_time": "7:54:46", "remaining_time": "0:27:25"} +{"current_steps": 5367, "total_steps": 5676, "loss": 0.5023064613342285, "lr": 1.6267184219045607e-07, "epoch": 1.8912775330396476, "percentage": 94.56, "elapsed_time": "7:54:52", "remaining_time": "0:27:20"} +{"current_steps": 5368, "total_steps": 5676, "loss": 0.7113457918167114, "lr": 1.6162687091767714e-07, "epoch": 1.8916299559471366, "percentage": 94.57, "elapsed_time": "7:54:56", "remaining_time": "0:27:15"} +{"current_steps": 5369, "total_steps": 5676, "loss": 0.5376787185668945, "lr": 1.6058523946639426e-07, "epoch": 1.8919823788546255, "percentage": 94.59, "elapsed_time": "7:55:02", "remaining_time": "0:27:09"} +{"current_steps": 5370, "total_steps": 5676, "loss": 0.6523979902267456, "lr": 1.5954694819020788e-07, "epoch": 1.8923348017621144, "percentage": 94.61, "elapsed_time": "7:55:07", "remaining_time": "0:27:04"} +{"current_steps": 5371, "total_steps": 5676, "loss": 0.6610705852508545, "lr": 1.5851199744158607e-07, "epoch": 1.8926872246696034, "percentage": 94.63, "elapsed_time": "7:55:13", 
"remaining_time": "0:26:59"} +{"current_steps": 5372, "total_steps": 5676, "loss": 0.657126247882843, "lr": 1.5748038757186445e-07, "epoch": 1.8930396475770925, "percentage": 94.64, "elapsed_time": "7:55:18", "remaining_time": "0:26:53"} +{"current_steps": 5373, "total_steps": 5676, "loss": 0.7247096300125122, "lr": 1.5645211893123846e-07, "epoch": 1.8933920704845815, "percentage": 94.66, "elapsed_time": "7:55:23", "remaining_time": "0:26:48"} +{"current_steps": 5374, "total_steps": 5676, "loss": 0.5392117500305176, "lr": 1.5542719186877553e-07, "epoch": 1.8937444933920706, "percentage": 94.68, "elapsed_time": "7:55:29", "remaining_time": "0:26:43"} +{"current_steps": 5375, "total_steps": 5676, "loss": 0.5038361549377441, "lr": 1.5440560673240735e-07, "epoch": 1.8940969162995596, "percentage": 94.7, "elapsed_time": "7:55:33", "remaining_time": "0:26:37"} +{"current_steps": 5376, "total_steps": 5676, "loss": 0.4768316447734833, "lr": 1.5338736386892982e-07, "epoch": 1.8944493392070485, "percentage": 94.71, "elapsed_time": "7:55:38", "remaining_time": "0:26:32"} +{"current_steps": 5377, "total_steps": 5676, "loss": 0.5925793051719666, "lr": 1.5237246362400316e-07, "epoch": 1.8948017621145374, "percentage": 94.73, "elapsed_time": "7:55:45", "remaining_time": "0:26:27"} +{"current_steps": 5378, "total_steps": 5676, "loss": 0.47840988636016846, "lr": 1.5136090634215616e-07, "epoch": 1.8951541850220264, "percentage": 94.75, "elapsed_time": "7:55:49", "remaining_time": "0:26:21"} +{"current_steps": 5379, "total_steps": 5676, "loss": 0.6365169882774353, "lr": 1.5035269236677974e-07, "epoch": 1.8955066079295153, "percentage": 94.77, "elapsed_time": "7:55:55", "remaining_time": "0:26:16"} +{"current_steps": 5380, "total_steps": 5676, "loss": 0.6287797689437866, "lr": 1.4934782204013344e-07, "epoch": 1.8958590308370042, "percentage": 94.79, "elapsed_time": "7:56:01", "remaining_time": "0:26:11"} +{"current_steps": 5381, "total_steps": 5676, "loss": 0.6859137415885925, "lr": 
1.4834629570333548e-07, "epoch": 1.8962114537444934, "percentage": 94.8, "elapsed_time": "7:56:06", "remaining_time": "0:26:06"} +{"current_steps": 5382, "total_steps": 5676, "loss": 0.5545040369033813, "lr": 1.4734811369637725e-07, "epoch": 1.8965638766519823, "percentage": 94.82, "elapsed_time": "7:56:10", "remaining_time": "0:26:00"} +{"current_steps": 5383, "total_steps": 5676, "loss": 0.6418923139572144, "lr": 1.463532763581077e-07, "epoch": 1.8969162995594715, "percentage": 94.84, "elapsed_time": "7:56:16", "remaining_time": "0:25:55"} +{"current_steps": 5384, "total_steps": 5676, "loss": 0.7618488669395447, "lr": 1.4536178402624334e-07, "epoch": 1.8972687224669604, "percentage": 94.86, "elapsed_time": "7:56:20", "remaining_time": "0:25:50"} +{"current_steps": 5385, "total_steps": 5676, "loss": 0.6178286671638489, "lr": 1.4437363703736718e-07, "epoch": 1.8976211453744494, "percentage": 94.87, "elapsed_time": "7:56:25", "remaining_time": "0:25:44"} +{"current_steps": 5386, "total_steps": 5676, "loss": 0.6800570487976074, "lr": 1.4338883572692087e-07, "epoch": 1.8979735682819383, "percentage": 94.89, "elapsed_time": "7:56:30", "remaining_time": "0:25:39"} +{"current_steps": 5387, "total_steps": 5676, "loss": 0.6063584089279175, "lr": 1.4240738042921588e-07, "epoch": 1.8983259911894272, "percentage": 94.91, "elapsed_time": "7:56:36", "remaining_time": "0:25:34"} +{"current_steps": 5388, "total_steps": 5676, "loss": 0.5631873607635498, "lr": 1.4142927147742792e-07, "epoch": 1.8986784140969162, "percentage": 94.93, "elapsed_time": "7:56:42", "remaining_time": "0:25:28"} +{"current_steps": 5389, "total_steps": 5676, "loss": 0.5346484184265137, "lr": 1.4045450920358917e-07, "epoch": 1.8990308370044053, "percentage": 94.94, "elapsed_time": "7:56:48", "remaining_time": "0:25:23"} +{"current_steps": 5390, "total_steps": 5676, "loss": 0.5043535232543945, "lr": 1.3948309393860605e-07, "epoch": 1.8993832599118943, "percentage": 94.96, "elapsed_time": "7:56:54", 
"remaining_time": "0:25:18"} +{"current_steps": 5391, "total_steps": 5676, "loss": 0.6591805219650269, "lr": 1.3851502601224032e-07, "epoch": 1.8997356828193832, "percentage": 94.98, "elapsed_time": "7:57:00", "remaining_time": "0:25:13"} +{"current_steps": 5392, "total_steps": 5676, "loss": 0.6831244826316833, "lr": 1.3755030575312355e-07, "epoch": 1.9000881057268724, "percentage": 95.0, "elapsed_time": "7:57:07", "remaining_time": "0:25:07"} +{"current_steps": 5393, "total_steps": 5676, "loss": 0.6572617292404175, "lr": 1.3658893348874714e-07, "epoch": 1.9004405286343613, "percentage": 95.01, "elapsed_time": "7:57:13", "remaining_time": "0:25:02"} +{"current_steps": 5394, "total_steps": 5676, "loss": 0.5834530591964722, "lr": 1.3563090954546555e-07, "epoch": 1.9007929515418502, "percentage": 95.03, "elapsed_time": "7:57:18", "remaining_time": "0:24:57"} +{"current_steps": 5395, "total_steps": 5676, "loss": 0.5810972452163696, "lr": 1.3467623424850084e-07, "epoch": 1.9011453744493392, "percentage": 95.05, "elapsed_time": "7:57:24", "remaining_time": "0:24:51"} +{"current_steps": 5396, "total_steps": 5676, "loss": 0.6338596940040588, "lr": 1.3372490792193493e-07, "epoch": 1.9014977973568281, "percentage": 95.07, "elapsed_time": "7:57:29", "remaining_time": "0:24:46"} +{"current_steps": 5397, "total_steps": 5676, "loss": 0.5274045467376709, "lr": 1.327769308887117e-07, "epoch": 1.901850220264317, "percentage": 95.08, "elapsed_time": "7:57:34", "remaining_time": "0:24:41"} +{"current_steps": 5398, "total_steps": 5676, "loss": 0.5416278839111328, "lr": 1.3183230347064147e-07, "epoch": 1.9022026431718062, "percentage": 95.1, "elapsed_time": "7:57:38", "remaining_time": "0:24:35"} +{"current_steps": 5399, "total_steps": 5676, "loss": 0.4818935692310333, "lr": 1.3089102598839442e-07, "epoch": 1.9025550660792951, "percentage": 95.12, "elapsed_time": "7:57:43", "remaining_time": "0:24:30"} +{"current_steps": 5400, "total_steps": 5676, "loss": 0.4553770124912262, "lr": 
1.299530987615072e-07, "epoch": 1.9029074889867843, "percentage": 95.14, "elapsed_time": "7:57:50", "remaining_time": "0:24:25"} +{"current_steps": 5401, "total_steps": 5676, "loss": 0.5663920640945435, "lr": 1.2901852210837507e-07, "epoch": 1.9032599118942732, "percentage": 95.16, "elapsed_time": "7:58:00", "remaining_time": "0:24:20"} +{"current_steps": 5402, "total_steps": 5676, "loss": 0.5654638409614563, "lr": 1.2808729634625872e-07, "epoch": 1.9036123348017622, "percentage": 95.17, "elapsed_time": "7:58:07", "remaining_time": "0:24:15"} +{"current_steps": 5403, "total_steps": 5676, "loss": 0.8061939477920532, "lr": 1.271594217912797e-07, "epoch": 1.903964757709251, "percentage": 95.19, "elapsed_time": "7:58:12", "remaining_time": "0:24:09"} +{"current_steps": 5404, "total_steps": 5676, "loss": 0.5832188129425049, "lr": 1.2623489875842276e-07, "epoch": 1.90431718061674, "percentage": 95.21, "elapsed_time": "7:58:18", "remaining_time": "0:24:04"} +{"current_steps": 5405, "total_steps": 5676, "loss": 0.6112633943557739, "lr": 1.2531372756153458e-07, "epoch": 1.904669603524229, "percentage": 95.23, "elapsed_time": "7:58:23", "remaining_time": "0:23:59"} +{"current_steps": 5406, "total_steps": 5676, "loss": 0.7083494663238525, "lr": 1.2439590851332394e-07, "epoch": 1.905022026431718, "percentage": 95.24, "elapsed_time": "7:58:27", "remaining_time": "0:23:53"} +{"current_steps": 5407, "total_steps": 5676, "loss": 0.5319055318832397, "lr": 1.2348144192536272e-07, "epoch": 1.905374449339207, "percentage": 95.26, "elapsed_time": "7:58:32", "remaining_time": "0:23:48"} +{"current_steps": 5408, "total_steps": 5676, "loss": 0.6199945211410522, "lr": 1.2257032810808256e-07, "epoch": 1.905726872246696, "percentage": 95.28, "elapsed_time": "7:58:37", "remaining_time": "0:23:43"} +{"current_steps": 5409, "total_steps": 5676, "loss": 0.6596004962921143, "lr": 1.2166256737077942e-07, "epoch": 1.9060792951541852, "percentage": 95.3, "elapsed_time": "7:58:43", "remaining_time": 
"0:23:37"} +{"current_steps": 5410, "total_steps": 5676, "loss": 0.553988516330719, "lr": 1.20758160021609e-07, "epoch": 1.906431718061674, "percentage": 95.31, "elapsed_time": "7:58:48", "remaining_time": "0:23:32"} +{"current_steps": 5411, "total_steps": 5676, "loss": 0.6295895576477051, "lr": 1.1985710636759128e-07, "epoch": 1.906784140969163, "percentage": 95.33, "elapsed_time": "7:58:53", "remaining_time": "0:23:27"} +{"current_steps": 5412, "total_steps": 5676, "loss": 0.6555598378181458, "lr": 1.1895940671460271e-07, "epoch": 1.907136563876652, "percentage": 95.35, "elapsed_time": "7:58:58", "remaining_time": "0:23:21"} +{"current_steps": 5413, "total_steps": 5676, "loss": 0.48203831911087036, "lr": 1.1806506136738616e-07, "epoch": 1.907488986784141, "percentage": 95.37, "elapsed_time": "7:59:03", "remaining_time": "0:23:16"} +{"current_steps": 5414, "total_steps": 5676, "loss": 0.6632858514785767, "lr": 1.1717407062954434e-07, "epoch": 1.9078414096916299, "percentage": 95.38, "elapsed_time": "7:59:09", "remaining_time": "0:23:11"} +{"current_steps": 5415, "total_steps": 5676, "loss": 0.6058870553970337, "lr": 1.1628643480354085e-07, "epoch": 1.9081938325991188, "percentage": 95.4, "elapsed_time": "7:59:13", "remaining_time": "0:23:05"} +{"current_steps": 5416, "total_steps": 5676, "loss": 0.5106638073921204, "lr": 1.1540215419070022e-07, "epoch": 1.908546255506608, "percentage": 95.42, "elapsed_time": "7:59:18", "remaining_time": "0:23:00"} +{"current_steps": 5417, "total_steps": 5676, "loss": 0.6641250848770142, "lr": 1.1452122909120788e-07, "epoch": 1.9088986784140969, "percentage": 95.44, "elapsed_time": "7:59:24", "remaining_time": "0:22:55"} +{"current_steps": 5418, "total_steps": 5676, "loss": 0.4823518395423889, "lr": 1.1364365980411019e-07, "epoch": 1.909251101321586, "percentage": 95.45, "elapsed_time": "7:59:30", "remaining_time": "0:22:50"} +{"current_steps": 5419, "total_steps": 5676, "loss": 0.5770869255065918, "lr": 1.127694466273166e-07, 
"epoch": 1.909603524229075, "percentage": 95.47, "elapsed_time": "7:59:36", "remaining_time": "0:22:44"} +{"current_steps": 5420, "total_steps": 5676, "loss": 0.5120491981506348, "lr": 1.1189858985759306e-07, "epoch": 1.909955947136564, "percentage": 95.49, "elapsed_time": "7:59:41", "remaining_time": "0:22:39"} +{"current_steps": 5421, "total_steps": 5676, "loss": 0.6742277145385742, "lr": 1.1103108979056865e-07, "epoch": 1.9103083700440529, "percentage": 95.51, "elapsed_time": "7:59:47", "remaining_time": "0:22:34"} +{"current_steps": 5422, "total_steps": 5676, "loss": 0.6053510904312134, "lr": 1.1016694672073336e-07, "epoch": 1.9106607929515418, "percentage": 95.53, "elapsed_time": "7:59:53", "remaining_time": "0:22:28"} +{"current_steps": 5423, "total_steps": 5676, "loss": 0.5598228573799133, "lr": 1.0930616094143698e-07, "epoch": 1.9110132158590307, "percentage": 95.54, "elapsed_time": "7:59:57", "remaining_time": "0:22:23"} +{"current_steps": 5424, "total_steps": 5676, "loss": 0.599521279335022, "lr": 1.0844873274488799e-07, "epoch": 1.9113656387665197, "percentage": 95.56, "elapsed_time": "8:00:04", "remaining_time": "0:22:18"} +{"current_steps": 5425, "total_steps": 5676, "loss": 0.5986596345901489, "lr": 1.075946624221591e-07, "epoch": 1.9117180616740088, "percentage": 95.58, "elapsed_time": "8:00:09", "remaining_time": "0:22:12"} +{"current_steps": 5426, "total_steps": 5676, "loss": 0.5657980442047119, "lr": 1.067439502631773e-07, "epoch": 1.9120704845814978, "percentage": 95.6, "elapsed_time": "8:00:13", "remaining_time": "0:22:07"} +{"current_steps": 5427, "total_steps": 5676, "loss": 0.5561040639877319, "lr": 1.0589659655673712e-07, "epoch": 1.912422907488987, "percentage": 95.61, "elapsed_time": "8:00:19", "remaining_time": "0:22:02"} +{"current_steps": 5428, "total_steps": 5676, "loss": 0.5088320970535278, "lr": 1.0505260159048513e-07, "epoch": 1.9127753303964758, "percentage": 95.63, "elapsed_time": "8:00:25", "remaining_time": "0:21:57"} 
+{"current_steps": 5429, "total_steps": 5676, "loss": 0.5679075717926025, "lr": 1.0421196565093217e-07, "epoch": 1.9131277533039648, "percentage": 95.65, "elapsed_time": "8:00:31", "remaining_time": "0:21:51"} +{"current_steps": 5430, "total_steps": 5676, "loss": 0.6701461672782898, "lr": 1.0337468902344994e-07, "epoch": 1.9134801762114537, "percentage": 95.67, "elapsed_time": "8:00:35", "remaining_time": "0:21:46"} +{"current_steps": 5431, "total_steps": 5676, "loss": 0.6172112822532654, "lr": 1.0254077199226553e-07, "epoch": 1.9138325991189427, "percentage": 95.68, "elapsed_time": "8:00:40", "remaining_time": "0:21:41"} +{"current_steps": 5432, "total_steps": 5676, "loss": 0.5926263332366943, "lr": 1.0171021484046806e-07, "epoch": 1.9141850220264316, "percentage": 95.7, "elapsed_time": "8:00:46", "remaining_time": "0:21:35"} +{"current_steps": 5433, "total_steps": 5676, "loss": 0.6142431497573853, "lr": 1.0088301785000754e-07, "epoch": 1.9145374449339208, "percentage": 95.72, "elapsed_time": "8:00:50", "remaining_time": "0:21:30"} +{"current_steps": 5434, "total_steps": 5676, "loss": 0.5367780923843384, "lr": 1.0005918130168934e-07, "epoch": 1.9148898678414097, "percentage": 95.74, "elapsed_time": "8:00:56", "remaining_time": "0:21:25"} +{"current_steps": 5435, "total_steps": 5676, "loss": 0.5241641998291016, "lr": 9.923870547518311e-08, "epoch": 1.9152422907488986, "percentage": 95.75, "elapsed_time": "8:01:01", "remaining_time": "0:21:19"} +{"current_steps": 5436, "total_steps": 5676, "loss": 0.5906308889389038, "lr": 9.842159064901157e-08, "epoch": 1.9155947136563878, "percentage": 95.77, "elapsed_time": "8:01:06", "remaining_time": "0:21:14"} +{"current_steps": 5437, "total_steps": 5676, "loss": 0.5411181449890137, "lr": 9.760783710056176e-08, "epoch": 1.9159471365638767, "percentage": 95.79, "elapsed_time": "8:01:12", "remaining_time": "0:21:09"} +{"current_steps": 5438, "total_steps": 5676, "loss": 0.6313618421554565, "lr": 9.679744510607825e-08, "epoch": 
1.9162995594713657, "percentage": 95.81, "elapsed_time": "8:01:17", "remaining_time": "0:21:03"} +{"current_steps": 5439, "total_steps": 5676, "loss": 0.6330033540725708, "lr": 9.599041494066208e-08, "epoch": 1.9166519823788546, "percentage": 95.82, "elapsed_time": "8:01:23", "remaining_time": "0:20:58"} +{"current_steps": 5440, "total_steps": 5676, "loss": 0.5859507322311401, "lr": 9.518674687827634e-08, "epoch": 1.9170044052863435, "percentage": 95.84, "elapsed_time": "8:01:30", "remaining_time": "0:20:53"} +{"current_steps": 5441, "total_steps": 5676, "loss": 0.571119487285614, "lr": 9.438644119174057e-08, "epoch": 1.9173568281938325, "percentage": 95.86, "elapsed_time": "8:01:35", "remaining_time": "0:20:48"} +{"current_steps": 5442, "total_steps": 5676, "loss": 0.6114518046379089, "lr": 9.3589498152733e-08, "epoch": 1.9177092511013216, "percentage": 95.88, "elapsed_time": "8:01:40", "remaining_time": "0:20:42"} +{"current_steps": 5443, "total_steps": 5676, "loss": 0.5762027502059937, "lr": 9.279591803179277e-08, "epoch": 1.9180616740088106, "percentage": 95.89, "elapsed_time": "8:01:45", "remaining_time": "0:20:37"} +{"current_steps": 5444, "total_steps": 5676, "loss": 0.6081440448760986, "lr": 9.200570109831441e-08, "epoch": 1.9184140969162997, "percentage": 95.91, "elapsed_time": "8:01:50", "remaining_time": "0:20:32"} +{"current_steps": 5445, "total_steps": 5676, "loss": 0.5682440996170044, "lr": 9.121884762055222e-08, "epoch": 1.9187665198237887, "percentage": 95.93, "elapsed_time": "8:01:55", "remaining_time": "0:20:26"} +{"current_steps": 5446, "total_steps": 5676, "loss": 0.5290100574493408, "lr": 9.043535786561919e-08, "epoch": 1.9191189427312776, "percentage": 95.95, "elapsed_time": "8:01:59", "remaining_time": "0:20:21"} +{"current_steps": 5447, "total_steps": 5676, "loss": 0.5743255019187927, "lr": 8.965523209948367e-08, "epoch": 1.9194713656387665, "percentage": 95.97, "elapsed_time": "8:02:03", "remaining_time": "0:20:16"} +{"current_steps": 5448, 
"total_steps": 5676, "loss": 0.5955618023872375, "lr": 8.887847058697718e-08, "epoch": 1.9198237885462555, "percentage": 95.98, "elapsed_time": "8:02:10", "remaining_time": "0:20:10"} +{"current_steps": 5449, "total_steps": 5676, "loss": 0.4732915759086609, "lr": 8.810507359178322e-08, "epoch": 1.9201762114537444, "percentage": 96.0, "elapsed_time": "8:02:15", "remaining_time": "0:20:05"} +{"current_steps": 5450, "total_steps": 5676, "loss": 0.6712108850479126, "lr": 8.733504137644621e-08, "epoch": 1.9205286343612333, "percentage": 96.02, "elapsed_time": "8:02:20", "remaining_time": "0:20:00"} +{"current_steps": 5451, "total_steps": 5676, "loss": 0.5169811248779297, "lr": 8.656837420237152e-08, "epoch": 1.9208810572687225, "percentage": 96.04, "elapsed_time": "8:02:25", "remaining_time": "0:19:54"} +{"current_steps": 5452, "total_steps": 5676, "loss": 0.6117082238197327, "lr": 8.580507232981428e-08, "epoch": 1.9212334801762114, "percentage": 96.05, "elapsed_time": "8:02:30", "remaining_time": "0:19:49"} +{"current_steps": 5453, "total_steps": 5676, "loss": 0.7020283937454224, "lr": 8.504513601789388e-08, "epoch": 1.9215859030837006, "percentage": 96.07, "elapsed_time": "8:02:35", "remaining_time": "0:19:44"} +{"current_steps": 5454, "total_steps": 5676, "loss": 0.5489979386329651, "lr": 8.42885655245862e-08, "epoch": 1.9219383259911895, "percentage": 96.09, "elapsed_time": "8:02:40", "remaining_time": "0:19:38"} +{"current_steps": 5455, "total_steps": 5676, "loss": 0.5361644625663757, "lr": 8.353536110672133e-08, "epoch": 1.9222907488986785, "percentage": 96.11, "elapsed_time": "8:02:45", "remaining_time": "0:19:33"} +{"current_steps": 5456, "total_steps": 5676, "loss": 0.6470010280609131, "lr": 8.278552301998921e-08, "epoch": 1.9226431718061674, "percentage": 96.12, "elapsed_time": "8:02:50", "remaining_time": "0:19:28"} +{"current_steps": 5457, "total_steps": 5676, "loss": 0.4642202854156494, "lr": 8.203905151893731e-08, "epoch": 1.9229955947136563, "percentage": 
96.14, "elapsed_time": "8:02:57", "remaining_time": "0:19:22"} +{"current_steps": 5458, "total_steps": 5676, "loss": 0.6817516088485718, "lr": 8.129594685696852e-08, "epoch": 1.9233480176211453, "percentage": 96.16, "elapsed_time": "8:03:04", "remaining_time": "0:19:17"} +{"current_steps": 5459, "total_steps": 5676, "loss": 0.5748617649078369, "lr": 8.055620928634433e-08, "epoch": 1.9237004405286342, "percentage": 96.18, "elapsed_time": "8:03:09", "remaining_time": "0:19:12"} +{"current_steps": 5460, "total_steps": 5676, "loss": 0.6730939149856567, "lr": 7.981983905818281e-08, "epoch": 1.9240528634361234, "percentage": 96.19, "elapsed_time": "8:03:15", "remaining_time": "0:19:07"} +{"current_steps": 5461, "total_steps": 5676, "loss": 0.46469685435295105, "lr": 7.90868364224584e-08, "epoch": 1.9244052863436123, "percentage": 96.21, "elapsed_time": "8:03:20", "remaining_time": "0:19:01"} +{"current_steps": 5462, "total_steps": 5676, "loss": 0.5633926391601562, "lr": 7.835720162800209e-08, "epoch": 1.9247577092511015, "percentage": 96.23, "elapsed_time": "8:03:25", "remaining_time": "0:18:56"} +{"current_steps": 5463, "total_steps": 5676, "loss": 0.5813394784927368, "lr": 7.76309349225035e-08, "epoch": 1.9251101321585904, "percentage": 96.25, "elapsed_time": "8:03:30", "remaining_time": "0:18:51"} +{"current_steps": 5464, "total_steps": 5676, "loss": 0.39959418773651123, "lr": 7.690803655250656e-08, "epoch": 1.9254625550660793, "percentage": 96.26, "elapsed_time": "8:03:37", "remaining_time": "0:18:45"} +{"current_steps": 5465, "total_steps": 5676, "loss": 0.6136372089385986, "lr": 7.618850676341383e-08, "epoch": 1.9258149779735683, "percentage": 96.28, "elapsed_time": "8:03:44", "remaining_time": "0:18:40"} +{"current_steps": 5466, "total_steps": 5676, "loss": 0.6664354801177979, "lr": 7.547234579948104e-08, "epoch": 1.9261674008810572, "percentage": 96.3, "elapsed_time": "8:03:49", "remaining_time": "0:18:35"} +{"current_steps": 5467, "total_steps": 5676, "loss": 
0.6009566783905029, "lr": 7.475955390382483e-08, "epoch": 1.9265198237885461, "percentage": 96.32, "elapsed_time": "8:03:54", "remaining_time": "0:18:29"} +{"current_steps": 5468, "total_steps": 5676, "loss": 0.7307299375534058, "lr": 7.405013131841499e-08, "epoch": 1.9268722466960353, "percentage": 96.34, "elapsed_time": "8:04:00", "remaining_time": "0:18:24"} +{"current_steps": 5469, "total_steps": 5676, "loss": 0.5459531545639038, "lr": 7.334407828407885e-08, "epoch": 1.9272246696035242, "percentage": 96.35, "elapsed_time": "8:04:06", "remaining_time": "0:18:19"} +{"current_steps": 5470, "total_steps": 5676, "loss": 0.6230820417404175, "lr": 7.264139504049916e-08, "epoch": 1.9275770925110132, "percentage": 96.37, "elapsed_time": "8:04:11", "remaining_time": "0:18:14"} +{"current_steps": 5471, "total_steps": 5676, "loss": 0.5282379984855652, "lr": 7.194208182621509e-08, "epoch": 1.9279295154185023, "percentage": 96.39, "elapsed_time": "8:04:17", "remaining_time": "0:18:08"} +{"current_steps": 5472, "total_steps": 5676, "loss": 0.626023530960083, "lr": 7.12461388786212e-08, "epoch": 1.9282819383259913, "percentage": 96.41, "elapsed_time": "8:04:23", "remaining_time": "0:18:03"} +{"current_steps": 5473, "total_steps": 5676, "loss": 0.6897492408752441, "lr": 7.055356643396849e-08, "epoch": 1.9286343612334802, "percentage": 96.42, "elapsed_time": "8:04:27", "remaining_time": "0:17:58"} +{"current_steps": 5474, "total_steps": 5676, "loss": 0.583849310874939, "lr": 6.986436472736447e-08, "epoch": 1.9289867841409691, "percentage": 96.44, "elapsed_time": "8:04:33", "remaining_time": "0:17:52"} +{"current_steps": 5475, "total_steps": 5676, "loss": 0.6056735515594482, "lr": 6.917853399277197e-08, "epoch": 1.929339207048458, "percentage": 96.46, "elapsed_time": "8:04:39", "remaining_time": "0:17:47"} +{"current_steps": 5476, "total_steps": 5676, "loss": 0.52838134765625, "lr": 6.849607446300699e-08, "epoch": 1.929691629955947, "percentage": 96.48, "elapsed_time": "8:04:44", 
"remaining_time": "0:17:42"} +{"current_steps": 5477, "total_steps": 5676, "loss": 0.6466653943061829, "lr": 6.781698636974532e-08, "epoch": 1.9300440528634362, "percentage": 96.49, "elapsed_time": "8:04:49", "remaining_time": "0:17:36"} +{"current_steps": 5478, "total_steps": 5676, "loss": 0.6570286750793457, "lr": 6.714126994351589e-08, "epoch": 1.930396475770925, "percentage": 96.51, "elapsed_time": "8:04:53", "remaining_time": "0:17:31"} +{"current_steps": 5479, "total_steps": 5676, "loss": 0.7303042411804199, "lr": 6.646892541370409e-08, "epoch": 1.9307488986784143, "percentage": 96.53, "elapsed_time": "8:04:58", "remaining_time": "0:17:26"} +{"current_steps": 5480, "total_steps": 5676, "loss": 0.5556488037109375, "lr": 6.579995300854846e-08, "epoch": 1.9311013215859032, "percentage": 96.55, "elapsed_time": "8:05:05", "remaining_time": "0:17:20"} +{"current_steps": 5481, "total_steps": 5676, "loss": 0.6673456430435181, "lr": 6.513435295514404e-08, "epoch": 1.9314537444933921, "percentage": 96.56, "elapsed_time": "8:05:09", "remaining_time": "0:17:15"} +{"current_steps": 5482, "total_steps": 5676, "loss": 0.5605199337005615, "lr": 6.447212547944448e-08, "epoch": 1.931806167400881, "percentage": 96.58, "elapsed_time": "8:05:15", "remaining_time": "0:17:10"} +{"current_steps": 5483, "total_steps": 5676, "loss": 0.5455278158187866, "lr": 6.381327080625111e-08, "epoch": 1.93215859030837, "percentage": 96.6, "elapsed_time": "8:05:20", "remaining_time": "0:17:05"} +{"current_steps": 5484, "total_steps": 5676, "loss": 0.5371166467666626, "lr": 6.315778915922722e-08, "epoch": 1.932511013215859, "percentage": 96.62, "elapsed_time": "8:05:26", "remaining_time": "0:16:59"} +{"current_steps": 5485, "total_steps": 5676, "loss": 0.5873486399650574, "lr": 6.250568076088814e-08, "epoch": 1.9328634361233479, "percentage": 96.63, "elapsed_time": "8:05:32", "remaining_time": "0:16:54"} +{"current_steps": 5486, "total_steps": 5676, "loss": 0.4187420606613159, "lr": 
6.18569458326046e-08, "epoch": 1.933215859030837, "percentage": 96.65, "elapsed_time": "8:05:37", "remaining_time": "0:16:49"} +{"current_steps": 5487, "total_steps": 5676, "loss": 0.6006373167037964, "lr": 6.121158459460042e-08, "epoch": 1.933568281938326, "percentage": 96.67, "elapsed_time": "8:05:41", "remaining_time": "0:16:43"} +{"current_steps": 5488, "total_steps": 5676, "loss": 0.6022043228149414, "lr": 6.056959726595702e-08, "epoch": 1.9339207048458151, "percentage": 96.69, "elapsed_time": "8:05:47", "remaining_time": "0:16:38"} +{"current_steps": 5489, "total_steps": 5676, "loss": 0.6324778199195862, "lr": 5.993098406460895e-08, "epoch": 1.934273127753304, "percentage": 96.71, "elapsed_time": "8:05:52", "remaining_time": "0:16:33"} +{"current_steps": 5490, "total_steps": 5676, "loss": 0.545529305934906, "lr": 5.929574520734505e-08, "epoch": 1.934625550660793, "percentage": 96.72, "elapsed_time": "8:05:57", "remaining_time": "0:16:27"} +{"current_steps": 5491, "total_steps": 5676, "loss": 0.623627781867981, "lr": 5.8663880909809454e-08, "epoch": 1.934977973568282, "percentage": 96.74, "elapsed_time": "8:06:02", "remaining_time": "0:16:22"} +{"current_steps": 5492, "total_steps": 5676, "loss": 0.529983639717102, "lr": 5.80353913865006e-08, "epoch": 1.9353303964757709, "percentage": 96.76, "elapsed_time": "8:06:08", "remaining_time": "0:16:17"} +{"current_steps": 5493, "total_steps": 5676, "loss": 0.638504147529602, "lr": 5.7410276850770055e-08, "epoch": 1.9356828193832598, "percentage": 96.78, "elapsed_time": "8:06:13", "remaining_time": "0:16:11"} +{"current_steps": 5494, "total_steps": 5676, "loss": 0.6822696924209595, "lr": 5.678853751482694e-08, "epoch": 1.9360352422907487, "percentage": 96.79, "elapsed_time": "8:06:18", "remaining_time": "0:16:06"} +{"current_steps": 5495, "total_steps": 5676, "loss": 0.5454750061035156, "lr": 5.6170173589730204e-08, "epoch": 1.936387665198238, "percentage": 96.81, "elapsed_time": "8:06:24", "remaining_time": 
"0:16:01"} +{"current_steps": 5496, "total_steps": 5676, "loss": 0.5301260948181152, "lr": 5.555518528539638e-08, "epoch": 1.9367400881057268, "percentage": 96.83, "elapsed_time": "8:06:28", "remaining_time": "0:15:55"} +{"current_steps": 5497, "total_steps": 5676, "loss": 0.697251558303833, "lr": 5.4943572810594035e-08, "epoch": 1.937092511013216, "percentage": 96.85, "elapsed_time": "8:06:33", "remaining_time": "0:15:50"} +{"current_steps": 5498, "total_steps": 5676, "loss": 0.5171586871147156, "lr": 5.433533637294819e-08, "epoch": 1.937444933920705, "percentage": 96.86, "elapsed_time": "8:06:38", "remaining_time": "0:15:45"} +{"current_steps": 5499, "total_steps": 5676, "loss": 0.6006083488464355, "lr": 5.373047617893479e-08, "epoch": 1.9377973568281939, "percentage": 96.88, "elapsed_time": "8:06:42", "remaining_time": "0:15:39"} +{"current_steps": 5500, "total_steps": 5676, "loss": 0.6083849668502808, "lr": 5.312899243388403e-08, "epoch": 1.9381497797356828, "percentage": 96.9, "elapsed_time": "8:06:48", "remaining_time": "0:15:34"} +{"current_steps": 5501, "total_steps": 5676, "loss": 0.6572569608688354, "lr": 5.2530885341982586e-08, "epoch": 1.9385022026431717, "percentage": 96.92, "elapsed_time": "8:06:56", "remaining_time": "0:15:29"} +{"current_steps": 5502, "total_steps": 5676, "loss": 0.497112512588501, "lr": 5.1936155106269146e-08, "epoch": 1.9388546255506607, "percentage": 96.93, "elapsed_time": "8:07:02", "remaining_time": "0:15:24"} +{"current_steps": 5503, "total_steps": 5676, "loss": 0.5804885625839233, "lr": 5.1344801928636664e-08, "epoch": 1.9392070484581496, "percentage": 96.95, "elapsed_time": "8:07:08", "remaining_time": "0:15:18"} +{"current_steps": 5504, "total_steps": 5676, "loss": 0.6225712299346924, "lr": 5.075682600982901e-08, "epoch": 1.9395594713656388, "percentage": 96.97, "elapsed_time": "8:07:14", "remaining_time": "0:15:13"} +{"current_steps": 5505, "total_steps": 5676, "loss": 0.6100028157234192, "lr": 5.017222754944651e-08, 
"epoch": 1.9399118942731277, "percentage": 96.99, "elapsed_time": "8:07:19", "remaining_time": "0:15:08"} +{"current_steps": 5506, "total_steps": 5676, "loss": 0.549712061882019, "lr": 4.959100674594486e-08, "epoch": 1.9402643171806169, "percentage": 97.0, "elapsed_time": "8:07:25", "remaining_time": "0:15:02"} +{"current_steps": 5507, "total_steps": 5676, "loss": 0.5327162146568298, "lr": 4.901316379662624e-08, "epoch": 1.9406167400881058, "percentage": 97.02, "elapsed_time": "8:07:32", "remaining_time": "0:14:57"} +{"current_steps": 5508, "total_steps": 5676, "loss": 0.7408417463302612, "lr": 4.8438698897652626e-08, "epoch": 1.9409691629955947, "percentage": 97.04, "elapsed_time": "8:07:37", "remaining_time": "0:14:52"} +{"current_steps": 5509, "total_steps": 5676, "loss": 0.6126288175582886, "lr": 4.7867612244036906e-08, "epoch": 1.9413215859030837, "percentage": 97.06, "elapsed_time": "8:07:42", "remaining_time": "0:14:47"} +{"current_steps": 5510, "total_steps": 5676, "loss": 0.542537271976471, "lr": 4.729990402964402e-08, "epoch": 1.9416740088105726, "percentage": 97.08, "elapsed_time": "8:07:47", "remaining_time": "0:14:41"} +{"current_steps": 5511, "total_steps": 5676, "loss": 0.5429843664169312, "lr": 4.6735574447195345e-08, "epoch": 1.9420264317180616, "percentage": 97.09, "elapsed_time": "8:07:51", "remaining_time": "0:14:36"} +{"current_steps": 5512, "total_steps": 5676, "loss": 0.6103960275650024, "lr": 4.617462368826098e-08, "epoch": 1.9423788546255507, "percentage": 97.11, "elapsed_time": "8:07:56", "remaining_time": "0:14:31"} +{"current_steps": 5513, "total_steps": 5676, "loss": 0.43702462315559387, "lr": 4.561705194326749e-08, "epoch": 1.9427312775330396, "percentage": 97.13, "elapsed_time": "8:08:02", "remaining_time": "0:14:25"} +{"current_steps": 5514, "total_steps": 5676, "loss": 0.5313314199447632, "lr": 4.506285940149457e-08, "epoch": 1.9430837004405286, "percentage": 97.15, "elapsed_time": "8:08:07", "remaining_time": "0:14:20"} 
+{"current_steps": 5515, "total_steps": 5676, "loss": 0.568792462348938, "lr": 4.451204625107064e-08, "epoch": 1.9434361233480177, "percentage": 97.16, "elapsed_time": "8:08:12", "remaining_time": "0:14:15"} +{"current_steps": 5516, "total_steps": 5676, "loss": 0.6055475473403931, "lr": 4.3964612678979446e-08, "epoch": 1.9437885462555067, "percentage": 97.18, "elapsed_time": "8:08:18", "remaining_time": "0:14:09"} +{"current_steps": 5517, "total_steps": 5676, "loss": 0.6203786730766296, "lr": 4.3420558871060116e-08, "epoch": 1.9441409691629956, "percentage": 97.2, "elapsed_time": "8:08:24", "remaining_time": "0:14:04"} +{"current_steps": 5518, "total_steps": 5676, "loss": 0.5914345979690552, "lr": 4.287988501200047e-08, "epoch": 1.9444933920704845, "percentage": 97.22, "elapsed_time": "8:08:28", "remaining_time": "0:13:59"} +{"current_steps": 5519, "total_steps": 5676, "loss": 0.5650739669799805, "lr": 4.2342591285343684e-08, "epoch": 1.9448458149779735, "percentage": 97.23, "elapsed_time": "8:08:34", "remaining_time": "0:13:53"} +{"current_steps": 5520, "total_steps": 5676, "loss": 0.5589660406112671, "lr": 4.180867787348164e-08, "epoch": 1.9451982378854624, "percentage": 97.25, "elapsed_time": "8:08:39", "remaining_time": "0:13:48"} +{"current_steps": 5521, "total_steps": 5676, "loss": 0.5683336853981018, "lr": 4.12781449576638e-08, "epoch": 1.9455506607929516, "percentage": 97.27, "elapsed_time": "8:08:44", "remaining_time": "0:13:43"} +{"current_steps": 5522, "total_steps": 5676, "loss": 0.5388365983963013, "lr": 4.075099271798943e-08, "epoch": 1.9459030837004405, "percentage": 97.29, "elapsed_time": "8:08:49", "remaining_time": "0:13:37"} +{"current_steps": 5523, "total_steps": 5676, "loss": 0.575006365776062, "lr": 4.0227221333408726e-08, "epoch": 1.9462555066079297, "percentage": 97.3, "elapsed_time": "8:08:54", "remaining_time": "0:13:32"} +{"current_steps": 5524, "total_steps": 5676, "loss": 0.49638503789901733, "lr": 3.970683098172723e-08, "epoch": 
1.9466079295154186, "percentage": 97.32, "elapsed_time": "8:09:00", "remaining_time": "0:13:27"} +{"current_steps": 5525, "total_steps": 5676, "loss": 0.6484041213989258, "lr": 3.9189821839600294e-08, "epoch": 1.9469603524229075, "percentage": 97.34, "elapsed_time": "8:09:05", "remaining_time": "0:13:22"} +{"current_steps": 5526, "total_steps": 5676, "loss": 0.5522493124008179, "lr": 3.8676194082537535e-08, "epoch": 1.9473127753303965, "percentage": 97.36, "elapsed_time": "8:09:09", "remaining_time": "0:13:16"} +{"current_steps": 5527, "total_steps": 5676, "loss": 0.5875294208526611, "lr": 3.8165947884898356e-08, "epoch": 1.9476651982378854, "percentage": 97.37, "elapsed_time": "8:09:14", "remaining_time": "0:13:11"} +{"current_steps": 5528, "total_steps": 5676, "loss": 0.5725122690200806, "lr": 3.765908341989644e-08, "epoch": 1.9480176211453744, "percentage": 97.39, "elapsed_time": "8:09:19", "remaining_time": "0:13:06"} +{"current_steps": 5529, "total_steps": 5676, "loss": 0.5198935866355896, "lr": 3.7155600859595243e-08, "epoch": 1.9483700440528633, "percentage": 97.41, "elapsed_time": "8:09:27", "remaining_time": "0:13:00"} +{"current_steps": 5530, "total_steps": 5676, "loss": 0.6396631598472595, "lr": 3.665550037491361e-08, "epoch": 1.9487224669603525, "percentage": 97.43, "elapsed_time": "8:09:32", "remaining_time": "0:12:55"} +{"current_steps": 5531, "total_steps": 5676, "loss": 0.666089653968811, "lr": 3.6158782135617965e-08, "epoch": 1.9490748898678414, "percentage": 97.45, "elapsed_time": "8:09:36", "remaining_time": "0:12:50"} +{"current_steps": 5532, "total_steps": 5676, "loss": 0.6818836331367493, "lr": 3.5665446310330087e-08, "epoch": 1.9494273127753305, "percentage": 97.46, "elapsed_time": "8:09:42", "remaining_time": "0:12:44"} +{"current_steps": 5533, "total_steps": 5676, "loss": 0.533860981464386, "lr": 3.517549306652157e-08, "epoch": 1.9497797356828195, "percentage": 97.48, "elapsed_time": "8:09:47", "remaining_time": "0:12:39"} 
+{"current_steps": 5534, "total_steps": 5676, "loss": 0.6174973249435425, "lr": 3.468892257051493e-08, "epoch": 1.9501321585903084, "percentage": 97.5, "elapsed_time": "8:09:54", "remaining_time": "0:12:34"} +{"current_steps": 5535, "total_steps": 5676, "loss": 0.6010403037071228, "lr": 3.4205734987488027e-08, "epoch": 1.9504845814977974, "percentage": 97.52, "elapsed_time": "8:09:58", "remaining_time": "0:12:28"} +{"current_steps": 5536, "total_steps": 5676, "loss": 0.6475502252578735, "lr": 3.372593048146744e-08, "epoch": 1.9508370044052863, "percentage": 97.53, "elapsed_time": "8:10:04", "remaining_time": "0:12:23"} +{"current_steps": 5537, "total_steps": 5676, "loss": 0.5625165700912476, "lr": 3.3249509215330653e-08, "epoch": 1.9511894273127752, "percentage": 97.55, "elapsed_time": "8:10:10", "remaining_time": "0:12:18"} +{"current_steps": 5538, "total_steps": 5676, "loss": 0.6504719257354736, "lr": 3.277647135080941e-08, "epoch": 1.9515418502202642, "percentage": 97.57, "elapsed_time": "8:10:14", "remaining_time": "0:12:12"} +{"current_steps": 5539, "total_steps": 5676, "loss": 0.6217454671859741, "lr": 3.230681704848415e-08, "epoch": 1.9518942731277533, "percentage": 97.59, "elapsed_time": "8:10:19", "remaining_time": "0:12:07"} +{"current_steps": 5540, "total_steps": 5676, "loss": 0.5804678201675415, "lr": 3.1840546467788445e-08, "epoch": 1.9522466960352423, "percentage": 97.6, "elapsed_time": "8:10:24", "remaining_time": "0:12:02"} +{"current_steps": 5541, "total_steps": 5676, "loss": 0.6133759617805481, "lr": 3.1377659767006795e-08, "epoch": 1.9525991189427314, "percentage": 97.62, "elapsed_time": "8:10:30", "remaining_time": "0:11:57"} +{"current_steps": 5542, "total_steps": 5676, "loss": 0.508539080619812, "lr": 3.0918157103273506e-08, "epoch": 1.9529515418502204, "percentage": 97.64, "elapsed_time": "8:10:36", "remaining_time": "0:11:51"} +{"current_steps": 5543, "total_steps": 5676, "loss": 0.5682996511459351, "lr": 3.0462038632577126e-08, "epoch": 
1.9533039647577093, "percentage": 97.66, "elapsed_time": "8:10:41", "remaining_time": "0:11:46"} +{"current_steps": 5544, "total_steps": 5676, "loss": 0.7072808742523193, "lr": 3.000930450975603e-08, "epoch": 1.9536563876651982, "percentage": 97.67, "elapsed_time": "8:10:48", "remaining_time": "0:11:41"} +{"current_steps": 5545, "total_steps": 5676, "loss": 0.5948976278305054, "lr": 2.9559954888497278e-08, "epoch": 1.9540088105726872, "percentage": 97.69, "elapsed_time": "8:10:52", "remaining_time": "0:11:35"} +{"current_steps": 5546, "total_steps": 5676, "loss": 0.5111032128334045, "lr": 2.911398992134218e-08, "epoch": 1.954361233480176, "percentage": 97.71, "elapsed_time": "8:10:58", "remaining_time": "0:11:30"} +{"current_steps": 5547, "total_steps": 5676, "loss": 0.553802490234375, "lr": 2.8671409759681858e-08, "epoch": 1.954713656387665, "percentage": 97.73, "elapsed_time": "8:11:03", "remaining_time": "0:11:25"} +{"current_steps": 5548, "total_steps": 5676, "loss": 0.5091711282730103, "lr": 2.8232214553759462e-08, "epoch": 1.9550660792951542, "percentage": 97.74, "elapsed_time": "8:11:10", "remaining_time": "0:11:19"} +{"current_steps": 5549, "total_steps": 5676, "loss": 0.47025251388549805, "lr": 2.7796404452666847e-08, "epoch": 1.9554185022026431, "percentage": 97.76, "elapsed_time": "8:11:16", "remaining_time": "0:11:14"} +{"current_steps": 5550, "total_steps": 5676, "loss": 0.6174348592758179, "lr": 2.7363979604349e-08, "epoch": 1.9557709251101323, "percentage": 97.78, "elapsed_time": "8:11:20", "remaining_time": "0:11:09"} +{"current_steps": 5551, "total_steps": 5676, "loss": 0.5516685247421265, "lr": 2.69349401555985e-08, "epoch": 1.9561233480176212, "percentage": 97.8, "elapsed_time": "8:11:26", "remaining_time": "0:11:03"} +{"current_steps": 5552, "total_steps": 5676, "loss": 0.6272131204605103, "lr": 2.6509286252063282e-08, "epoch": 1.9564757709251102, "percentage": 97.82, "elapsed_time": "8:11:30", "remaining_time": "0:10:58"} +{"current_steps": 
5553, "total_steps": 5676, "loss": 0.5913189649581909, "lr": 2.6087018038239987e-08, "epoch": 1.956828193832599, "percentage": 97.83, "elapsed_time": "8:11:36", "remaining_time": "0:10:53"} +{"current_steps": 5554, "total_steps": 5676, "loss": 0.6802668571472168, "lr": 2.5668135657472835e-08, "epoch": 1.957180616740088, "percentage": 97.85, "elapsed_time": "8:11:41", "remaining_time": "0:10:48"} +{"current_steps": 5555, "total_steps": 5676, "loss": 0.5829865336418152, "lr": 2.525263925196142e-08, "epoch": 1.957533039647577, "percentage": 97.87, "elapsed_time": "8:11:48", "remaining_time": "0:10:42"} +{"current_steps": 5556, "total_steps": 5676, "loss": 0.6400870680809021, "lr": 2.4840528962752907e-08, "epoch": 1.9578854625550661, "percentage": 97.89, "elapsed_time": "8:11:53", "remaining_time": "0:10:37"} +{"current_steps": 5557, "total_steps": 5676, "loss": 0.48432302474975586, "lr": 2.4431804929746506e-08, "epoch": 1.958237885462555, "percentage": 97.9, "elapsed_time": "8:11:58", "remaining_time": "0:10:32"} +{"current_steps": 5558, "total_steps": 5676, "loss": 0.5494402647018433, "lr": 2.4026467291691223e-08, "epoch": 1.958590308370044, "percentage": 97.92, "elapsed_time": "8:12:03", "remaining_time": "0:10:26"} +{"current_steps": 5559, "total_steps": 5676, "loss": 0.5393223762512207, "lr": 2.3624516186186996e-08, "epoch": 1.9589427312775332, "percentage": 97.94, "elapsed_time": "8:12:08", "remaining_time": "0:10:21"} +{"current_steps": 5560, "total_steps": 5676, "loss": 0.5500867962837219, "lr": 2.322595174968245e-08, "epoch": 1.959295154185022, "percentage": 97.96, "elapsed_time": "8:12:13", "remaining_time": "0:10:16"} +{"current_steps": 5561, "total_steps": 5676, "loss": 0.5618818998336792, "lr": 2.283077411747825e-08, "epoch": 1.959647577092511, "percentage": 97.97, "elapsed_time": "8:12:18", "remaining_time": "0:10:10"} +{"current_steps": 5562, "total_steps": 5676, "loss": 0.5681769251823425, "lr": 2.243898342372597e-08, "epoch": 1.96, "percentage": 97.99, 
"elapsed_time": "8:12:23", "remaining_time": "0:10:05"} +{"current_steps": 5563, "total_steps": 5676, "loss": 0.8009706139564514, "lr": 2.2050579801424777e-08, "epoch": 1.960352422907489, "percentage": 98.01, "elapsed_time": "8:12:28", "remaining_time": "0:10:00"} +{"current_steps": 5564, "total_steps": 5676, "loss": 0.5609455704689026, "lr": 2.1665563382426978e-08, "epoch": 1.9607048458149778, "percentage": 98.03, "elapsed_time": "8:12:33", "remaining_time": "0:09:54"} +{"current_steps": 5565, "total_steps": 5676, "loss": 0.5615163445472717, "lr": 2.1283934297432472e-08, "epoch": 1.961057268722467, "percentage": 98.04, "elapsed_time": "8:12:38", "remaining_time": "0:09:49"} +{"current_steps": 5566, "total_steps": 5676, "loss": 0.4442581832408905, "lr": 2.0905692675993182e-08, "epoch": 1.961409691629956, "percentage": 98.06, "elapsed_time": "8:12:44", "remaining_time": "0:09:44"} +{"current_steps": 5567, "total_steps": 5676, "loss": 0.6557266116142273, "lr": 2.0530838646510842e-08, "epoch": 1.961762114537445, "percentage": 98.08, "elapsed_time": "8:12:49", "remaining_time": "0:09:38"} +{"current_steps": 5568, "total_steps": 5676, "loss": 0.5911799669265747, "lr": 2.0159372336235884e-08, "epoch": 1.962114537444934, "percentage": 98.1, "elapsed_time": "8:12:56", "remaining_time": "0:09:33"} +{"current_steps": 5569, "total_steps": 5676, "loss": 0.5480202436447144, "lr": 1.9791293871269656e-08, "epoch": 1.962466960352423, "percentage": 98.11, "elapsed_time": "8:13:01", "remaining_time": "0:09:28"} +{"current_steps": 5570, "total_steps": 5676, "loss": 0.6489467620849609, "lr": 1.9426603376563325e-08, "epoch": 1.962819383259912, "percentage": 98.13, "elapsed_time": "8:13:05", "remaining_time": "0:09:23"} +{"current_steps": 5571, "total_steps": 5676, "loss": 0.4699944853782654, "lr": 1.9065300975917856e-08, "epoch": 1.9631718061674008, "percentage": 98.15, "elapsed_time": "8:13:10", "remaining_time": "0:09:17"} +{"current_steps": 5572, "total_steps": 5676, "loss": 
0.6684885025024414, "lr": 1.8707386791985137e-08, "epoch": 1.9635242290748898, "percentage": 98.17, "elapsed_time": "8:13:16", "remaining_time": "0:09:12"} +{"current_steps": 5573, "total_steps": 5676, "loss": 0.5847122073173523, "lr": 1.835286094626576e-08, "epoch": 1.9638766519823787, "percentage": 98.19, "elapsed_time": "8:13:21", "remaining_time": "0:09:07"} +{"current_steps": 5574, "total_steps": 5676, "loss": 0.5427859425544739, "lr": 1.8001723559109007e-08, "epoch": 1.9642290748898679, "percentage": 98.2, "elapsed_time": "8:13:26", "remaining_time": "0:09:01"} +{"current_steps": 5575, "total_steps": 5676, "loss": 0.6545590758323669, "lr": 1.7653974749715087e-08, "epoch": 1.9645814977973568, "percentage": 98.22, "elapsed_time": "8:13:30", "remaining_time": "0:08:56"} +{"current_steps": 5576, "total_steps": 5676, "loss": 0.6369475722312927, "lr": 1.730961463613512e-08, "epoch": 1.964933920704846, "percentage": 98.24, "elapsed_time": "8:13:35", "remaining_time": "0:08:51"} +{"current_steps": 5577, "total_steps": 5676, "loss": 0.5165325403213501, "lr": 1.696864333526893e-08, "epoch": 1.965286343612335, "percentage": 98.26, "elapsed_time": "8:13:39", "remaining_time": "0:08:45"} +{"current_steps": 5578, "total_steps": 5676, "loss": 0.5651812553405762, "lr": 1.6631060962863933e-08, "epoch": 1.9656387665198238, "percentage": 98.27, "elapsed_time": "8:13:44", "remaining_time": "0:08:40"} +{"current_steps": 5579, "total_steps": 5676, "loss": 0.5249905586242676, "lr": 1.6296867633519563e-08, "epoch": 1.9659911894273128, "percentage": 98.29, "elapsed_time": "8:13:49", "remaining_time": "0:08:35"} +{"current_steps": 5580, "total_steps": 5676, "loss": 0.6748663783073425, "lr": 1.5966063460683967e-08, "epoch": 1.9663436123348017, "percentage": 98.31, "elapsed_time": "8:13:54", "remaining_time": "0:08:29"} +{"current_steps": 5581, "total_steps": 5676, "loss": 0.5276468992233276, "lr": 1.5638648556656198e-08, "epoch": 1.9666960352422906, "percentage": 98.33, "elapsed_time": 
"8:14:01", "remaining_time": "0:08:24"} +{"current_steps": 5582, "total_steps": 5676, "loss": 0.5778729319572449, "lr": 1.5314623032581798e-08, "epoch": 1.9670484581497796, "percentage": 98.34, "elapsed_time": "8:14:05", "remaining_time": "0:08:19"} +{"current_steps": 5583, "total_steps": 5676, "loss": 0.5805479288101196, "lr": 1.4993986998457223e-08, "epoch": 1.9674008810572687, "percentage": 98.36, "elapsed_time": "8:14:11", "remaining_time": "0:08:13"} +{"current_steps": 5584, "total_steps": 5676, "loss": 0.6213263273239136, "lr": 1.4676740563129843e-08, "epoch": 1.9677533039647577, "percentage": 98.38, "elapsed_time": "8:14:17", "remaining_time": "0:08:08"} +{"current_steps": 5585, "total_steps": 5676, "loss": 0.5081031322479248, "lr": 1.4362883834294627e-08, "epoch": 1.9681057268722468, "percentage": 98.4, "elapsed_time": "8:14:22", "remaining_time": "0:08:03"} +{"current_steps": 5586, "total_steps": 5676, "loss": 0.5605350136756897, "lr": 1.4052416918495237e-08, "epoch": 1.9684581497797358, "percentage": 98.41, "elapsed_time": "8:14:27", "remaining_time": "0:07:58"} +{"current_steps": 5587, "total_steps": 5676, "loss": 0.701635479927063, "lr": 1.3745339921126255e-08, "epoch": 1.9688105726872247, "percentage": 98.43, "elapsed_time": "8:14:32", "remaining_time": "0:07:52"} +{"current_steps": 5588, "total_steps": 5676, "loss": 0.5537668466567993, "lr": 1.344165294642985e-08, "epoch": 1.9691629955947136, "percentage": 98.45, "elapsed_time": "8:14:37", "remaining_time": "0:07:47"} +{"current_steps": 5589, "total_steps": 5676, "loss": 0.6395033597946167, "lr": 1.3141356097500225e-08, "epoch": 1.9695154185022026, "percentage": 98.47, "elapsed_time": "8:14:42", "remaining_time": "0:07:42"} +{"current_steps": 5590, "total_steps": 5676, "loss": 0.549985408782959, "lr": 1.2844449476276943e-08, "epoch": 1.9698678414096915, "percentage": 98.48, "elapsed_time": "8:14:48", "remaining_time": "0:07:36"} +{"current_steps": 5591, "total_steps": 5676, "loss": 0.4503220021724701, 
"lr": 1.2550933183550496e-08, "epoch": 1.9702202643171807, "percentage": 98.5, "elapsed_time": "8:14:54", "remaining_time": "0:07:31"} +{"current_steps": 5592, "total_steps": 5676, "loss": 0.6369946599006653, "lr": 1.2260807318962286e-08, "epoch": 1.9705726872246696, "percentage": 98.52, "elapsed_time": "8:15:00", "remaining_time": "0:07:26"} +{"current_steps": 5593, "total_steps": 5676, "loss": 0.547295093536377, "lr": 1.197407198099909e-08, "epoch": 1.9709251101321585, "percentage": 98.54, "elapsed_time": "8:15:05", "remaining_time": "0:07:20"} +{"current_steps": 5594, "total_steps": 5676, "loss": 0.578770101070404, "lr": 1.1690727267000823e-08, "epoch": 1.9712775330396477, "percentage": 98.56, "elapsed_time": "8:15:10", "remaining_time": "0:07:15"} +{"current_steps": 5595, "total_steps": 5676, "loss": 0.5992920398712158, "lr": 1.1410773273151654e-08, "epoch": 1.9716299559471366, "percentage": 98.57, "elapsed_time": "8:15:16", "remaining_time": "0:07:10"} +{"current_steps": 5596, "total_steps": 5676, "loss": 0.5912446975708008, "lr": 1.1134210094488896e-08, "epoch": 1.9719823788546256, "percentage": 98.59, "elapsed_time": "8:15:22", "remaining_time": "0:07:04"} +{"current_steps": 5597, "total_steps": 5676, "loss": 0.6539223194122314, "lr": 1.0861037824896337e-08, "epoch": 1.9723348017621145, "percentage": 98.61, "elapsed_time": "8:15:27", "remaining_time": "0:06:59"} +{"current_steps": 5598, "total_steps": 5676, "loss": 0.6487923860549927, "lr": 1.0591256557108686e-08, "epoch": 1.9726872246696034, "percentage": 98.63, "elapsed_time": "8:15:34", "remaining_time": "0:06:54"} +{"current_steps": 5599, "total_steps": 5676, "loss": 0.7950254678726196, "lr": 1.0324866382707133e-08, "epoch": 1.9730396475770924, "percentage": 98.64, "elapsed_time": "8:15:39", "remaining_time": "0:06:48"} +{"current_steps": 5600, "total_steps": 5676, "loss": 0.4941173195838928, "lr": 1.006186739212267e-08, "epoch": 1.9733920704845815, "percentage": 98.66, "elapsed_time": "8:15:45", 
"remaining_time": "0:06:43"} +{"current_steps": 5601, "total_steps": 5676, "loss": 0.6733928322792053, "lr": 9.802259674637215e-09, "epoch": 1.9737444933920705, "percentage": 98.68, "elapsed_time": "8:15:54", "remaining_time": "0:06:38"} +{"current_steps": 5602, "total_steps": 5676, "loss": 0.5084437131881714, "lr": 9.546043318376941e-09, "epoch": 1.9740969162995596, "percentage": 98.7, "elapsed_time": "8:15:59", "remaining_time": "0:06:33"} +{"current_steps": 5603, "total_steps": 5676, "loss": 0.4499536156654358, "lr": 9.293218410320049e-09, "epoch": 1.9744493392070486, "percentage": 98.71, "elapsed_time": "8:16:06", "remaining_time": "0:06:27"} +{"current_steps": 5604, "total_steps": 5676, "loss": 0.557701826095581, "lr": 9.04378503629344e-09, "epoch": 1.9748017621145375, "percentage": 98.73, "elapsed_time": "8:16:11", "remaining_time": "0:06:22"} +{"current_steps": 5605, "total_steps": 5676, "loss": 0.6110183000564575, "lr": 8.797743280972715e-09, "epoch": 1.9751541850220264, "percentage": 98.75, "elapsed_time": "8:16:16", "remaining_time": "0:06:17"} +{"current_steps": 5606, "total_steps": 5676, "loss": 0.5877780318260193, "lr": 8.555093227878842e-09, "epoch": 1.9755066079295154, "percentage": 98.77, "elapsed_time": "8:16:20", "remaining_time": "0:06:11"} +{"current_steps": 5607, "total_steps": 5676, "loss": 0.48567962646484375, "lr": 8.315834959385927e-09, "epoch": 1.9758590308370043, "percentage": 98.78, "elapsed_time": "8:16:26", "remaining_time": "0:06:06"} +{"current_steps": 5608, "total_steps": 5676, "loss": 0.5536524653434753, "lr": 8.079968556714557e-09, "epoch": 1.9762114537444933, "percentage": 98.8, "elapsed_time": "8:16:30", "remaining_time": "0:06:01"} +{"current_steps": 5609, "total_steps": 5676, "loss": 0.7685257196426392, "lr": 7.847494099934017e-09, "epoch": 1.9765638766519824, "percentage": 98.82, "elapsed_time": "8:16:36", "remaining_time": "0:05:55"} +{"current_steps": 5610, "total_steps": 5676, "loss": 0.5442079305648804, "lr": 
7.618411667961179e-09, "epoch": 1.9769162995594713, "percentage": 98.84, "elapsed_time": "8:16:42", "remaining_time": "0:05:50"} +{"current_steps": 5611, "total_steps": 5676, "loss": 0.5034504532814026, "lr": 7.392721338563835e-09, "epoch": 1.9772687224669605, "percentage": 98.85, "elapsed_time": "8:16:48", "remaining_time": "0:05:45"} +{"current_steps": 5612, "total_steps": 5676, "loss": 0.4912964701652527, "lr": 7.1704231883551465e-09, "epoch": 1.9776211453744494, "percentage": 98.87, "elapsed_time": "8:16:54", "remaining_time": "0:05:40"} +{"current_steps": 5613, "total_steps": 5676, "loss": 0.6034345626831055, "lr": 6.951517292800303e-09, "epoch": 1.9779735682819384, "percentage": 98.89, "elapsed_time": "8:16:58", "remaining_time": "0:05:34"} +{"current_steps": 5614, "total_steps": 5676, "loss": 0.5379009246826172, "lr": 6.736003726209861e-09, "epoch": 1.9783259911894273, "percentage": 98.91, "elapsed_time": "8:17:03", "remaining_time": "0:05:29"} +{"current_steps": 5615, "total_steps": 5676, "loss": 0.6571087837219238, "lr": 6.523882561744188e-09, "epoch": 1.9786784140969162, "percentage": 98.93, "elapsed_time": "8:17:09", "remaining_time": "0:05:24"} +{"current_steps": 5616, "total_steps": 5676, "loss": 0.6473923921585083, "lr": 6.315153871411239e-09, "epoch": 1.9790308370044052, "percentage": 98.94, "elapsed_time": "8:17:13", "remaining_time": "0:05:18"} +{"current_steps": 5617, "total_steps": 5676, "loss": 0.5877989530563354, "lr": 6.1098177260687786e-09, "epoch": 1.9793832599118941, "percentage": 98.96, "elapsed_time": "8:17:18", "remaining_time": "0:05:13"} +{"current_steps": 5618, "total_steps": 5676, "loss": 0.5058172941207886, "lr": 5.907874195422159e-09, "epoch": 1.9797356828193833, "percentage": 98.98, "elapsed_time": "8:17:23", "remaining_time": "0:05:08"} +{"current_steps": 5619, "total_steps": 5676, "loss": 0.6471046805381775, "lr": 5.70932334802432e-09, "epoch": 1.9800881057268722, "percentage": 99.0, "elapsed_time": "8:17:28", "remaining_time": 
"0:05:02"} +{"current_steps": 5620, "total_steps": 5676, "loss": 0.6687172651290894, "lr": 5.514165251276904e-09, "epoch": 1.9804405286343614, "percentage": 99.01, "elapsed_time": "8:17:33", "remaining_time": "0:04:57"} +{"current_steps": 5621, "total_steps": 5676, "loss": 0.5726118087768555, "lr": 5.322399971431358e-09, "epoch": 1.9807929515418503, "percentage": 99.03, "elapsed_time": "8:17:39", "remaining_time": "0:04:52"} +{"current_steps": 5622, "total_steps": 5676, "loss": 0.6534412503242493, "lr": 5.134027573584499e-09, "epoch": 1.9811453744493392, "percentage": 99.05, "elapsed_time": "8:17:45", "remaining_time": "0:04:46"} +{"current_steps": 5623, "total_steps": 5676, "loss": 0.5972425937652588, "lr": 4.949048121682953e-09, "epoch": 1.9814977973568282, "percentage": 99.07, "elapsed_time": "8:17:50", "remaining_time": "0:04:41"} +{"current_steps": 5624, "total_steps": 5676, "loss": 0.6420427560806274, "lr": 4.767461678522045e-09, "epoch": 1.9818502202643171, "percentage": 99.08, "elapsed_time": "8:17:54", "remaining_time": "0:04:36"} +{"current_steps": 5625, "total_steps": 5676, "loss": 0.5912461876869202, "lr": 4.589268305745798e-09, "epoch": 1.982202643171806, "percentage": 99.1, "elapsed_time": "8:17:57", "remaining_time": "0:04:30"} +{"current_steps": 5626, "total_steps": 5676, "loss": 0.5942744016647339, "lr": 4.414468063843602e-09, "epoch": 1.982555066079295, "percentage": 99.12, "elapsed_time": "8:18:03", "remaining_time": "0:04:25"} +{"current_steps": 5627, "total_steps": 5676, "loss": 0.6618138551712036, "lr": 4.243061012154659e-09, "epoch": 1.9829074889867842, "percentage": 99.14, "elapsed_time": "8:18:07", "remaining_time": "0:04:20"} +{"current_steps": 5628, "total_steps": 5676, "loss": 0.6046779155731201, "lr": 4.075047208867977e-09, "epoch": 1.983259911894273, "percentage": 99.15, "elapsed_time": "8:18:11", "remaining_time": "0:04:14"} +{"current_steps": 5629, "total_steps": 5676, "loss": 0.6797989010810852, "lr": 3.9104267110168235e-09, 
"epoch": 1.9836123348017622, "percentage": 99.17, "elapsed_time": "8:18:16", "remaining_time": "0:04:09"} +{"current_steps": 5630, "total_steps": 5676, "loss": 0.4882436692714691, "lr": 3.749199574486495e-09, "epoch": 1.9839647577092512, "percentage": 99.19, "elapsed_time": "8:18:21", "remaining_time": "0:04:04"} +{"current_steps": 5631, "total_steps": 5676, "loss": 0.6694678068161011, "lr": 3.591365854008766e-09, "epoch": 1.9843171806167401, "percentage": 99.21, "elapsed_time": "8:18:27", "remaining_time": "0:03:59"} +{"current_steps": 5632, "total_steps": 5676, "loss": 0.6015830039978027, "lr": 3.436925603161889e-09, "epoch": 1.984669603524229, "percentage": 99.22, "elapsed_time": "8:18:33", "remaining_time": "0:03:53"} +{"current_steps": 5633, "total_steps": 5676, "loss": 0.5627756118774414, "lr": 3.2858788743739267e-09, "epoch": 1.985022026431718, "percentage": 99.24, "elapsed_time": "8:18:40", "remaining_time": "0:03:48"} +{"current_steps": 5634, "total_steps": 5676, "loss": 0.6069298386573792, "lr": 3.138225718920529e-09, "epoch": 1.985374449339207, "percentage": 99.26, "elapsed_time": "8:18:45", "remaining_time": "0:03:43"} +{"current_steps": 5635, "total_steps": 5676, "loss": 0.5779693722724915, "lr": 2.993966186926045e-09, "epoch": 1.985726872246696, "percentage": 99.28, "elapsed_time": "8:18:49", "remaining_time": "0:03:37"} +{"current_steps": 5636, "total_steps": 5676, "loss": 0.6706609725952148, "lr": 2.8531003273624126e-09, "epoch": 1.986079295154185, "percentage": 99.3, "elapsed_time": "8:18:55", "remaining_time": "0:03:32"} +{"current_steps": 5637, "total_steps": 5676, "loss": 0.6361640691757202, "lr": 2.715628188046937e-09, "epoch": 1.986431718061674, "percentage": 99.31, "elapsed_time": "8:18:59", "remaining_time": "0:03:27"} +{"current_steps": 5638, "total_steps": 5676, "loss": 0.557577908039093, "lr": 2.581549815648954e-09, "epoch": 1.9867841409691631, "percentage": 99.33, "elapsed_time": "8:19:05", "remaining_time": "0:03:21"} +{"current_steps": 
5639, "total_steps": 5676, "loss": 0.7735704183578491, "lr": 2.450865255684276e-09, "epoch": 1.987136563876652, "percentage": 99.35, "elapsed_time": "8:19:11", "remaining_time": "0:03:16"} +{"current_steps": 5640, "total_steps": 5676, "loss": 0.5836409330368042, "lr": 2.3235745525151956e-09, "epoch": 1.987488986784141, "percentage": 99.37, "elapsed_time": "8:19:16", "remaining_time": "0:03:11"} +{"current_steps": 5641, "total_steps": 5676, "loss": 0.5824601650238037, "lr": 2.1996777493527023e-09, "epoch": 1.98784140969163, "percentage": 99.38, "elapsed_time": "8:19:22", "remaining_time": "0:03:05"} +{"current_steps": 5642, "total_steps": 5676, "loss": 0.6183140873908997, "lr": 2.0791748882575958e-09, "epoch": 1.9881938325991189, "percentage": 99.4, "elapsed_time": "8:19:28", "remaining_time": "0:03:00"} +{"current_steps": 5643, "total_steps": 5676, "loss": 0.6071987748146057, "lr": 1.9620660101349333e-09, "epoch": 1.9885462555066078, "percentage": 99.42, "elapsed_time": "8:19:34", "remaining_time": "0:02:55"} +{"current_steps": 5644, "total_steps": 5676, "loss": 0.655383825302124, "lr": 1.8483511547406907e-09, "epoch": 1.988898678414097, "percentage": 99.44, "elapsed_time": "8:19:39", "remaining_time": "0:02:49"} +{"current_steps": 5645, "total_steps": 5676, "loss": 0.6328674554824829, "lr": 1.738030360677323e-09, "epoch": 1.989251101321586, "percentage": 99.45, "elapsed_time": "8:19:43", "remaining_time": "0:02:44"} +{"current_steps": 5646, "total_steps": 5676, "loss": 0.5012212991714478, "lr": 1.631103665394873e-09, "epoch": 1.989603524229075, "percentage": 99.47, "elapsed_time": "8:19:49", "remaining_time": "0:02:39"} +{"current_steps": 5647, "total_steps": 5676, "loss": 0.6202536821365356, "lr": 1.5275711051909724e-09, "epoch": 1.989955947136564, "percentage": 99.49, "elapsed_time": "8:19:54", "remaining_time": "0:02:34"} +{"current_steps": 5648, "total_steps": 5676, "loss": 0.4922720789909363, "lr": 1.427432715214172e-09, "epoch": 1.990308370044053, 
"percentage": 99.51, "elapsed_time": "8:19:58", "remaining_time": "0:02:28"} +{"current_steps": 5649, "total_steps": 5676, "loss": 0.6591637134552002, "lr": 1.33068852945617e-09, "epoch": 1.9906607929515419, "percentage": 99.52, "elapsed_time": "8:20:04", "remaining_time": "0:02:23"} +{"current_steps": 5650, "total_steps": 5676, "loss": 0.5481886863708496, "lr": 1.2373385807584736e-09, "epoch": 1.9910132158590308, "percentage": 99.54, "elapsed_time": "8:20:09", "remaining_time": "0:02:18"} +{"current_steps": 5651, "total_steps": 5676, "loss": 0.5642685890197754, "lr": 1.1473829008123994e-09, "epoch": 1.9913656387665197, "percentage": 99.56, "elapsed_time": "8:20:15", "remaining_time": "0:02:12"} +{"current_steps": 5652, "total_steps": 5676, "loss": 0.6736876368522644, "lr": 1.060821520153521e-09, "epoch": 1.9917180616740087, "percentage": 99.58, "elapsed_time": "8:20:21", "remaining_time": "0:02:07"} +{"current_steps": 5653, "total_steps": 5676, "loss": 0.6823733448982239, "lr": 9.776544681672218e-10, "epoch": 1.9920704845814978, "percentage": 99.59, "elapsed_time": "8:20:25", "remaining_time": "0:02:02"} +{"current_steps": 5654, "total_steps": 5676, "loss": 0.4686351716518402, "lr": 8.978817730864731e-10, "epoch": 1.9924229074889868, "percentage": 99.61, "elapsed_time": "8:20:29", "remaining_time": "0:01:56"} +{"current_steps": 5655, "total_steps": 5676, "loss": 0.5027543306350708, "lr": 8.215034619907247e-10, "epoch": 1.992775330396476, "percentage": 99.63, "elapsed_time": "8:20:34", "remaining_time": "0:01:51"} +{"current_steps": 5656, "total_steps": 5676, "loss": 0.6217285394668579, "lr": 7.485195608081253e-10, "epoch": 1.9931277533039649, "percentage": 99.65, "elapsed_time": "8:20:40", "remaining_time": "0:01:46"} +{"current_steps": 5657, "total_steps": 5676, "loss": 0.5942907929420471, "lr": 6.78930094315522e-10, "epoch": 1.9934801762114538, "percentage": 99.67, "elapsed_time": "8:20:46", "remaining_time": "0:01:40"} +{"current_steps": 5658, "total_steps": 
5676, "loss": 0.7282885313034058, "lr": 6.127350861351299e-10, "epoch": 1.9938325991189427, "percentage": 99.68, "elapsed_time": "8:20:52", "remaining_time": "0:01:35"} +{"current_steps": 5659, "total_steps": 5676, "loss": 0.653915286064148, "lr": 5.499345587389737e-10, "epoch": 1.9941850220264317, "percentage": 99.7, "elapsed_time": "8:20:57", "remaining_time": "0:01:30"} +{"current_steps": 5660, "total_steps": 5676, "loss": 0.6993501782417297, "lr": 4.905285334455556e-10, "epoch": 1.9945374449339206, "percentage": 99.72, "elapsed_time": "8:21:02", "remaining_time": "0:01:24"} +{"current_steps": 5661, "total_steps": 5676, "loss": 0.712554931640625, "lr": 4.3451703042207694e-10, "epoch": 1.9948898678414095, "percentage": 99.74, "elapsed_time": "8:21:08", "remaining_time": "0:01:19"} +{"current_steps": 5662, "total_steps": 5676, "loss": 0.7523812055587769, "lr": 3.81900068681107e-10, "epoch": 1.9952422907488987, "percentage": 99.75, "elapsed_time": "8:21:14", "remaining_time": "0:01:14"} +{"current_steps": 5663, "total_steps": 5676, "loss": 0.5138256549835205, "lr": 3.3267766608502395e-10, "epoch": 1.9955947136563876, "percentage": 99.77, "elapsed_time": "8:21:19", "remaining_time": "0:01:09"} +{"current_steps": 5664, "total_steps": 5676, "loss": 0.6154034733772278, "lr": 2.8684983934490486e-10, "epoch": 1.9959471365638768, "percentage": 99.79, "elapsed_time": "8:21:23", "remaining_time": "0:01:03"} +{"current_steps": 5665, "total_steps": 5676, "loss": 0.5790190696716309, "lr": 2.4441660401608447e-10, "epoch": 1.9962995594713657, "percentage": 99.81, "elapsed_time": "8:21:28", "remaining_time": "0:00:58"} +{"current_steps": 5666, "total_steps": 5676, "loss": 0.6873353719711304, "lr": 2.0537797450370657e-10, "epoch": 1.9966519823788547, "percentage": 99.82, "elapsed_time": "8:21:34", "remaining_time": "0:00:53"} +{"current_steps": 5667, "total_steps": 5676, "loss": 0.5764753222465515, "lr": 1.6973396405939312e-10, "epoch": 1.9970044052863436, "percentage": 99.84, 
"elapsed_time": "8:21:40", "remaining_time": "0:00:47"} +{"current_steps": 5668, "total_steps": 5676, "loss": 0.5144297480583191, "lr": 1.374845847856854e-10, "epoch": 1.9973568281938325, "percentage": 99.86, "elapsed_time": "8:21:45", "remaining_time": "0:00:42"} +{"current_steps": 5669, "total_steps": 5676, "loss": 0.5934832692146301, "lr": 1.0862984762716189e-10, "epoch": 1.9977092511013215, "percentage": 99.88, "elapsed_time": "8:21:49", "remaining_time": "0:00:37"} +{"current_steps": 5670, "total_steps": 5676, "loss": 0.4544188976287842, "lr": 8.316976238154084e-11, "epoch": 1.9980616740088104, "percentage": 99.89, "elapsed_time": "8:21:55", "remaining_time": "0:00:31"} +{"current_steps": 5671, "total_steps": 5676, "loss": 0.44844698905944824, "lr": 6.110433769079827e-11, "epoch": 1.9984140969162996, "percentage": 99.91, "elapsed_time": "8:22:00", "remaining_time": "0:00:26"} +{"current_steps": 5672, "total_steps": 5676, "loss": 0.5272520780563354, "lr": 4.2433581045608905e-11, "epoch": 1.9987665198237885, "percentage": 99.93, "elapsed_time": "8:22:06", "remaining_time": "0:00:21"} +{"current_steps": 5673, "total_steps": 5676, "loss": 0.5003396272659302, "lr": 2.715749878312579e-11, "epoch": 1.9991189427312777, "percentage": 99.95, "elapsed_time": "8:22:10", "remaining_time": "0:00:15"} +{"current_steps": 5674, "total_steps": 5676, "loss": 0.561710000038147, "lr": 1.5276096090310887e-11, "epoch": 1.9994713656387666, "percentage": 99.96, "elapsed_time": "8:22:16", "remaining_time": "0:00:10"} +{"current_steps": 5675, "total_steps": 5676, "loss": 0.666955292224884, "lr": 6.789377000604447e-12, "epoch": 1.9998237885462555, "percentage": 99.98, "elapsed_time": "8:22:22", "remaining_time": "0:00:05"} +{"current_steps": 5676, "total_steps": 5676, "loss": 0.7278814911842346, "lr": 1.6973443939249934e-12, "epoch": 2.0, "percentage": 100.0, "elapsed_time": "8:22:23", "remaining_time": "0:00:00"} +{"current_steps": 5676, "total_steps": 5676, "epoch": 2.0, "percentage": 
100.0, "elapsed_time": "8:22:28", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a56fa9b8360c8b4b94fd19e189c6c744f25fcfcf --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,39775 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 5676, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0003524229074889868, + "grad_norm": 1.6512674233185107, + "learning_rate": 0.0, + "loss": 1.493973731994629, + "step": 1 + }, + { + "epoch": 0.0007048458149779736, + "grad_norm": 1.4463228571593894, + "learning_rate": 7.042253521126761e-08, + "loss": 1.3692013025283813, + "step": 2 + }, + { + "epoch": 0.0010572687224669603, + "grad_norm": 1.4036766254408197, + "learning_rate": 1.4084507042253522e-07, + "loss": 1.3996260166168213, + "step": 3 + }, + { + "epoch": 0.0014096916299559472, + "grad_norm": 1.29446596506829, + "learning_rate": 2.1126760563380284e-07, + "loss": 1.3011515140533447, + "step": 4 + }, + { + "epoch": 0.001762114537444934, + "grad_norm": 1.5130555881795185, + "learning_rate": 2.8169014084507043e-07, + "loss": 1.3736083507537842, + "step": 5 + }, + { + "epoch": 0.0021145374449339205, + "grad_norm": 1.247517750517551, + "learning_rate": 3.521126760563381e-07, + "loss": 1.051241159439087, + "step": 6 + }, + { + "epoch": 0.0024669603524229075, + "grad_norm": 1.611437944890658, + "learning_rate": 4.225352112676057e-07, + "loss": 1.2594621181488037, + "step": 7 + }, + { + "epoch": 0.0028193832599118945, + "grad_norm": 1.4604380967241444, + "learning_rate": 4.929577464788733e-07, + "loss": 1.0498416423797607, + "step": 8 + }, + { + "epoch": 0.003171806167400881, + "grad_norm": 1.367174801368101, + "learning_rate": 5.633802816901409e-07, + "loss": 1.3313459157943726, + "step": 9 + }, + { + "epoch": 
0.003524229074889868, + "grad_norm": 1.4378623823320218, + "learning_rate": 6.338028169014085e-07, + "loss": 1.2484922409057617, + "step": 10 + }, + { + "epoch": 0.0038766519823788545, + "grad_norm": 1.197911167360161, + "learning_rate": 7.042253521126762e-07, + "loss": 1.097194790840149, + "step": 11 + }, + { + "epoch": 0.004229074889867841, + "grad_norm": 1.3767897701080816, + "learning_rate": 7.746478873239437e-07, + "loss": 1.3065136671066284, + "step": 12 + }, + { + "epoch": 0.0045814977973568285, + "grad_norm": 1.2501177622273731, + "learning_rate": 8.450704225352114e-07, + "loss": 1.1574026346206665, + "step": 13 + }, + { + "epoch": 0.004933920704845815, + "grad_norm": 1.3002699887597202, + "learning_rate": 9.154929577464789e-07, + "loss": 1.1509445905685425, + "step": 14 + }, + { + "epoch": 0.0052863436123348016, + "grad_norm": 1.3458236321153771, + "learning_rate": 9.859154929577465e-07, + "loss": 1.069403886795044, + "step": 15 + }, + { + "epoch": 0.005638766519823789, + "grad_norm": 1.52712721337833, + "learning_rate": 1.0563380281690142e-06, + "loss": 1.1731287240982056, + "step": 16 + }, + { + "epoch": 0.0059911894273127755, + "grad_norm": 1.5628075837505453, + "learning_rate": 1.1267605633802817e-06, + "loss": 0.9314254522323608, + "step": 17 + }, + { + "epoch": 0.006343612334801762, + "grad_norm": 1.3686084350519343, + "learning_rate": 1.1971830985915492e-06, + "loss": 1.2915008068084717, + "step": 18 + }, + { + "epoch": 0.006696035242290749, + "grad_norm": 1.2653916141417434, + "learning_rate": 1.267605633802817e-06, + "loss": 1.1088309288024902, + "step": 19 + }, + { + "epoch": 0.007048458149779736, + "grad_norm": 1.362753082153478, + "learning_rate": 1.3380281690140844e-06, + "loss": 1.21511709690094, + "step": 20 + }, + { + "epoch": 0.0074008810572687225, + "grad_norm": 1.3054604275805306, + "learning_rate": 1.4084507042253523e-06, + "loss": 1.241409420967102, + "step": 21 + }, + { + "epoch": 0.007753303964757709, + "grad_norm": 
1.3646723208790772, + "learning_rate": 1.4788732394366198e-06, + "loss": 1.2170014381408691, + "step": 22 + }, + { + "epoch": 0.008105726872246696, + "grad_norm": 1.424586503093174, + "learning_rate": 1.5492957746478873e-06, + "loss": 1.1405870914459229, + "step": 23 + }, + { + "epoch": 0.008458149779735682, + "grad_norm": 1.429368633092772, + "learning_rate": 1.6197183098591552e-06, + "loss": 1.122542381286621, + "step": 24 + }, + { + "epoch": 0.00881057268722467, + "grad_norm": 1.2201478884239083, + "learning_rate": 1.6901408450704227e-06, + "loss": 1.1686937808990479, + "step": 25 + }, + { + "epoch": 0.009162995594713657, + "grad_norm": 1.4065678272985154, + "learning_rate": 1.7605633802816902e-06, + "loss": 1.215955376625061, + "step": 26 + }, + { + "epoch": 0.009515418502202643, + "grad_norm": 1.3879787249393913, + "learning_rate": 1.8309859154929579e-06, + "loss": 1.075179100036621, + "step": 27 + }, + { + "epoch": 0.00986784140969163, + "grad_norm": 1.2313632017619234, + "learning_rate": 1.9014084507042254e-06, + "loss": 1.198237419128418, + "step": 28 + }, + { + "epoch": 0.010220264317180617, + "grad_norm": 1.6833211669458825, + "learning_rate": 1.971830985915493e-06, + "loss": 1.2356700897216797, + "step": 29 + }, + { + "epoch": 0.010572687224669603, + "grad_norm": 1.3637967517131555, + "learning_rate": 2.0422535211267608e-06, + "loss": 1.2373592853546143, + "step": 30 + }, + { + "epoch": 0.01092511013215859, + "grad_norm": 1.377232613936239, + "learning_rate": 2.1126760563380285e-06, + "loss": 1.1857718229293823, + "step": 31 + }, + { + "epoch": 0.011277533039647578, + "grad_norm": 1.3566319214936433, + "learning_rate": 2.1830985915492958e-06, + "loss": 1.1844017505645752, + "step": 32 + }, + { + "epoch": 0.011629955947136564, + "grad_norm": 1.2486508447822717, + "learning_rate": 2.2535211267605635e-06, + "loss": 1.275226354598999, + "step": 33 + }, + { + "epoch": 0.011982378854625551, + "grad_norm": 1.3044888735575617, + "learning_rate": 
2.323943661971831e-06, + "loss": 1.169473648071289, + "step": 34 + }, + { + "epoch": 0.012334801762114538, + "grad_norm": 1.2608655384056326, + "learning_rate": 2.3943661971830984e-06, + "loss": 1.2182841300964355, + "step": 35 + }, + { + "epoch": 0.012687224669603524, + "grad_norm": 1.3780698009940295, + "learning_rate": 2.4647887323943666e-06, + "loss": 1.2110469341278076, + "step": 36 + }, + { + "epoch": 0.01303964757709251, + "grad_norm": 1.3829042894220551, + "learning_rate": 2.535211267605634e-06, + "loss": 1.2886571884155273, + "step": 37 + }, + { + "epoch": 0.013392070484581497, + "grad_norm": 1.2954566526081723, + "learning_rate": 2.6056338028169015e-06, + "loss": 1.0740901231765747, + "step": 38 + }, + { + "epoch": 0.013744493392070485, + "grad_norm": 1.2079072281757672, + "learning_rate": 2.676056338028169e-06, + "loss": 1.0119279623031616, + "step": 39 + }, + { + "epoch": 0.014096916299559472, + "grad_norm": 1.1460333237155051, + "learning_rate": 2.746478873239437e-06, + "loss": 1.0752044916152954, + "step": 40 + }, + { + "epoch": 0.014449339207048459, + "grad_norm": 1.3690776364650978, + "learning_rate": 2.8169014084507046e-06, + "loss": 1.345343828201294, + "step": 41 + }, + { + "epoch": 0.014801762114537445, + "grad_norm": 1.0813865739605455, + "learning_rate": 2.887323943661972e-06, + "loss": 1.102332353591919, + "step": 42 + }, + { + "epoch": 0.015154185022026432, + "grad_norm": 1.1643083589428873, + "learning_rate": 2.9577464788732396e-06, + "loss": 1.006919264793396, + "step": 43 + }, + { + "epoch": 0.015506607929515418, + "grad_norm": 1.1582412568670832, + "learning_rate": 3.0281690140845073e-06, + "loss": 1.104026436805725, + "step": 44 + }, + { + "epoch": 0.015859030837004406, + "grad_norm": 1.3060563783851553, + "learning_rate": 3.0985915492957746e-06, + "loss": 1.299152135848999, + "step": 45 + }, + { + "epoch": 0.01621145374449339, + "grad_norm": 1.4304085919726754, + "learning_rate": 3.1690140845070427e-06, + "loss": 1.1075072288513184, + 
"step": 46 + }, + { + "epoch": 0.01656387665198238, + "grad_norm": 0.9865545367526579, + "learning_rate": 3.2394366197183104e-06, + "loss": 1.0296107530593872, + "step": 47 + }, + { + "epoch": 0.016916299559471364, + "grad_norm": 1.1960961939132708, + "learning_rate": 3.3098591549295777e-06, + "loss": 1.1097803115844727, + "step": 48 + }, + { + "epoch": 0.017268722466960353, + "grad_norm": 1.0974682037636356, + "learning_rate": 3.3802816901408454e-06, + "loss": 0.945678174495697, + "step": 49 + }, + { + "epoch": 0.01762114537444934, + "grad_norm": 0.9924343523024514, + "learning_rate": 3.4507042253521127e-06, + "loss": 1.075556993484497, + "step": 50 + }, + { + "epoch": 0.017973568281938326, + "grad_norm": 1.0849849170905757, + "learning_rate": 3.5211267605633804e-06, + "loss": 1.0790367126464844, + "step": 51 + }, + { + "epoch": 0.018325991189427314, + "grad_norm": 1.220415189867698, + "learning_rate": 3.5915492957746485e-06, + "loss": 1.2567799091339111, + "step": 52 + }, + { + "epoch": 0.0186784140969163, + "grad_norm": 1.1058732491316554, + "learning_rate": 3.6619718309859158e-06, + "loss": 1.1437780857086182, + "step": 53 + }, + { + "epoch": 0.019030837004405287, + "grad_norm": 1.0871981925234313, + "learning_rate": 3.7323943661971835e-06, + "loss": 1.0962307453155518, + "step": 54 + }, + { + "epoch": 0.019383259911894272, + "grad_norm": 0.9603250960542756, + "learning_rate": 3.8028169014084508e-06, + "loss": 1.0149122476577759, + "step": 55 + }, + { + "epoch": 0.01973568281938326, + "grad_norm": 0.9630324155849409, + "learning_rate": 3.873239436619718e-06, + "loss": 0.9029096364974976, + "step": 56 + }, + { + "epoch": 0.02008810572687225, + "grad_norm": 1.1449327271146603, + "learning_rate": 3.943661971830986e-06, + "loss": 1.1290819644927979, + "step": 57 + }, + { + "epoch": 0.020440528634361233, + "grad_norm": 1.1046082203063978, + "learning_rate": 4.014084507042254e-06, + "loss": 1.0965365171432495, + "step": 58 + }, + { + "epoch": 0.02079295154185022, + 
"grad_norm": 1.2553158733514387, + "learning_rate": 4.0845070422535216e-06, + "loss": 1.2854020595550537, + "step": 59 + }, + { + "epoch": 0.021145374449339206, + "grad_norm": 1.0484971235480365, + "learning_rate": 4.154929577464789e-06, + "loss": 1.0303996801376343, + "step": 60 + }, + { + "epoch": 0.021497797356828195, + "grad_norm": 0.9670460326314384, + "learning_rate": 4.225352112676057e-06, + "loss": 1.0811198949813843, + "step": 61 + }, + { + "epoch": 0.02185022026431718, + "grad_norm": 0.992548164971829, + "learning_rate": 4.295774647887324e-06, + "loss": 1.1373648643493652, + "step": 62 + }, + { + "epoch": 0.022202643171806168, + "grad_norm": 1.009220008285868, + "learning_rate": 4.3661971830985915e-06, + "loss": 0.8717563152313232, + "step": 63 + }, + { + "epoch": 0.022555066079295156, + "grad_norm": 0.9171432664885892, + "learning_rate": 4.43661971830986e-06, + "loss": 0.9939290881156921, + "step": 64 + }, + { + "epoch": 0.02290748898678414, + "grad_norm": 1.139248361968882, + "learning_rate": 4.507042253521127e-06, + "loss": 1.1776926517486572, + "step": 65 + }, + { + "epoch": 0.02325991189427313, + "grad_norm": 0.8971048282009709, + "learning_rate": 4.577464788732395e-06, + "loss": 0.9149726629257202, + "step": 66 + }, + { + "epoch": 0.023612334801762114, + "grad_norm": 0.9597323965843616, + "learning_rate": 4.647887323943662e-06, + "loss": 0.996609091758728, + "step": 67 + }, + { + "epoch": 0.023964757709251102, + "grad_norm": 1.0074979173506051, + "learning_rate": 4.71830985915493e-06, + "loss": 1.102593183517456, + "step": 68 + }, + { + "epoch": 0.024317180616740087, + "grad_norm": 0.8938780612317906, + "learning_rate": 4.788732394366197e-06, + "loss": 1.0912048816680908, + "step": 69 + }, + { + "epoch": 0.024669603524229075, + "grad_norm": 0.936561005612989, + "learning_rate": 4.859154929577465e-06, + "loss": 1.1192498207092285, + "step": 70 + }, + { + "epoch": 0.025022026431718063, + "grad_norm": 1.059387656590118, + "learning_rate": 
4.929577464788733e-06, + "loss": 1.0358459949493408, + "step": 71 + }, + { + "epoch": 0.025374449339207048, + "grad_norm": 0.9588756664450253, + "learning_rate": 5e-06, + "loss": 1.076169490814209, + "step": 72 + }, + { + "epoch": 0.025726872246696036, + "grad_norm": 1.355361750045824, + "learning_rate": 5.070422535211268e-06, + "loss": 0.9906084537506104, + "step": 73 + }, + { + "epoch": 0.02607929515418502, + "grad_norm": 1.187443908189842, + "learning_rate": 5.140845070422536e-06, + "loss": 0.8163654804229736, + "step": 74 + }, + { + "epoch": 0.02643171806167401, + "grad_norm": 0.9764403954844053, + "learning_rate": 5.211267605633803e-06, + "loss": 1.140099048614502, + "step": 75 + }, + { + "epoch": 0.026784140969162994, + "grad_norm": 0.863156257130764, + "learning_rate": 5.28169014084507e-06, + "loss": 0.7654916048049927, + "step": 76 + }, + { + "epoch": 0.027136563876651983, + "grad_norm": 1.0935626721226286, + "learning_rate": 5.352112676056338e-06, + "loss": 0.9476499557495117, + "step": 77 + }, + { + "epoch": 0.02748898678414097, + "grad_norm": 1.0875377862843238, + "learning_rate": 5.422535211267607e-06, + "loss": 1.120811939239502, + "step": 78 + }, + { + "epoch": 0.027841409691629956, + "grad_norm": 0.9578356099138406, + "learning_rate": 5.492957746478874e-06, + "loss": 0.9745736122131348, + "step": 79 + }, + { + "epoch": 0.028193832599118944, + "grad_norm": 1.074452545035149, + "learning_rate": 5.563380281690142e-06, + "loss": 1.086181879043579, + "step": 80 + }, + { + "epoch": 0.02854625550660793, + "grad_norm": 0.9510910839018534, + "learning_rate": 5.633802816901409e-06, + "loss": 0.9904681444168091, + "step": 81 + }, + { + "epoch": 0.028898678414096917, + "grad_norm": 1.0958191892945044, + "learning_rate": 5.7042253521126766e-06, + "loss": 1.0311436653137207, + "step": 82 + }, + { + "epoch": 0.029251101321585902, + "grad_norm": 1.066849780091366, + "learning_rate": 5.774647887323944e-06, + "loss": 0.996998131275177, + "step": 83 + }, + { + "epoch": 
0.02960352422907489, + "grad_norm": 1.1397250117300832, + "learning_rate": 5.845070422535212e-06, + "loss": 1.1526594161987305, + "step": 84 + }, + { + "epoch": 0.029955947136563875, + "grad_norm": 1.145115830862378, + "learning_rate": 5.915492957746479e-06, + "loss": 1.0914695262908936, + "step": 85 + }, + { + "epoch": 0.030308370044052863, + "grad_norm": 1.1551265503738541, + "learning_rate": 5.9859154929577465e-06, + "loss": 0.9558745622634888, + "step": 86 + }, + { + "epoch": 0.03066079295154185, + "grad_norm": 1.0734015706063305, + "learning_rate": 6.056338028169015e-06, + "loss": 0.9668983221054077, + "step": 87 + }, + { + "epoch": 0.031013215859030836, + "grad_norm": 1.2231010171085557, + "learning_rate": 6.126760563380282e-06, + "loss": 1.0132758617401123, + "step": 88 + }, + { + "epoch": 0.03136563876651982, + "grad_norm": 0.9911917466596063, + "learning_rate": 6.197183098591549e-06, + "loss": 1.0816935300827026, + "step": 89 + }, + { + "epoch": 0.03171806167400881, + "grad_norm": 1.1311803004782939, + "learning_rate": 6.267605633802818e-06, + "loss": 1.03245210647583, + "step": 90 + }, + { + "epoch": 0.0320704845814978, + "grad_norm": 1.0820347157611818, + "learning_rate": 6.3380281690140855e-06, + "loss": 0.9812602400779724, + "step": 91 + }, + { + "epoch": 0.03242290748898678, + "grad_norm": 1.0039329578342324, + "learning_rate": 6.408450704225353e-06, + "loss": 0.9303219318389893, + "step": 92 + }, + { + "epoch": 0.032775330396475774, + "grad_norm": 1.0308477549900932, + "learning_rate": 6.478873239436621e-06, + "loss": 1.175403356552124, + "step": 93 + }, + { + "epoch": 0.03312775330396476, + "grad_norm": 1.0822064194108554, + "learning_rate": 6.549295774647888e-06, + "loss": 1.1863958835601807, + "step": 94 + }, + { + "epoch": 0.033480176211453744, + "grad_norm": 1.0560683839166303, + "learning_rate": 6.619718309859155e-06, + "loss": 0.8630557060241699, + "step": 95 + }, + { + "epoch": 0.03383259911894273, + "grad_norm": 1.1683225259655636, + 
"learning_rate": 6.690140845070423e-06, + "loss": 1.0499619245529175, + "step": 96 + }, + { + "epoch": 0.03418502202643172, + "grad_norm": 0.9766019012274652, + "learning_rate": 6.760563380281691e-06, + "loss": 0.9443086981773376, + "step": 97 + }, + { + "epoch": 0.034537444933920705, + "grad_norm": 1.1779098792527396, + "learning_rate": 6.830985915492958e-06, + "loss": 1.0011450052261353, + "step": 98 + }, + { + "epoch": 0.03488986784140969, + "grad_norm": 1.0941166094999715, + "learning_rate": 6.901408450704225e-06, + "loss": 1.0239083766937256, + "step": 99 + }, + { + "epoch": 0.03524229074889868, + "grad_norm": 1.1605003575433563, + "learning_rate": 6.9718309859154935e-06, + "loss": 1.1335347890853882, + "step": 100 + }, + { + "epoch": 0.035594713656387666, + "grad_norm": 1.1410420869639502, + "learning_rate": 7.042253521126761e-06, + "loss": 0.9650854468345642, + "step": 101 + }, + { + "epoch": 0.03594713656387665, + "grad_norm": 1.0366491839089684, + "learning_rate": 7.112676056338029e-06, + "loss": 0.9284406900405884, + "step": 102 + }, + { + "epoch": 0.036299559471365636, + "grad_norm": 1.0368314441443032, + "learning_rate": 7.183098591549297e-06, + "loss": 0.989676296710968, + "step": 103 + }, + { + "epoch": 0.03665198237885463, + "grad_norm": 1.0475480945800932, + "learning_rate": 7.253521126760564e-06, + "loss": 0.9149842262268066, + "step": 104 + }, + { + "epoch": 0.03700440528634361, + "grad_norm": 1.0115568298427282, + "learning_rate": 7.3239436619718316e-06, + "loss": 0.9793657064437866, + "step": 105 + }, + { + "epoch": 0.0373568281938326, + "grad_norm": 1.0923401558071288, + "learning_rate": 7.3943661971831e-06, + "loss": 0.9508543014526367, + "step": 106 + }, + { + "epoch": 0.03770925110132159, + "grad_norm": 1.123373083563155, + "learning_rate": 7.464788732394367e-06, + "loss": 1.0623283386230469, + "step": 107 + }, + { + "epoch": 0.038061674008810574, + "grad_norm": 1.0472469474411819, + "learning_rate": 7.535211267605634e-06, + "loss": 
1.0039314031600952, + "step": 108 + }, + { + "epoch": 0.03841409691629956, + "grad_norm": 1.1301029490219276, + "learning_rate": 7.6056338028169015e-06, + "loss": 0.9315502643585205, + "step": 109 + }, + { + "epoch": 0.038766519823788544, + "grad_norm": 0.9958020122553335, + "learning_rate": 7.67605633802817e-06, + "loss": 0.936677098274231, + "step": 110 + }, + { + "epoch": 0.039118942731277535, + "grad_norm": 1.0684105284421879, + "learning_rate": 7.746478873239436e-06, + "loss": 0.9373410940170288, + "step": 111 + }, + { + "epoch": 0.03947136563876652, + "grad_norm": 1.155598878121798, + "learning_rate": 7.816901408450704e-06, + "loss": 1.01617431640625, + "step": 112 + }, + { + "epoch": 0.039823788546255505, + "grad_norm": 1.1889006080727076, + "learning_rate": 7.887323943661972e-06, + "loss": 1.1713547706604004, + "step": 113 + }, + { + "epoch": 0.0401762114537445, + "grad_norm": 1.1256223667919436, + "learning_rate": 7.95774647887324e-06, + "loss": 0.8982350826263428, + "step": 114 + }, + { + "epoch": 0.04052863436123348, + "grad_norm": 1.0914199985412718, + "learning_rate": 8.028169014084509e-06, + "loss": 0.8123869895935059, + "step": 115 + }, + { + "epoch": 0.040881057268722466, + "grad_norm": 1.1505365914239516, + "learning_rate": 8.098591549295775e-06, + "loss": 1.0762536525726318, + "step": 116 + }, + { + "epoch": 0.04123348017621145, + "grad_norm": 1.0367170014557934, + "learning_rate": 8.169014084507043e-06, + "loss": 1.004841923713684, + "step": 117 + }, + { + "epoch": 0.04158590308370044, + "grad_norm": 1.0966724197265187, + "learning_rate": 8.239436619718311e-06, + "loss": 0.9237936735153198, + "step": 118 + }, + { + "epoch": 0.04193832599118943, + "grad_norm": 1.0785540239343763, + "learning_rate": 8.309859154929578e-06, + "loss": 0.9038913249969482, + "step": 119 + }, + { + "epoch": 0.04229074889867841, + "grad_norm": 1.1784096317090726, + "learning_rate": 8.380281690140846e-06, + "loss": 0.9488446712493896, + "step": 120 + }, + { + "epoch": 
0.042643171806167404, + "grad_norm": 1.1559534491366574, + "learning_rate": 8.450704225352114e-06, + "loss": 1.0862706899642944, + "step": 121 + }, + { + "epoch": 0.04299559471365639, + "grad_norm": 1.5143452874154766, + "learning_rate": 8.52112676056338e-06, + "loss": 0.8882313966751099, + "step": 122 + }, + { + "epoch": 0.043348017621145374, + "grad_norm": 1.1412568707979918, + "learning_rate": 8.591549295774648e-06, + "loss": 0.9125900268554688, + "step": 123 + }, + { + "epoch": 0.04370044052863436, + "grad_norm": 1.403727281403332, + "learning_rate": 8.661971830985915e-06, + "loss": 0.944568395614624, + "step": 124 + }, + { + "epoch": 0.04405286343612335, + "grad_norm": 1.2993905510610635, + "learning_rate": 8.732394366197183e-06, + "loss": 0.9303089380264282, + "step": 125 + }, + { + "epoch": 0.044405286343612335, + "grad_norm": 1.1184314169128153, + "learning_rate": 8.802816901408451e-06, + "loss": 1.0983362197875977, + "step": 126 + }, + { + "epoch": 0.04475770925110132, + "grad_norm": 1.40811546312751, + "learning_rate": 8.87323943661972e-06, + "loss": 1.002477765083313, + "step": 127 + }, + { + "epoch": 0.04511013215859031, + "grad_norm": 1.1638063617076078, + "learning_rate": 8.943661971830987e-06, + "loss": 0.9994120001792908, + "step": 128 + }, + { + "epoch": 0.045462555066079297, + "grad_norm": 1.2118035451866538, + "learning_rate": 9.014084507042254e-06, + "loss": 1.0785832405090332, + "step": 129 + }, + { + "epoch": 0.04581497797356828, + "grad_norm": 1.0820277493757582, + "learning_rate": 9.084507042253522e-06, + "loss": 0.779441237449646, + "step": 130 + }, + { + "epoch": 0.046167400881057266, + "grad_norm": 1.1766256779195974, + "learning_rate": 9.15492957746479e-06, + "loss": 1.0052348375320435, + "step": 131 + }, + { + "epoch": 0.04651982378854626, + "grad_norm": 1.0771619013639089, + "learning_rate": 9.225352112676057e-06, + "loss": 1.0327996015548706, + "step": 132 + }, + { + "epoch": 0.04687224669603524, + "grad_norm": 1.501276619683034, + 
"learning_rate": 9.295774647887325e-06, + "loss": 1.0643246173858643, + "step": 133 + }, + { + "epoch": 0.04722466960352423, + "grad_norm": 1.1427145785080848, + "learning_rate": 9.366197183098593e-06, + "loss": 0.8449216485023499, + "step": 134 + }, + { + "epoch": 0.04757709251101322, + "grad_norm": 1.2684019730338143, + "learning_rate": 9.43661971830986e-06, + "loss": 0.8867055177688599, + "step": 135 + }, + { + "epoch": 0.047929515418502204, + "grad_norm": 1.4156875615017863, + "learning_rate": 9.507042253521127e-06, + "loss": 1.048499584197998, + "step": 136 + }, + { + "epoch": 0.04828193832599119, + "grad_norm": 1.2120768691141688, + "learning_rate": 9.577464788732394e-06, + "loss": 1.0548617839813232, + "step": 137 + }, + { + "epoch": 0.048634361233480174, + "grad_norm": 1.0679337780928526, + "learning_rate": 9.647887323943664e-06, + "loss": 0.8882845044136047, + "step": 138 + }, + { + "epoch": 0.048986784140969165, + "grad_norm": 1.622342973826323, + "learning_rate": 9.71830985915493e-06, + "loss": 1.032647967338562, + "step": 139 + }, + { + "epoch": 0.04933920704845815, + "grad_norm": 1.058782348686911, + "learning_rate": 9.788732394366198e-06, + "loss": 1.039523959159851, + "step": 140 + }, + { + "epoch": 0.049691629955947135, + "grad_norm": 1.1234982994751406, + "learning_rate": 9.859154929577466e-06, + "loss": 0.8451036214828491, + "step": 141 + }, + { + "epoch": 0.05004405286343613, + "grad_norm": 1.1376284406077708, + "learning_rate": 9.929577464788733e-06, + "loss": 0.8285897970199585, + "step": 142 + }, + { + "epoch": 0.05039647577092511, + "grad_norm": 1.2057497016168632, + "learning_rate": 1e-05, + "loss": 0.9998278021812439, + "step": 143 + }, + { + "epoch": 0.050748898678414096, + "grad_norm": 1.4528082359287422, + "learning_rate": 1.0070422535211269e-05, + "loss": 0.9782301187515259, + "step": 144 + }, + { + "epoch": 0.05110132158590308, + "grad_norm": 1.1663459671948497, + "learning_rate": 1.0140845070422535e-05, + "loss": 1.0557070970535278, + 
"step": 145 + }, + { + "epoch": 0.05145374449339207, + "grad_norm": 1.159146071512081, + "learning_rate": 1.0211267605633803e-05, + "loss": 0.9516133069992065, + "step": 146 + }, + { + "epoch": 0.05180616740088106, + "grad_norm": 1.1694686158556986, + "learning_rate": 1.0281690140845072e-05, + "loss": 0.8965041637420654, + "step": 147 + }, + { + "epoch": 0.05215859030837004, + "grad_norm": 1.2713520268346183, + "learning_rate": 1.0352112676056338e-05, + "loss": 0.8627057075500488, + "step": 148 + }, + { + "epoch": 0.052511013215859034, + "grad_norm": 1.0456048049111641, + "learning_rate": 1.0422535211267606e-05, + "loss": 0.7627567648887634, + "step": 149 + }, + { + "epoch": 0.05286343612334802, + "grad_norm": 1.2332422487154633, + "learning_rate": 1.0492957746478873e-05, + "loss": 0.8522504568099976, + "step": 150 + }, + { + "epoch": 0.053215859030837004, + "grad_norm": 1.1106957565365498, + "learning_rate": 1.056338028169014e-05, + "loss": 0.7164312601089478, + "step": 151 + }, + { + "epoch": 0.05356828193832599, + "grad_norm": 1.0487512934158103, + "learning_rate": 1.0633802816901409e-05, + "loss": 0.9141941070556641, + "step": 152 + }, + { + "epoch": 0.05392070484581498, + "grad_norm": 1.5228596875919753, + "learning_rate": 1.0704225352112675e-05, + "loss": 0.9145504832267761, + "step": 153 + }, + { + "epoch": 0.054273127753303965, + "grad_norm": 1.19745569358961, + "learning_rate": 1.0774647887323943e-05, + "loss": 0.9851646423339844, + "step": 154 + }, + { + "epoch": 0.05462555066079295, + "grad_norm": 1.1547769204431162, + "learning_rate": 1.0845070422535213e-05, + "loss": 0.9319474697113037, + "step": 155 + }, + { + "epoch": 0.05497797356828194, + "grad_norm": 1.242055483054837, + "learning_rate": 1.0915492957746481e-05, + "loss": 0.995783269405365, + "step": 156 + }, + { + "epoch": 0.05533039647577093, + "grad_norm": 1.270129466753014, + "learning_rate": 1.0985915492957748e-05, + "loss": 0.8636226654052734, + "step": 157 + }, + { + "epoch": 
0.05568281938325991, + "grad_norm": 1.26388911778751, + "learning_rate": 1.1056338028169016e-05, + "loss": 0.8860869407653809, + "step": 158 + }, + { + "epoch": 0.056035242290748896, + "grad_norm": 1.24911279543244, + "learning_rate": 1.1126760563380284e-05, + "loss": 0.9256196618080139, + "step": 159 + }, + { + "epoch": 0.05638766519823789, + "grad_norm": 1.1684473229538663, + "learning_rate": 1.119718309859155e-05, + "loss": 0.8217915296554565, + "step": 160 + }, + { + "epoch": 0.05674008810572687, + "grad_norm": 1.2938227991615623, + "learning_rate": 1.1267605633802819e-05, + "loss": 0.9808465838432312, + "step": 161 + }, + { + "epoch": 0.05709251101321586, + "grad_norm": 1.2234654171305366, + "learning_rate": 1.1338028169014087e-05, + "loss": 0.7733014822006226, + "step": 162 + }, + { + "epoch": 0.05744493392070485, + "grad_norm": 1.1428802626649461, + "learning_rate": 1.1408450704225353e-05, + "loss": 0.8581304550170898, + "step": 163 + }, + { + "epoch": 0.057797356828193834, + "grad_norm": 1.3252890457476052, + "learning_rate": 1.1478873239436621e-05, + "loss": 0.9242054224014282, + "step": 164 + }, + { + "epoch": 0.05814977973568282, + "grad_norm": 1.3695567443378234, + "learning_rate": 1.1549295774647888e-05, + "loss": 1.0302021503448486, + "step": 165 + }, + { + "epoch": 0.058502202643171804, + "grad_norm": 1.2950143159958714, + "learning_rate": 1.1619718309859156e-05, + "loss": 0.8954275846481323, + "step": 166 + }, + { + "epoch": 0.058854625550660795, + "grad_norm": 1.1779404187828553, + "learning_rate": 1.1690140845070424e-05, + "loss": 0.891846776008606, + "step": 167 + }, + { + "epoch": 0.05920704845814978, + "grad_norm": 1.1837706775348158, + "learning_rate": 1.176056338028169e-05, + "loss": 0.887005627155304, + "step": 168 + }, + { + "epoch": 0.059559471365638765, + "grad_norm": 1.289448297537656, + "learning_rate": 1.1830985915492958e-05, + "loss": 0.9020301103591919, + "step": 169 + }, + { + "epoch": 0.05991189427312775, + "grad_norm": 
1.2185831955131692, + "learning_rate": 1.1901408450704227e-05, + "loss": 0.7925454378128052, + "step": 170 + }, + { + "epoch": 0.06026431718061674, + "grad_norm": 1.31750363404193, + "learning_rate": 1.1971830985915493e-05, + "loss": 0.8058332800865173, + "step": 171 + }, + { + "epoch": 0.060616740088105726, + "grad_norm": 1.2435062872951204, + "learning_rate": 1.2042253521126761e-05, + "loss": 0.892992377281189, + "step": 172 + }, + { + "epoch": 0.06096916299559471, + "grad_norm": 1.0835922361658872, + "learning_rate": 1.211267605633803e-05, + "loss": 0.8482734560966492, + "step": 173 + }, + { + "epoch": 0.0613215859030837, + "grad_norm": 1.2806384537102478, + "learning_rate": 1.2183098591549296e-05, + "loss": 0.8652878999710083, + "step": 174 + }, + { + "epoch": 0.06167400881057269, + "grad_norm": 1.183930720799068, + "learning_rate": 1.2253521126760564e-05, + "loss": 0.8590051531791687, + "step": 175 + }, + { + "epoch": 0.06202643171806167, + "grad_norm": 1.1264180921527844, + "learning_rate": 1.232394366197183e-05, + "loss": 0.7106916904449463, + "step": 176 + }, + { + "epoch": 0.06237885462555066, + "grad_norm": 1.5304901042334342, + "learning_rate": 1.2394366197183098e-05, + "loss": 0.9298936128616333, + "step": 177 + }, + { + "epoch": 0.06273127753303964, + "grad_norm": 1.3380597134261425, + "learning_rate": 1.2464788732394367e-05, + "loss": 1.027758240699768, + "step": 178 + }, + { + "epoch": 0.06308370044052863, + "grad_norm": 1.4071851827143296, + "learning_rate": 1.2535211267605636e-05, + "loss": 0.9576354026794434, + "step": 179 + }, + { + "epoch": 0.06343612334801763, + "grad_norm": 1.476054189108656, + "learning_rate": 1.2605633802816903e-05, + "loss": 0.6881245374679565, + "step": 180 + }, + { + "epoch": 0.0637885462555066, + "grad_norm": 1.736658600923819, + "learning_rate": 1.2676056338028171e-05, + "loss": 0.9629781246185303, + "step": 181 + }, + { + "epoch": 0.0641409691629956, + "grad_norm": 1.3990061114909895, + "learning_rate": 
1.2746478873239439e-05, + "loss": 0.849892258644104, + "step": 182 + }, + { + "epoch": 0.06449339207048459, + "grad_norm": 1.2776276047787312, + "learning_rate": 1.2816901408450705e-05, + "loss": 0.9294229745864868, + "step": 183 + }, + { + "epoch": 0.06484581497797356, + "grad_norm": 1.346185395248099, + "learning_rate": 1.2887323943661974e-05, + "loss": 0.9534600973129272, + "step": 184 + }, + { + "epoch": 0.06519823788546256, + "grad_norm": 1.2547825941083024, + "learning_rate": 1.2957746478873242e-05, + "loss": 0.7937755584716797, + "step": 185 + }, + { + "epoch": 0.06555066079295155, + "grad_norm": 1.215372024356157, + "learning_rate": 1.3028169014084508e-05, + "loss": 0.9188590049743652, + "step": 186 + }, + { + "epoch": 0.06590308370044053, + "grad_norm": 1.3372931395210206, + "learning_rate": 1.3098591549295776e-05, + "loss": 0.8775123357772827, + "step": 187 + }, + { + "epoch": 0.06625550660792952, + "grad_norm": 1.2703292803517752, + "learning_rate": 1.3169014084507044e-05, + "loss": 0.8562190532684326, + "step": 188 + }, + { + "epoch": 0.0666079295154185, + "grad_norm": 1.1593142823065046, + "learning_rate": 1.323943661971831e-05, + "loss": 0.9427295327186584, + "step": 189 + }, + { + "epoch": 0.06696035242290749, + "grad_norm": 1.1080518257913534, + "learning_rate": 1.3309859154929579e-05, + "loss": 0.6142286062240601, + "step": 190 + }, + { + "epoch": 0.06731277533039648, + "grad_norm": 1.416041365414943, + "learning_rate": 1.3380281690140845e-05, + "loss": 0.7480863332748413, + "step": 191 + }, + { + "epoch": 0.06766519823788546, + "grad_norm": 1.6287312517465182, + "learning_rate": 1.3450704225352114e-05, + "loss": 0.898857593536377, + "step": 192 + }, + { + "epoch": 0.06801762114537445, + "grad_norm": 1.4737642135415263, + "learning_rate": 1.3521126760563382e-05, + "loss": 0.8584127426147461, + "step": 193 + }, + { + "epoch": 0.06837004405286344, + "grad_norm": 1.2178631494207084, + "learning_rate": 1.3591549295774648e-05, + "loss": 
0.9400655031204224, + "step": 194 + }, + { + "epoch": 0.06872246696035242, + "grad_norm": 1.2698602238237462, + "learning_rate": 1.3661971830985916e-05, + "loss": 0.7750787734985352, + "step": 195 + }, + { + "epoch": 0.06907488986784141, + "grad_norm": 1.2474557266398312, + "learning_rate": 1.3732394366197184e-05, + "loss": 0.8530284762382507, + "step": 196 + }, + { + "epoch": 0.0694273127753304, + "grad_norm": 1.3191630227557989, + "learning_rate": 1.380281690140845e-05, + "loss": 0.9019994735717773, + "step": 197 + }, + { + "epoch": 0.06977973568281938, + "grad_norm": 1.1994310415476668, + "learning_rate": 1.3873239436619719e-05, + "loss": 0.7749642133712769, + "step": 198 + }, + { + "epoch": 0.07013215859030837, + "grad_norm": 1.3060142025317714, + "learning_rate": 1.3943661971830987e-05, + "loss": 0.956200122833252, + "step": 199 + }, + { + "epoch": 0.07048458149779736, + "grad_norm": 1.3510407726181874, + "learning_rate": 1.4014084507042253e-05, + "loss": 0.8544470071792603, + "step": 200 + }, + { + "epoch": 0.07083700440528634, + "grad_norm": 1.447521091304659, + "learning_rate": 1.4084507042253522e-05, + "loss": 0.8776387572288513, + "step": 201 + }, + { + "epoch": 0.07118942731277533, + "grad_norm": 1.5340123254246993, + "learning_rate": 1.4154929577464788e-05, + "loss": 0.9949591755867004, + "step": 202 + }, + { + "epoch": 0.07154185022026431, + "grad_norm": 1.306920931788941, + "learning_rate": 1.4225352112676058e-05, + "loss": 0.9616764783859253, + "step": 203 + }, + { + "epoch": 0.0718942731277533, + "grad_norm": 1.3490978686730206, + "learning_rate": 1.4295774647887326e-05, + "loss": 0.9247175455093384, + "step": 204 + }, + { + "epoch": 0.0722466960352423, + "grad_norm": 1.4241509312853966, + "learning_rate": 1.4366197183098594e-05, + "loss": 0.7946479320526123, + "step": 205 + }, + { + "epoch": 0.07259911894273127, + "grad_norm": 1.3949991357763207, + "learning_rate": 1.443661971830986e-05, + "loss": 0.7929860353469849, + "step": 206 + }, + { + 
"epoch": 0.07295154185022026, + "grad_norm": 1.3725430537583514, + "learning_rate": 1.4507042253521129e-05, + "loss": 0.9215391874313354, + "step": 207 + }, + { + "epoch": 0.07330396475770926, + "grad_norm": 1.4247014676365253, + "learning_rate": 1.4577464788732397e-05, + "loss": 0.8767607808113098, + "step": 208 + }, + { + "epoch": 0.07365638766519823, + "grad_norm": 1.3691339839746066, + "learning_rate": 1.4647887323943663e-05, + "loss": 0.8586276769638062, + "step": 209 + }, + { + "epoch": 0.07400881057268723, + "grad_norm": 1.3252388254138234, + "learning_rate": 1.4718309859154931e-05, + "loss": 0.8680851459503174, + "step": 210 + }, + { + "epoch": 0.07436123348017622, + "grad_norm": 1.2834178375463614, + "learning_rate": 1.47887323943662e-05, + "loss": 0.8887720108032227, + "step": 211 + }, + { + "epoch": 0.0747136563876652, + "grad_norm": 1.4918681608584679, + "learning_rate": 1.4859154929577466e-05, + "loss": 0.8887100219726562, + "step": 212 + }, + { + "epoch": 0.07506607929515419, + "grad_norm": 1.247870788657092, + "learning_rate": 1.4929577464788734e-05, + "loss": 0.9257807731628418, + "step": 213 + }, + { + "epoch": 0.07541850220264318, + "grad_norm": 1.2922967878533598, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.8107355833053589, + "step": 214 + }, + { + "epoch": 0.07577092511013216, + "grad_norm": 1.390091314994072, + "learning_rate": 1.5070422535211269e-05, + "loss": 0.8765913844108582, + "step": 215 + }, + { + "epoch": 0.07612334801762115, + "grad_norm": 1.3936279931065536, + "learning_rate": 1.5140845070422537e-05, + "loss": 0.8973524570465088, + "step": 216 + }, + { + "epoch": 0.07647577092511013, + "grad_norm": 1.310665112588589, + "learning_rate": 1.5211267605633803e-05, + "loss": 0.9194613695144653, + "step": 217 + }, + { + "epoch": 0.07682819383259912, + "grad_norm": 1.4152279415932816, + "learning_rate": 1.528169014084507e-05, + "loss": 0.8832643032073975, + "step": 218 + }, + { + "epoch": 0.07718061674008811, + "grad_norm": 
1.465705079678902, + "learning_rate": 1.535211267605634e-05, + "loss": 0.9575356245040894, + "step": 219 + }, + { + "epoch": 0.07753303964757709, + "grad_norm": 1.2268114727867823, + "learning_rate": 1.5422535211267607e-05, + "loss": 0.8302342891693115, + "step": 220 + }, + { + "epoch": 0.07788546255506608, + "grad_norm": 1.2978917843344704, + "learning_rate": 1.5492957746478872e-05, + "loss": 0.7999966144561768, + "step": 221 + }, + { + "epoch": 0.07823788546255507, + "grad_norm": 1.271952593735668, + "learning_rate": 1.556338028169014e-05, + "loss": 0.8201859593391418, + "step": 222 + }, + { + "epoch": 0.07859030837004405, + "grad_norm": 1.635464665304201, + "learning_rate": 1.563380281690141e-05, + "loss": 0.872761607170105, + "step": 223 + }, + { + "epoch": 0.07894273127753304, + "grad_norm": 1.7544850567681591, + "learning_rate": 1.5704225352112677e-05, + "loss": 0.8695409297943115, + "step": 224 + }, + { + "epoch": 0.07929515418502203, + "grad_norm": 1.2478131333285527, + "learning_rate": 1.5774647887323945e-05, + "loss": 0.8532050848007202, + "step": 225 + }, + { + "epoch": 0.07964757709251101, + "grad_norm": 1.5276196879895285, + "learning_rate": 1.5845070422535213e-05, + "loss": 0.7875121235847473, + "step": 226 + }, + { + "epoch": 0.08, + "grad_norm": 1.5837485275916963, + "learning_rate": 1.591549295774648e-05, + "loss": 0.7131509780883789, + "step": 227 + }, + { + "epoch": 0.080352422907489, + "grad_norm": 1.4681482709870555, + "learning_rate": 1.598591549295775e-05, + "loss": 0.9758431911468506, + "step": 228 + }, + { + "epoch": 0.08070484581497797, + "grad_norm": 1.4451165548552447, + "learning_rate": 1.6056338028169017e-05, + "loss": 0.7894232273101807, + "step": 229 + }, + { + "epoch": 0.08105726872246696, + "grad_norm": 1.2417235745587356, + "learning_rate": 1.6126760563380285e-05, + "loss": 0.9933483600616455, + "step": 230 + }, + { + "epoch": 0.08140969162995594, + "grad_norm": 1.4745298800972837, + "learning_rate": 1.619718309859155e-05, + 
"loss": 0.8424056768417358, + "step": 231 + }, + { + "epoch": 0.08176211453744493, + "grad_norm": 1.4626597398090972, + "learning_rate": 1.6267605633802818e-05, + "loss": 0.7957695126533508, + "step": 232 + }, + { + "epoch": 0.08211453744493392, + "grad_norm": 1.243843455131114, + "learning_rate": 1.6338028169014086e-05, + "loss": 0.8491722345352173, + "step": 233 + }, + { + "epoch": 0.0824669603524229, + "grad_norm": 1.407640698868158, + "learning_rate": 1.6408450704225354e-05, + "loss": 0.7010964751243591, + "step": 234 + }, + { + "epoch": 0.0828193832599119, + "grad_norm": 1.4584433632361322, + "learning_rate": 1.6478873239436623e-05, + "loss": 0.8713864088058472, + "step": 235 + }, + { + "epoch": 0.08317180616740089, + "grad_norm": 1.261328425360657, + "learning_rate": 1.6549295774647887e-05, + "loss": 0.6724761128425598, + "step": 236 + }, + { + "epoch": 0.08352422907488986, + "grad_norm": 1.219837126653021, + "learning_rate": 1.6619718309859155e-05, + "loss": 0.8612109422683716, + "step": 237 + }, + { + "epoch": 0.08387665198237886, + "grad_norm": 1.4745868727167897, + "learning_rate": 1.6690140845070424e-05, + "loss": 0.5697110891342163, + "step": 238 + }, + { + "epoch": 0.08422907488986785, + "grad_norm": 1.2506294676144012, + "learning_rate": 1.676056338028169e-05, + "loss": 0.7877228260040283, + "step": 239 + }, + { + "epoch": 0.08458149779735682, + "grad_norm": 1.1492235860181979, + "learning_rate": 1.683098591549296e-05, + "loss": 0.8751014471054077, + "step": 240 + }, + { + "epoch": 0.08493392070484582, + "grad_norm": 1.527957574033417, + "learning_rate": 1.6901408450704228e-05, + "loss": 0.8731381893157959, + "step": 241 + }, + { + "epoch": 0.08528634361233481, + "grad_norm": 1.291362512763109, + "learning_rate": 1.6971830985915493e-05, + "loss": 0.831383228302002, + "step": 242 + }, + { + "epoch": 0.08563876651982379, + "grad_norm": 1.2699070733171296, + "learning_rate": 1.704225352112676e-05, + "loss": 0.792934238910675, + "step": 243 + }, + { + 
"epoch": 0.08599118942731278, + "grad_norm": 1.1592748972292606, + "learning_rate": 1.711267605633803e-05, + "loss": 0.6723657846450806, + "step": 244 + }, + { + "epoch": 0.08634361233480176, + "grad_norm": 1.4796981905185658, + "learning_rate": 1.7183098591549297e-05, + "loss": 0.8377546072006226, + "step": 245 + }, + { + "epoch": 0.08669603524229075, + "grad_norm": 1.2727987522874769, + "learning_rate": 1.7253521126760565e-05, + "loss": 0.8073972463607788, + "step": 246 + }, + { + "epoch": 0.08704845814977974, + "grad_norm": 1.6240304260373406, + "learning_rate": 1.732394366197183e-05, + "loss": 0.8913615942001343, + "step": 247 + }, + { + "epoch": 0.08740088105726872, + "grad_norm": 1.4436852067854697, + "learning_rate": 1.7394366197183098e-05, + "loss": 0.9133341312408447, + "step": 248 + }, + { + "epoch": 0.08775330396475771, + "grad_norm": 1.6098073633875791, + "learning_rate": 1.7464788732394366e-05, + "loss": 0.7593938112258911, + "step": 249 + }, + { + "epoch": 0.0881057268722467, + "grad_norm": 1.456505700957212, + "learning_rate": 1.7535211267605638e-05, + "loss": 0.8049266934394836, + "step": 250 + }, + { + "epoch": 0.08845814977973568, + "grad_norm": 1.44397678174898, + "learning_rate": 1.7605633802816902e-05, + "loss": 0.9065679311752319, + "step": 251 + }, + { + "epoch": 0.08881057268722467, + "grad_norm": 1.5285644429403964, + "learning_rate": 1.767605633802817e-05, + "loss": 0.9309085011482239, + "step": 252 + }, + { + "epoch": 0.08916299559471366, + "grad_norm": 1.3367293223358285, + "learning_rate": 1.774647887323944e-05, + "loss": 0.7846949100494385, + "step": 253 + }, + { + "epoch": 0.08951541850220264, + "grad_norm": 1.4721492627949804, + "learning_rate": 1.7816901408450707e-05, + "loss": 0.9153063297271729, + "step": 254 + }, + { + "epoch": 0.08986784140969163, + "grad_norm": 1.2843813691966974, + "learning_rate": 1.7887323943661975e-05, + "loss": 0.7743638157844543, + "step": 255 + }, + { + "epoch": 0.09022026431718062, + "grad_norm": 
1.6034162783223496, + "learning_rate": 1.7957746478873243e-05, + "loss": 0.887751579284668, + "step": 256 + }, + { + "epoch": 0.0905726872246696, + "grad_norm": 1.2387435479452011, + "learning_rate": 1.8028169014084508e-05, + "loss": 0.8072899580001831, + "step": 257 + }, + { + "epoch": 0.09092511013215859, + "grad_norm": 1.3642448388425203, + "learning_rate": 1.8098591549295776e-05, + "loss": 0.8275943994522095, + "step": 258 + }, + { + "epoch": 0.09127753303964757, + "grad_norm": 1.3287842865535133, + "learning_rate": 1.8169014084507044e-05, + "loss": 0.8300620913505554, + "step": 259 + }, + { + "epoch": 0.09162995594713656, + "grad_norm": 1.26616505669333, + "learning_rate": 1.8239436619718312e-05, + "loss": 0.6886857748031616, + "step": 260 + }, + { + "epoch": 0.09198237885462555, + "grad_norm": 2.689833624979495, + "learning_rate": 1.830985915492958e-05, + "loss": 0.8190158605575562, + "step": 261 + }, + { + "epoch": 0.09233480176211453, + "grad_norm": 1.3392491700180422, + "learning_rate": 1.8380281690140845e-05, + "loss": 0.8500730991363525, + "step": 262 + }, + { + "epoch": 0.09268722466960352, + "grad_norm": 1.499663410513064, + "learning_rate": 1.8450704225352113e-05, + "loss": 0.8340811729431152, + "step": 263 + }, + { + "epoch": 0.09303964757709252, + "grad_norm": 1.3031308803407857, + "learning_rate": 1.852112676056338e-05, + "loss": 0.8055675029754639, + "step": 264 + }, + { + "epoch": 0.0933920704845815, + "grad_norm": 1.410218243221954, + "learning_rate": 1.859154929577465e-05, + "loss": 0.7956680059432983, + "step": 265 + }, + { + "epoch": 0.09374449339207049, + "grad_norm": 1.4181751660111779, + "learning_rate": 1.8661971830985917e-05, + "loss": 0.8232501745223999, + "step": 266 + }, + { + "epoch": 0.09409691629955948, + "grad_norm": 1.472224530959967, + "learning_rate": 1.8732394366197186e-05, + "loss": 0.8808565139770508, + "step": 267 + }, + { + "epoch": 0.09444933920704845, + "grad_norm": 1.5113548411958122, + "learning_rate": 
1.880281690140845e-05, + "loss": 0.885380744934082, + "step": 268 + }, + { + "epoch": 0.09480176211453745, + "grad_norm": 1.5009611452094687, + "learning_rate": 1.887323943661972e-05, + "loss": 0.8408790826797485, + "step": 269 + }, + { + "epoch": 0.09515418502202644, + "grad_norm": 1.395810517840328, + "learning_rate": 1.8943661971830987e-05, + "loss": 0.7089993953704834, + "step": 270 + }, + { + "epoch": 0.09550660792951542, + "grad_norm": 1.280231938177333, + "learning_rate": 1.9014084507042255e-05, + "loss": 0.7941038608551025, + "step": 271 + }, + { + "epoch": 0.09585903083700441, + "grad_norm": 1.5210768015450882, + "learning_rate": 1.9084507042253523e-05, + "loss": 0.8269138932228088, + "step": 272 + }, + { + "epoch": 0.09621145374449339, + "grad_norm": 1.5053903060638305, + "learning_rate": 1.9154929577464788e-05, + "loss": 0.8206192255020142, + "step": 273 + }, + { + "epoch": 0.09656387665198238, + "grad_norm": 1.49737615599854, + "learning_rate": 1.922535211267606e-05, + "loss": 0.9146496653556824, + "step": 274 + }, + { + "epoch": 0.09691629955947137, + "grad_norm": 1.1755726979972605, + "learning_rate": 1.9295774647887327e-05, + "loss": 0.6738560199737549, + "step": 275 + }, + { + "epoch": 0.09726872246696035, + "grad_norm": 1.3169911381980228, + "learning_rate": 1.9366197183098595e-05, + "loss": 0.934916615486145, + "step": 276 + }, + { + "epoch": 0.09762114537444934, + "grad_norm": 1.357245739203775, + "learning_rate": 1.943661971830986e-05, + "loss": 0.8952134847640991, + "step": 277 + }, + { + "epoch": 0.09797356828193833, + "grad_norm": 1.3423178147772294, + "learning_rate": 1.9507042253521128e-05, + "loss": 0.9346420764923096, + "step": 278 + }, + { + "epoch": 0.09832599118942731, + "grad_norm": 1.5698833191970427, + "learning_rate": 1.9577464788732396e-05, + "loss": 0.8781993985176086, + "step": 279 + }, + { + "epoch": 0.0986784140969163, + "grad_norm": 1.4703395142125208, + "learning_rate": 1.9647887323943664e-05, + "loss": 0.8283448219299316, + 
"step": 280 + }, + { + "epoch": 0.09903083700440529, + "grad_norm": 1.2650765439550704, + "learning_rate": 1.9718309859154933e-05, + "loss": 0.8010722398757935, + "step": 281 + }, + { + "epoch": 0.09938325991189427, + "grad_norm": 1.3576050403922397, + "learning_rate": 1.97887323943662e-05, + "loss": 0.8697119951248169, + "step": 282 + }, + { + "epoch": 0.09973568281938326, + "grad_norm": 1.098837792765385, + "learning_rate": 1.9859154929577465e-05, + "loss": 0.6448882818222046, + "step": 283 + }, + { + "epoch": 0.10008810572687225, + "grad_norm": 1.5101908618325302, + "learning_rate": 1.9929577464788734e-05, + "loss": 0.7782007455825806, + "step": 284 + }, + { + "epoch": 0.10044052863436123, + "grad_norm": 1.455658231417001, + "learning_rate": 2e-05, + "loss": 0.8131508827209473, + "step": 285 + }, + { + "epoch": 0.10079295154185022, + "grad_norm": 1.4413777660177336, + "learning_rate": 1.999999830265561e-05, + "loss": 0.8592134714126587, + "step": 286 + }, + { + "epoch": 0.1011453744493392, + "grad_norm": 1.5671417589518397, + "learning_rate": 1.9999993210623002e-05, + "loss": 0.9374675750732422, + "step": 287 + }, + { + "epoch": 0.10149779735682819, + "grad_norm": 1.5499152824954487, + "learning_rate": 1.9999984723903913e-05, + "loss": 0.8416328430175781, + "step": 288 + }, + { + "epoch": 0.10185022026431718, + "grad_norm": 1.267360297703748, + "learning_rate": 1.9999972842501218e-05, + "loss": 0.7587184906005859, + "step": 289 + }, + { + "epoch": 0.10220264317180616, + "grad_norm": 1.4783535336356979, + "learning_rate": 1.9999957566418956e-05, + "loss": 1.010494351387024, + "step": 290 + }, + { + "epoch": 0.10255506607929515, + "grad_norm": 1.3092025632301814, + "learning_rate": 1.999993889566231e-05, + "loss": 0.7942835092544556, + "step": 291 + }, + { + "epoch": 0.10290748898678415, + "grad_norm": 1.4620379458028798, + "learning_rate": 1.999991683023762e-05, + "loss": 0.9069477915763855, + "step": 292 + }, + { + "epoch": 0.10325991189427312, + "grad_norm": 
1.781963673155629, + "learning_rate": 1.9999891370152375e-05, + "loss": 0.8776397705078125, + "step": 293 + }, + { + "epoch": 0.10361233480176212, + "grad_norm": 1.3409879305652028, + "learning_rate": 1.9999862515415216e-05, + "loss": 0.8560416102409363, + "step": 294 + }, + { + "epoch": 0.10396475770925111, + "grad_norm": 1.601676543787724, + "learning_rate": 1.9999830266035942e-05, + "loss": 0.9177321195602417, + "step": 295 + }, + { + "epoch": 0.10431718061674009, + "grad_norm": 1.621521883940329, + "learning_rate": 1.99997946220255e-05, + "loss": 0.8830884695053101, + "step": 296 + }, + { + "epoch": 0.10466960352422908, + "grad_norm": 1.5076951372471592, + "learning_rate": 1.9999755583395987e-05, + "loss": 0.913659930229187, + "step": 297 + }, + { + "epoch": 0.10502202643171807, + "grad_norm": 1.48724181087663, + "learning_rate": 1.999971315016066e-05, + "loss": 0.773309588432312, + "step": 298 + }, + { + "epoch": 0.10537444933920705, + "grad_norm": 1.4640758198016095, + "learning_rate": 1.9999667322333916e-05, + "loss": 0.8432563543319702, + "step": 299 + }, + { + "epoch": 0.10572687224669604, + "grad_norm": 1.5419897004531282, + "learning_rate": 1.999961809993132e-05, + "loss": 0.9632397890090942, + "step": 300 + }, + { + "epoch": 0.10607929515418502, + "grad_norm": 1.4657018761848883, + "learning_rate": 1.999956548296958e-05, + "loss": 0.8205600380897522, + "step": 301 + }, + { + "epoch": 0.10643171806167401, + "grad_norm": 1.2908123355748096, + "learning_rate": 1.9999509471466557e-05, + "loss": 0.8789785504341125, + "step": 302 + }, + { + "epoch": 0.106784140969163, + "grad_norm": 1.4062841050093677, + "learning_rate": 1.999945006544126e-05, + "loss": 0.8445791006088257, + "step": 303 + }, + { + "epoch": 0.10713656387665198, + "grad_norm": 1.3201850616961108, + "learning_rate": 1.9999387264913865e-05, + "loss": 0.8025245666503906, + "step": 304 + }, + { + "epoch": 0.10748898678414097, + "grad_norm": 1.3596018005437036, + "learning_rate": 
1.9999321069905688e-05, + "loss": 0.9271318912506104, + "step": 305 + }, + { + "epoch": 0.10784140969162996, + "grad_norm": 1.167387591378785, + "learning_rate": 1.999925148043919e-05, + "loss": 0.809894859790802, + "step": 306 + }, + { + "epoch": 0.10819383259911894, + "grad_norm": 1.4267923203712158, + "learning_rate": 1.999917849653801e-05, + "loss": 0.8940669298171997, + "step": 307 + }, + { + "epoch": 0.10854625550660793, + "grad_norm": 1.466148592973388, + "learning_rate": 1.9999102118226912e-05, + "loss": 0.9301233887672424, + "step": 308 + }, + { + "epoch": 0.10889867841409692, + "grad_norm": 1.271175959298383, + "learning_rate": 1.9999022345531834e-05, + "loss": 0.6429216861724854, + "step": 309 + }, + { + "epoch": 0.1092511013215859, + "grad_norm": 1.3392816449794738, + "learning_rate": 1.999893917847985e-05, + "loss": 0.7199009656906128, + "step": 310 + }, + { + "epoch": 0.10960352422907489, + "grad_norm": 1.2732787140894477, + "learning_rate": 1.999885261709919e-05, + "loss": 0.8312395811080933, + "step": 311 + }, + { + "epoch": 0.10995594713656388, + "grad_norm": 1.4809957988420102, + "learning_rate": 1.999876266141924e-05, + "loss": 0.8187745213508606, + "step": 312 + }, + { + "epoch": 0.11030837004405286, + "grad_norm": 1.2638906346778362, + "learning_rate": 1.9998669311470546e-05, + "loss": 0.8632344603538513, + "step": 313 + }, + { + "epoch": 0.11066079295154185, + "grad_norm": 1.5651718256034985, + "learning_rate": 1.9998572567284787e-05, + "loss": 0.8789447546005249, + "step": 314 + }, + { + "epoch": 0.11101321585903083, + "grad_norm": 1.4657438576086577, + "learning_rate": 1.999847242889481e-05, + "loss": 0.7647864818572998, + "step": 315 + }, + { + "epoch": 0.11136563876651982, + "grad_norm": 1.2962284510646964, + "learning_rate": 1.9998368896334606e-05, + "loss": 0.872633695602417, + "step": 316 + }, + { + "epoch": 0.11171806167400881, + "grad_norm": 1.4704185501053861, + "learning_rate": 1.9998261969639324e-05, + "loss": 0.8249840140342712, + 
"step": 317 + }, + { + "epoch": 0.11207048458149779, + "grad_norm": 1.6298830469717174, + "learning_rate": 1.999815164884526e-05, + "loss": 0.7558056116104126, + "step": 318 + }, + { + "epoch": 0.11242290748898678, + "grad_norm": 1.3075257157183537, + "learning_rate": 1.9998037933989866e-05, + "loss": 0.7447441220283508, + "step": 319 + }, + { + "epoch": 0.11277533039647578, + "grad_norm": 1.4956646267919036, + "learning_rate": 1.9997920825111743e-05, + "loss": 0.8260442018508911, + "step": 320 + }, + { + "epoch": 0.11312775330396475, + "grad_norm": 1.2866274072297625, + "learning_rate": 1.999780032225065e-05, + "loss": 0.7916134595870972, + "step": 321 + }, + { + "epoch": 0.11348017621145375, + "grad_norm": 1.3548711592442237, + "learning_rate": 1.9997676425447486e-05, + "loss": 0.7460259199142456, + "step": 322 + }, + { + "epoch": 0.11383259911894274, + "grad_norm": 1.4664419676620792, + "learning_rate": 1.9997549134744318e-05, + "loss": 0.9739946126937866, + "step": 323 + }, + { + "epoch": 0.11418502202643172, + "grad_norm": 1.3133090693965692, + "learning_rate": 1.9997418450184352e-05, + "loss": 0.7242900133132935, + "step": 324 + }, + { + "epoch": 0.1145374449339207, + "grad_norm": 1.7023646414032152, + "learning_rate": 1.9997284371811955e-05, + "loss": 0.7645323276519775, + "step": 325 + }, + { + "epoch": 0.1148898678414097, + "grad_norm": 1.3437215758424148, + "learning_rate": 1.9997146899672638e-05, + "loss": 0.7377017736434937, + "step": 326 + }, + { + "epoch": 0.11524229074889868, + "grad_norm": 1.3608732999796416, + "learning_rate": 1.9997006033813076e-05, + "loss": 0.7117934226989746, + "step": 327 + }, + { + "epoch": 0.11559471365638767, + "grad_norm": 1.485158034808982, + "learning_rate": 1.999686177428108e-05, + "loss": 0.8517680168151855, + "step": 328 + }, + { + "epoch": 0.11594713656387665, + "grad_norm": 1.3118416735480631, + "learning_rate": 1.9996714121125626e-05, + "loss": 0.7099400758743286, + "step": 329 + }, + { + "epoch": 
0.11629955947136564, + "grad_norm": 1.3949559553781739, + "learning_rate": 1.9996563074396838e-05, + "loss": 0.8581711053848267, + "step": 330 + }, + { + "epoch": 0.11665198237885463, + "grad_norm": 1.322464822656225, + "learning_rate": 1.9996408634145994e-05, + "loss": 0.7841953635215759, + "step": 331 + }, + { + "epoch": 0.11700440528634361, + "grad_norm": 1.2580468593989962, + "learning_rate": 1.9996250800425515e-05, + "loss": 0.7376754879951477, + "step": 332 + }, + { + "epoch": 0.1173568281938326, + "grad_norm": 1.3538742269891202, + "learning_rate": 1.9996089573288985e-05, + "loss": 0.8934558033943176, + "step": 333 + }, + { + "epoch": 0.11770925110132159, + "grad_norm": 1.4597310886631008, + "learning_rate": 1.999592495279113e-05, + "loss": 0.7870250940322876, + "step": 334 + }, + { + "epoch": 0.11806167400881057, + "grad_norm": 1.5788273084375275, + "learning_rate": 1.9995756938987846e-05, + "loss": 0.7026203274726868, + "step": 335 + }, + { + "epoch": 0.11841409691629956, + "grad_norm": 2.206437289778364, + "learning_rate": 1.999558553193616e-05, + "loss": 1.0066381692886353, + "step": 336 + }, + { + "epoch": 0.11876651982378855, + "grad_norm": 1.349262918557434, + "learning_rate": 1.9995410731694255e-05, + "loss": 0.7860246896743774, + "step": 337 + }, + { + "epoch": 0.11911894273127753, + "grad_norm": 1.4261295710834618, + "learning_rate": 1.999523253832148e-05, + "loss": 0.8142588138580322, + "step": 338 + }, + { + "epoch": 0.11947136563876652, + "grad_norm": 1.403543131076251, + "learning_rate": 1.9995050951878317e-05, + "loss": 0.9737639427185059, + "step": 339 + }, + { + "epoch": 0.1198237885462555, + "grad_norm": 1.2538473699838193, + "learning_rate": 1.999486597242642e-05, + "loss": 0.6165765523910522, + "step": 340 + }, + { + "epoch": 0.12017621145374449, + "grad_norm": 1.4403971646421685, + "learning_rate": 1.999467760002857e-05, + "loss": 0.8553996086120605, + "step": 341 + }, + { + "epoch": 0.12052863436123348, + "grad_norm": 1.579218034733104, 
+ "learning_rate": 1.9994485834748725e-05, + "loss": 0.9291022419929504, + "step": 342 + }, + { + "epoch": 0.12088105726872246, + "grad_norm": 1.3583147087232978, + "learning_rate": 1.9994290676651977e-05, + "loss": 0.8309136629104614, + "step": 343 + }, + { + "epoch": 0.12123348017621145, + "grad_norm": 1.2343518052190974, + "learning_rate": 1.999409212580458e-05, + "loss": 0.6963932514190674, + "step": 344 + }, + { + "epoch": 0.12158590308370044, + "grad_norm": 1.126432291251887, + "learning_rate": 1.9993890182273932e-05, + "loss": 0.8220632076263428, + "step": 345 + }, + { + "epoch": 0.12193832599118942, + "grad_norm": 1.5283410369228738, + "learning_rate": 1.9993684846128588e-05, + "loss": 0.8407794237136841, + "step": 346 + }, + { + "epoch": 0.12229074889867841, + "grad_norm": 1.479739244816861, + "learning_rate": 1.9993476117438257e-05, + "loss": 0.795718789100647, + "step": 347 + }, + { + "epoch": 0.1226431718061674, + "grad_norm": 1.3466106447402244, + "learning_rate": 1.9993263996273792e-05, + "loss": 0.7482223510742188, + "step": 348 + }, + { + "epoch": 0.12299559471365638, + "grad_norm": 1.4606743428798505, + "learning_rate": 1.99930484827072e-05, + "loss": 0.814468264579773, + "step": 349 + }, + { + "epoch": 0.12334801762114538, + "grad_norm": 1.5345713664893856, + "learning_rate": 1.9992829576811648e-05, + "loss": 0.8105748891830444, + "step": 350 + }, + { + "epoch": 0.12370044052863437, + "grad_norm": 1.6869192314100032, + "learning_rate": 1.9992607278661437e-05, + "loss": 0.8756073713302612, + "step": 351 + }, + { + "epoch": 0.12405286343612335, + "grad_norm": 1.228330868948225, + "learning_rate": 1.9992381588332043e-05, + "loss": 0.8643946647644043, + "step": 352 + }, + { + "epoch": 0.12440528634361234, + "grad_norm": 1.1468400313164093, + "learning_rate": 1.9992152505900067e-05, + "loss": 0.7691172361373901, + "step": 353 + }, + { + "epoch": 0.12475770925110131, + "grad_norm": 1.3198644948783926, + "learning_rate": 1.9991920031443288e-05, + "loss": 
0.716686487197876, + "step": 354 + }, + { + "epoch": 0.12511013215859032, + "grad_norm": 1.39334404424432, + "learning_rate": 1.9991684165040616e-05, + "loss": 0.697482705116272, + "step": 355 + }, + { + "epoch": 0.12546255506607928, + "grad_norm": 1.5087579956634654, + "learning_rate": 1.999144490677212e-05, + "loss": 0.8039460182189941, + "step": 356 + }, + { + "epoch": 0.12581497797356828, + "grad_norm": 1.3206582875495743, + "learning_rate": 1.9991202256719032e-05, + "loss": 0.872138261795044, + "step": 357 + }, + { + "epoch": 0.12616740088105727, + "grad_norm": 1.330801420963485, + "learning_rate": 1.999095621496371e-05, + "loss": 0.8659502267837524, + "step": 358 + }, + { + "epoch": 0.12651982378854626, + "grad_norm": 1.2062023445068855, + "learning_rate": 1.9990706781589682e-05, + "loss": 0.7585660219192505, + "step": 359 + }, + { + "epoch": 0.12687224669603525, + "grad_norm": 1.349814688916852, + "learning_rate": 1.9990453956681626e-05, + "loss": 0.86381995677948, + "step": 360 + }, + { + "epoch": 0.12722466960352422, + "grad_norm": 1.3080210647965176, + "learning_rate": 1.9990197740325365e-05, + "loss": 0.7623461484909058, + "step": 361 + }, + { + "epoch": 0.1275770925110132, + "grad_norm": 1.4247026163468757, + "learning_rate": 1.9989938132607877e-05, + "loss": 0.8262917995452881, + "step": 362 + }, + { + "epoch": 0.1279295154185022, + "grad_norm": 1.3245955099655373, + "learning_rate": 1.9989675133617294e-05, + "loss": 0.7879630327224731, + "step": 363 + }, + { + "epoch": 0.1282819383259912, + "grad_norm": 1.5925116832241206, + "learning_rate": 1.9989408743442892e-05, + "loss": 0.8282565474510193, + "step": 364 + }, + { + "epoch": 0.12863436123348018, + "grad_norm": 1.151308483630064, + "learning_rate": 1.9989138962175105e-05, + "loss": 0.8358104228973389, + "step": 365 + }, + { + "epoch": 0.12898678414096917, + "grad_norm": 1.4831450607430074, + "learning_rate": 1.9988865789905513e-05, + "loss": 0.9111027121543884, + "step": 366 + }, + { + "epoch": 
0.12933920704845814, + "grad_norm": 1.4181532995073547, + "learning_rate": 1.9988589226726847e-05, + "loss": 0.766915500164032, + "step": 367 + }, + { + "epoch": 0.12969162995594713, + "grad_norm": 1.3923253104774793, + "learning_rate": 1.9988309272733e-05, + "loss": 0.818048357963562, + "step": 368 + }, + { + "epoch": 0.13004405286343612, + "grad_norm": 1.2625645815303237, + "learning_rate": 1.9988025928019e-05, + "loss": 0.8188307285308838, + "step": 369 + }, + { + "epoch": 0.1303964757709251, + "grad_norm": 1.4656557007271924, + "learning_rate": 1.998773919268104e-05, + "loss": 0.88718181848526, + "step": 370 + }, + { + "epoch": 0.1307488986784141, + "grad_norm": 1.3104922660776017, + "learning_rate": 1.998744906681645e-05, + "loss": 0.9173898696899414, + "step": 371 + }, + { + "epoch": 0.1311013215859031, + "grad_norm": 1.4305544884130297, + "learning_rate": 1.9987155550523725e-05, + "loss": 0.8025110960006714, + "step": 372 + }, + { + "epoch": 0.13145374449339206, + "grad_norm": 1.2328392002659898, + "learning_rate": 1.9986858643902502e-05, + "loss": 0.8931341767311096, + "step": 373 + }, + { + "epoch": 0.13180616740088105, + "grad_norm": 1.258415234092876, + "learning_rate": 1.9986558347053574e-05, + "loss": 0.8813796043395996, + "step": 374 + }, + { + "epoch": 0.13215859030837004, + "grad_norm": 1.3254702068923054, + "learning_rate": 1.9986254660078877e-05, + "loss": 0.8021976947784424, + "step": 375 + }, + { + "epoch": 0.13251101321585904, + "grad_norm": 1.3001638136254743, + "learning_rate": 1.9985947583081506e-05, + "loss": 0.8083860874176025, + "step": 376 + }, + { + "epoch": 0.13286343612334803, + "grad_norm": 1.2519881014381842, + "learning_rate": 1.9985637116165705e-05, + "loss": 0.7639983296394348, + "step": 377 + }, + { + "epoch": 0.133215859030837, + "grad_norm": 1.3308962501940544, + "learning_rate": 1.9985323259436874e-05, + "loss": 0.7775800228118896, + "step": 378 + }, + { + "epoch": 0.13356828193832598, + "grad_norm": 1.3822704707659155, + 
"learning_rate": 1.9985006013001545e-05, + "loss": 0.8892228603363037, + "step": 379 + }, + { + "epoch": 0.13392070484581498, + "grad_norm": 1.4007373611969895, + "learning_rate": 1.998468537696742e-05, + "loss": 0.9158765077590942, + "step": 380 + }, + { + "epoch": 0.13427312775330397, + "grad_norm": 1.2142103786325267, + "learning_rate": 1.9984361351443343e-05, + "loss": 0.7523722648620605, + "step": 381 + }, + { + "epoch": 0.13462555066079296, + "grad_norm": 1.5406874167870075, + "learning_rate": 1.998403393653932e-05, + "loss": 0.8052740693092346, + "step": 382 + }, + { + "epoch": 0.13497797356828195, + "grad_norm": 4.839014305582762, + "learning_rate": 1.9983703132366484e-05, + "loss": 0.8271476626396179, + "step": 383 + }, + { + "epoch": 0.13533039647577091, + "grad_norm": 1.3724243356768093, + "learning_rate": 1.998336893903714e-05, + "loss": 0.8904454112052917, + "step": 384 + }, + { + "epoch": 0.1356828193832599, + "grad_norm": 1.5086695454887955, + "learning_rate": 1.9983031356664733e-05, + "loss": 0.8705847263336182, + "step": 385 + }, + { + "epoch": 0.1360352422907489, + "grad_norm": 1.3562221939291232, + "learning_rate": 1.9982690385363867e-05, + "loss": 0.8269569873809814, + "step": 386 + }, + { + "epoch": 0.1363876651982379, + "grad_norm": 1.6156870918588995, + "learning_rate": 1.998234602525029e-05, + "loss": 0.9796818494796753, + "step": 387 + }, + { + "epoch": 0.13674008810572688, + "grad_norm": 1.5268638185003427, + "learning_rate": 1.9981998276440892e-05, + "loss": 0.8276596665382385, + "step": 388 + }, + { + "epoch": 0.13709251101321585, + "grad_norm": 1.1979978409172833, + "learning_rate": 1.9981647139053737e-05, + "loss": 0.8739231824874878, + "step": 389 + }, + { + "epoch": 0.13744493392070484, + "grad_norm": 1.517970302113154, + "learning_rate": 1.9981292613208018e-05, + "loss": 0.677521824836731, + "step": 390 + }, + { + "epoch": 0.13779735682819383, + "grad_norm": 1.483399153515808, + "learning_rate": 1.9980934699024084e-05, + "loss": 
0.744938313961029, + "step": 391 + }, + { + "epoch": 0.13814977973568282, + "grad_norm": 1.423178346498717, + "learning_rate": 1.998057339662344e-05, + "loss": 0.8367065787315369, + "step": 392 + }, + { + "epoch": 0.1385022026431718, + "grad_norm": 1.6714277386990386, + "learning_rate": 1.9980208706128733e-05, + "loss": 0.775547981262207, + "step": 393 + }, + { + "epoch": 0.1388546255506608, + "grad_norm": 1.244274379470138, + "learning_rate": 1.9979840627663764e-05, + "loss": 0.8287982940673828, + "step": 394 + }, + { + "epoch": 0.13920704845814977, + "grad_norm": 1.429588244120958, + "learning_rate": 1.997946916135349e-05, + "loss": 0.7582247257232666, + "step": 395 + }, + { + "epoch": 0.13955947136563876, + "grad_norm": 1.309709423857836, + "learning_rate": 1.997909430732401e-05, + "loss": 0.968267560005188, + "step": 396 + }, + { + "epoch": 0.13991189427312775, + "grad_norm": 1.4247483192434738, + "learning_rate": 1.9978716065702566e-05, + "loss": 0.8850257396697998, + "step": 397 + }, + { + "epoch": 0.14026431718061674, + "grad_norm": 1.1261344584223945, + "learning_rate": 1.9978334436617574e-05, + "loss": 0.7206246852874756, + "step": 398 + }, + { + "epoch": 0.14061674008810573, + "grad_norm": 1.2702546976441136, + "learning_rate": 1.9977949420198576e-05, + "loss": 0.7833065986633301, + "step": 399 + }, + { + "epoch": 0.14096916299559473, + "grad_norm": 1.2940706461552187, + "learning_rate": 1.9977561016576275e-05, + "loss": 0.7199673652648926, + "step": 400 + }, + { + "epoch": 0.1413215859030837, + "grad_norm": 1.3300807823897647, + "learning_rate": 1.9977169225882522e-05, + "loss": 0.7544811367988586, + "step": 401 + }, + { + "epoch": 0.14167400881057268, + "grad_norm": 1.3500860064281444, + "learning_rate": 1.9976774048250317e-05, + "loss": 0.7528219819068909, + "step": 402 + }, + { + "epoch": 0.14202643171806167, + "grad_norm": 1.230028309495833, + "learning_rate": 1.9976375483813814e-05, + "loss": 0.8025565147399902, + "step": 403 + }, + { + "epoch": 
0.14237885462555067, + "grad_norm": 1.271700071603726, + "learning_rate": 1.997597353270831e-05, + "loss": 0.6553962230682373, + "step": 404 + }, + { + "epoch": 0.14273127753303966, + "grad_norm": 1.195900427449374, + "learning_rate": 1.9975568195070253e-05, + "loss": 0.7070015072822571, + "step": 405 + }, + { + "epoch": 0.14308370044052862, + "grad_norm": 1.238996854756085, + "learning_rate": 1.9975159471037247e-05, + "loss": 0.7454725503921509, + "step": 406 + }, + { + "epoch": 0.1434361233480176, + "grad_norm": 1.5517260528670263, + "learning_rate": 1.9974747360748038e-05, + "loss": 0.7074518799781799, + "step": 407 + }, + { + "epoch": 0.1437885462555066, + "grad_norm": 1.4240478656973132, + "learning_rate": 1.9974331864342527e-05, + "loss": 0.6870182752609253, + "step": 408 + }, + { + "epoch": 0.1441409691629956, + "grad_norm": 1.5514938206230895, + "learning_rate": 1.9973912981961763e-05, + "loss": 0.826898455619812, + "step": 409 + }, + { + "epoch": 0.1444933920704846, + "grad_norm": 1.483679538302774, + "learning_rate": 1.997349071374794e-05, + "loss": 0.7244436740875244, + "step": 410 + }, + { + "epoch": 0.14484581497797358, + "grad_norm": 1.2681717185328807, + "learning_rate": 1.9973065059844404e-05, + "loss": 0.6885448694229126, + "step": 411 + }, + { + "epoch": 0.14519823788546254, + "grad_norm": 1.3797417122455713, + "learning_rate": 1.9972636020395653e-05, + "loss": 0.8477644920349121, + "step": 412 + }, + { + "epoch": 0.14555066079295154, + "grad_norm": 1.5051840849568912, + "learning_rate": 1.9972203595547334e-05, + "loss": 0.9432111382484436, + "step": 413 + }, + { + "epoch": 0.14590308370044053, + "grad_norm": 1.351618505603555, + "learning_rate": 1.9971767785446243e-05, + "loss": 1.0101501941680908, + "step": 414 + }, + { + "epoch": 0.14625550660792952, + "grad_norm": 1.421926997117087, + "learning_rate": 1.997132859024032e-05, + "loss": 0.8174984455108643, + "step": 415 + }, + { + "epoch": 0.1466079295154185, + "grad_norm": 1.1573592385577054, + 
"learning_rate": 1.997088601007866e-05, + "loss": 0.6857198476791382, + "step": 416 + }, + { + "epoch": 0.14696035242290748, + "grad_norm": 1.1795540078822444, + "learning_rate": 1.9970440045111505e-05, + "loss": 0.7742792367935181, + "step": 417 + }, + { + "epoch": 0.14731277533039647, + "grad_norm": 1.783143700583216, + "learning_rate": 1.996999069549025e-05, + "loss": 0.7489269971847534, + "step": 418 + }, + { + "epoch": 0.14766519823788546, + "grad_norm": 1.4327273961807123, + "learning_rate": 1.9969537961367423e-05, + "loss": 0.7362021207809448, + "step": 419 + }, + { + "epoch": 0.14801762114537445, + "grad_norm": 1.3763810595433905, + "learning_rate": 1.996908184289673e-05, + "loss": 0.7596213221549988, + "step": 420 + }, + { + "epoch": 0.14837004405286344, + "grad_norm": 1.3357573192960268, + "learning_rate": 1.9968622340232993e-05, + "loss": 0.7739163637161255, + "step": 421 + }, + { + "epoch": 0.14872246696035243, + "grad_norm": 1.2890109075687697, + "learning_rate": 1.9968159453532215e-05, + "loss": 0.9059790372848511, + "step": 422 + }, + { + "epoch": 0.1490748898678414, + "grad_norm": 1.4830814966077062, + "learning_rate": 1.9967693182951516e-05, + "loss": 0.7298871278762817, + "step": 423 + }, + { + "epoch": 0.1494273127753304, + "grad_norm": 1.3303231094936145, + "learning_rate": 1.9967223528649194e-05, + "loss": 0.7218194007873535, + "step": 424 + }, + { + "epoch": 0.14977973568281938, + "grad_norm": 1.3738677080017252, + "learning_rate": 1.996675049078467e-05, + "loss": 0.8031259179115295, + "step": 425 + }, + { + "epoch": 0.15013215859030837, + "grad_norm": 1.402915539690338, + "learning_rate": 1.9966274069518533e-05, + "loss": 0.8583194613456726, + "step": 426 + }, + { + "epoch": 0.15048458149779737, + "grad_norm": 1.5081794718854693, + "learning_rate": 1.9965794265012514e-05, + "loss": 0.7829155921936035, + "step": 427 + }, + { + "epoch": 0.15083700440528636, + "grad_norm": 1.3040065928659967, + "learning_rate": 1.9965311077429484e-05, + "loss": 
0.709203839302063, + "step": 428 + }, + { + "epoch": 0.15118942731277532, + "grad_norm": 1.324153309243564, + "learning_rate": 1.996482450693348e-05, + "loss": 0.7515710592269897, + "step": 429 + }, + { + "epoch": 0.1515418502202643, + "grad_norm": 1.5966034920450463, + "learning_rate": 1.9964334553689674e-05, + "loss": 0.8552615642547607, + "step": 430 + }, + { + "epoch": 0.1518942731277533, + "grad_norm": 1.3833039246024212, + "learning_rate": 1.9963841217864385e-05, + "loss": 0.7946224808692932, + "step": 431 + }, + { + "epoch": 0.1522466960352423, + "grad_norm": 1.351342046961, + "learning_rate": 1.9963344499625087e-05, + "loss": 0.7117756605148315, + "step": 432 + }, + { + "epoch": 0.1525991189427313, + "grad_norm": 1.5677032677150589, + "learning_rate": 1.9962844399140405e-05, + "loss": 0.8892849683761597, + "step": 433 + }, + { + "epoch": 0.15295154185022025, + "grad_norm": 1.6682742006947457, + "learning_rate": 1.9962340916580105e-05, + "loss": 0.9037783145904541, + "step": 434 + }, + { + "epoch": 0.15330396475770924, + "grad_norm": 1.3178590359087465, + "learning_rate": 1.9961834052115104e-05, + "loss": 0.7419179677963257, + "step": 435 + }, + { + "epoch": 0.15365638766519824, + "grad_norm": 1.500659178246394, + "learning_rate": 1.9961323805917464e-05, + "loss": 0.847285270690918, + "step": 436 + }, + { + "epoch": 0.15400881057268723, + "grad_norm": 1.520891708486689, + "learning_rate": 1.99608101781604e-05, + "loss": 0.793263852596283, + "step": 437 + }, + { + "epoch": 0.15436123348017622, + "grad_norm": 1.2927327484478677, + "learning_rate": 1.9960293169018276e-05, + "loss": 0.6600923538208008, + "step": 438 + }, + { + "epoch": 0.1547136563876652, + "grad_norm": 1.178823428760428, + "learning_rate": 1.9959772778666592e-05, + "loss": 0.7642164826393127, + "step": 439 + }, + { + "epoch": 0.15506607929515417, + "grad_norm": 1.4230767051116806, + "learning_rate": 1.995924900728201e-05, + "loss": 0.897221565246582, + "step": 440 + }, + { + "epoch": 
0.15541850220264317, + "grad_norm": 1.3912415328195475, + "learning_rate": 1.9958721855042338e-05, + "loss": 0.830953061580658, + "step": 441 + }, + { + "epoch": 0.15577092511013216, + "grad_norm": 1.3683790024985447, + "learning_rate": 1.995819132212652e-05, + "loss": 0.7514863014221191, + "step": 442 + }, + { + "epoch": 0.15612334801762115, + "grad_norm": 1.3179910502987273, + "learning_rate": 1.995765740871466e-05, + "loss": 0.7039257287979126, + "step": 443 + }, + { + "epoch": 0.15647577092511014, + "grad_norm": 1.5017230130600239, + "learning_rate": 1.9957120114988e-05, + "loss": 0.810503363609314, + "step": 444 + }, + { + "epoch": 0.1568281938325991, + "grad_norm": 1.4050071397488821, + "learning_rate": 1.9956579441128942e-05, + "loss": 0.616968035697937, + "step": 445 + }, + { + "epoch": 0.1571806167400881, + "grad_norm": 1.3149075420166694, + "learning_rate": 1.9956035387321024e-05, + "loss": 0.7008740901947021, + "step": 446 + }, + { + "epoch": 0.1575330396475771, + "grad_norm": 1.4992101173925434, + "learning_rate": 1.995548795374893e-05, + "loss": 0.847025454044342, + "step": 447 + }, + { + "epoch": 0.15788546255506608, + "grad_norm": 1.3763555067673139, + "learning_rate": 1.9954937140598506e-05, + "loss": 0.7788053750991821, + "step": 448 + }, + { + "epoch": 0.15823788546255507, + "grad_norm": 1.301728118921247, + "learning_rate": 1.9954382948056735e-05, + "loss": 0.7592896819114685, + "step": 449 + }, + { + "epoch": 0.15859030837004406, + "grad_norm": 1.6001158206313053, + "learning_rate": 1.995382537631174e-05, + "loss": 0.9458491802215576, + "step": 450 + }, + { + "epoch": 0.15894273127753303, + "grad_norm": 1.3218132869761372, + "learning_rate": 1.9953264425552804e-05, + "loss": 0.8069632053375244, + "step": 451 + }, + { + "epoch": 0.15929515418502202, + "grad_norm": 1.316918406992957, + "learning_rate": 1.9952700095970357e-05, + "loss": 0.7876379489898682, + "step": 452 + }, + { + "epoch": 0.159647577092511, + "grad_norm": 1.5440089355741875, + 
"learning_rate": 1.9952132387755965e-05, + "loss": 0.796333909034729, + "step": 453 + }, + { + "epoch": 0.16, + "grad_norm": 1.243828269503452, + "learning_rate": 1.9951561301102348e-05, + "loss": 0.7171634435653687, + "step": 454 + }, + { + "epoch": 0.160352422907489, + "grad_norm": 1.429835470120866, + "learning_rate": 1.9950986836203374e-05, + "loss": 0.8312792778015137, + "step": 455 + }, + { + "epoch": 0.160704845814978, + "grad_norm": 1.4333167021702193, + "learning_rate": 1.995040899325406e-05, + "loss": 0.7496857643127441, + "step": 456 + }, + { + "epoch": 0.16105726872246695, + "grad_norm": 1.2513531381670333, + "learning_rate": 1.9949827772450555e-05, + "loss": 0.89504075050354, + "step": 457 + }, + { + "epoch": 0.16140969162995594, + "grad_norm": 1.5536951579594835, + "learning_rate": 1.9949243173990172e-05, + "loss": 0.7580761313438416, + "step": 458 + }, + { + "epoch": 0.16176211453744493, + "grad_norm": 1.6782383396512721, + "learning_rate": 1.9948655198071365e-05, + "loss": 0.7826676368713379, + "step": 459 + }, + { + "epoch": 0.16211453744493393, + "grad_norm": 1.5979456835427475, + "learning_rate": 1.9948063844893733e-05, + "loss": 0.7591372728347778, + "step": 460 + }, + { + "epoch": 0.16246696035242292, + "grad_norm": 1.394749193132719, + "learning_rate": 1.994746911465802e-05, + "loss": 0.7366905808448792, + "step": 461 + }, + { + "epoch": 0.16281938325991188, + "grad_norm": 1.2449236570155473, + "learning_rate": 1.9946871007566116e-05, + "loss": 0.7152266502380371, + "step": 462 + }, + { + "epoch": 0.16317180616740087, + "grad_norm": 1.475247855733958, + "learning_rate": 1.994626952382107e-05, + "loss": 0.8411930799484253, + "step": 463 + }, + { + "epoch": 0.16352422907488987, + "grad_norm": 1.1709525471997975, + "learning_rate": 1.9945664663627054e-05, + "loss": 0.6689857244491577, + "step": 464 + }, + { + "epoch": 0.16387665198237886, + "grad_norm": 1.3007920668059838, + "learning_rate": 1.9945056427189408e-05, + "loss": 0.6474499106407166, + 
"step": 465 + }, + { + "epoch": 0.16422907488986785, + "grad_norm": 1.397646475804827, + "learning_rate": 1.9944444814714604e-05, + "loss": 0.7861372232437134, + "step": 466 + }, + { + "epoch": 0.16458149779735684, + "grad_norm": 1.4072541980161448, + "learning_rate": 1.9943829826410273e-05, + "loss": 0.8301665186882019, + "step": 467 + }, + { + "epoch": 0.1649339207048458, + "grad_norm": 1.1473159016242473, + "learning_rate": 1.9943211462485176e-05, + "loss": 0.661811888217926, + "step": 468 + }, + { + "epoch": 0.1652863436123348, + "grad_norm": 1.4009911983471504, + "learning_rate": 1.9942589723149233e-05, + "loss": 0.7768537402153015, + "step": 469 + }, + { + "epoch": 0.1656387665198238, + "grad_norm": 1.209922489625636, + "learning_rate": 1.9941964608613503e-05, + "loss": 0.6139112710952759, + "step": 470 + }, + { + "epoch": 0.16599118942731278, + "grad_norm": 1.3814257371396368, + "learning_rate": 1.9941336119090193e-05, + "loss": 0.8284693956375122, + "step": 471 + }, + { + "epoch": 0.16634361233480177, + "grad_norm": 1.2594577624707568, + "learning_rate": 1.9940704254792655e-05, + "loss": 0.7281739711761475, + "step": 472 + }, + { + "epoch": 0.16669603524229074, + "grad_norm": 1.4773463672265492, + "learning_rate": 1.994006901593539e-05, + "loss": 0.687767744064331, + "step": 473 + }, + { + "epoch": 0.16704845814977973, + "grad_norm": 1.3067539084660165, + "learning_rate": 1.9939430402734046e-05, + "loss": 0.7553595304489136, + "step": 474 + }, + { + "epoch": 0.16740088105726872, + "grad_norm": 1.5537103296420662, + "learning_rate": 1.99387884154054e-05, + "loss": 0.9263294339179993, + "step": 475 + }, + { + "epoch": 0.1677533039647577, + "grad_norm": 1.5514792381885942, + "learning_rate": 1.9938143054167397e-05, + "loss": 0.7014337182044983, + "step": 476 + }, + { + "epoch": 0.1681057268722467, + "grad_norm": 1.1598559513797833, + "learning_rate": 1.9937494319239112e-05, + "loss": 0.6454538106918335, + "step": 477 + }, + { + "epoch": 0.1684581497797357, + 
"grad_norm": 1.3402764899565285, + "learning_rate": 1.9936842210840775e-05, + "loss": 0.7792352437973022, + "step": 478 + }, + { + "epoch": 0.16881057268722466, + "grad_norm": 1.481603380133959, + "learning_rate": 1.9936186729193753e-05, + "loss": 0.8773127794265747, + "step": 479 + }, + { + "epoch": 0.16916299559471365, + "grad_norm": 1.3472965431143242, + "learning_rate": 1.993552787452056e-05, + "loss": 0.892439603805542, + "step": 480 + }, + { + "epoch": 0.16951541850220264, + "grad_norm": 1.5839752051025837, + "learning_rate": 1.993486564704486e-05, + "loss": 0.89835524559021, + "step": 481 + }, + { + "epoch": 0.16986784140969163, + "grad_norm": 1.4593777249036533, + "learning_rate": 1.9934200046991453e-05, + "loss": 0.8013701438903809, + "step": 482 + }, + { + "epoch": 0.17022026431718063, + "grad_norm": 1.5168797838116639, + "learning_rate": 1.9933531074586296e-05, + "loss": 0.8086763620376587, + "step": 483 + }, + { + "epoch": 0.17057268722466962, + "grad_norm": 1.4399310447978144, + "learning_rate": 1.9932858730056486e-05, + "loss": 0.7736518383026123, + "step": 484 + }, + { + "epoch": 0.17092511013215858, + "grad_norm": 1.2982542574143365, + "learning_rate": 1.9932183013630257e-05, + "loss": 0.6247539520263672, + "step": 485 + }, + { + "epoch": 0.17127753303964757, + "grad_norm": 1.519445958865324, + "learning_rate": 1.9931503925536996e-05, + "loss": 0.7172006368637085, + "step": 486 + }, + { + "epoch": 0.17162995594713656, + "grad_norm": 1.3043787656359138, + "learning_rate": 1.993082146600723e-05, + "loss": 0.7854465246200562, + "step": 487 + }, + { + "epoch": 0.17198237885462556, + "grad_norm": 1.2038371426907561, + "learning_rate": 1.9930135635272637e-05, + "loss": 0.7018419504165649, + "step": 488 + }, + { + "epoch": 0.17233480176211455, + "grad_norm": 1.2578522146284077, + "learning_rate": 1.9929446433566033e-05, + "loss": 0.783660352230072, + "step": 489 + }, + { + "epoch": 0.1726872246696035, + "grad_norm": 1.4288043068768257, + "learning_rate": 
1.992875386112138e-05, + "loss": 1.0166207551956177, + "step": 490 + }, + { + "epoch": 0.1730396475770925, + "grad_norm": 1.5208280960226344, + "learning_rate": 1.9928057918173786e-05, + "loss": 0.7692895531654358, + "step": 491 + }, + { + "epoch": 0.1733920704845815, + "grad_norm": 1.3733404774184526, + "learning_rate": 1.9927358604959503e-05, + "loss": 0.8005259037017822, + "step": 492 + }, + { + "epoch": 0.1737444933920705, + "grad_norm": 1.3189354109245792, + "learning_rate": 1.9926655921715924e-05, + "loss": 0.6780292987823486, + "step": 493 + }, + { + "epoch": 0.17409691629955948, + "grad_norm": 1.2272422506889333, + "learning_rate": 1.9925949868681587e-05, + "loss": 0.6501175165176392, + "step": 494 + }, + { + "epoch": 0.17444933920704847, + "grad_norm": 1.3095934443108421, + "learning_rate": 1.9925240446096176e-05, + "loss": 0.781839907169342, + "step": 495 + }, + { + "epoch": 0.17480176211453743, + "grad_norm": 1.4508599784840917, + "learning_rate": 1.992452765420052e-05, + "loss": 0.7617994546890259, + "step": 496 + }, + { + "epoch": 0.17515418502202643, + "grad_norm": 1.2324738440312524, + "learning_rate": 1.992381149323659e-05, + "loss": 0.8019097447395325, + "step": 497 + }, + { + "epoch": 0.17550660792951542, + "grad_norm": 1.3071824216187324, + "learning_rate": 1.9923091963447496e-05, + "loss": 0.7526847124099731, + "step": 498 + }, + { + "epoch": 0.1758590308370044, + "grad_norm": 1.340463358272731, + "learning_rate": 1.9922369065077497e-05, + "loss": 0.7101150751113892, + "step": 499 + }, + { + "epoch": 0.1762114537444934, + "grad_norm": 1.396850141714641, + "learning_rate": 1.9921642798372e-05, + "loss": 0.8519806861877441, + "step": 500 + }, + { + "epoch": 0.17656387665198237, + "grad_norm": 1.5427241760761283, + "learning_rate": 1.9920913163577542e-05, + "loss": 0.774759829044342, + "step": 501 + }, + { + "epoch": 0.17691629955947136, + "grad_norm": 1.4501760642130928, + "learning_rate": 1.992018016094182e-05, + "loss": 0.8597595691680908, + 
"step": 502 + }, + { + "epoch": 0.17726872246696035, + "grad_norm": 1.6336800938277667, + "learning_rate": 1.9919443790713658e-05, + "loss": 0.7023826241493225, + "step": 503 + }, + { + "epoch": 0.17762114537444934, + "grad_norm": 1.8758125980343456, + "learning_rate": 1.991870405314303e-05, + "loss": 0.8290892839431763, + "step": 504 + }, + { + "epoch": 0.17797356828193833, + "grad_norm": 1.368620384992611, + "learning_rate": 1.9917960948481062e-05, + "loss": 0.9240517020225525, + "step": 505 + }, + { + "epoch": 0.17832599118942732, + "grad_norm": 1.4203507781601712, + "learning_rate": 1.9917214476980012e-05, + "loss": 0.8247153759002686, + "step": 506 + }, + { + "epoch": 0.1786784140969163, + "grad_norm": 1.5364946844029868, + "learning_rate": 1.991646463889328e-05, + "loss": 0.9101368188858032, + "step": 507 + }, + { + "epoch": 0.17903083700440528, + "grad_norm": 1.3883082747026767, + "learning_rate": 1.9915711434475416e-05, + "loss": 0.7688114643096924, + "step": 508 + }, + { + "epoch": 0.17938325991189427, + "grad_norm": 1.41173691792053, + "learning_rate": 1.9914954863982106e-05, + "loss": 0.820112943649292, + "step": 509 + }, + { + "epoch": 0.17973568281938326, + "grad_norm": 1.2372115494246672, + "learning_rate": 1.9914194927670186e-05, + "loss": 0.6393542289733887, + "step": 510 + }, + { + "epoch": 0.18008810572687226, + "grad_norm": 1.5514274082803117, + "learning_rate": 1.991343162579763e-05, + "loss": 0.9463154673576355, + "step": 511 + }, + { + "epoch": 0.18044052863436125, + "grad_norm": 1.2818287593652882, + "learning_rate": 1.9912664958623556e-05, + "loss": 0.9498215913772583, + "step": 512 + }, + { + "epoch": 0.1807929515418502, + "grad_norm": 1.3538150363158374, + "learning_rate": 1.991189492640822e-05, + "loss": 0.7659052014350891, + "step": 513 + }, + { + "epoch": 0.1811453744493392, + "grad_norm": 1.3014303918670855, + "learning_rate": 1.9911121529413028e-05, + "loss": 0.9946317672729492, + "step": 514 + }, + { + "epoch": 0.1814977973568282, + 
"grad_norm": 1.2888096801517381, + "learning_rate": 1.991034476790052e-05, + "loss": 0.762086033821106, + "step": 515 + }, + { + "epoch": 0.18185022026431719, + "grad_norm": 1.2685969775930512, + "learning_rate": 1.990956464213438e-05, + "loss": 0.7507720589637756, + "step": 516 + }, + { + "epoch": 0.18220264317180618, + "grad_norm": 1.2567492686992259, + "learning_rate": 1.990878115237945e-05, + "loss": 0.7859716415405273, + "step": 517 + }, + { + "epoch": 0.18255506607929514, + "grad_norm": 1.3199744761398897, + "learning_rate": 1.9907994298901688e-05, + "loss": 0.8585234880447388, + "step": 518 + }, + { + "epoch": 0.18290748898678413, + "grad_norm": 1.2014345702103446, + "learning_rate": 1.990720408196821e-05, + "loss": 0.8569823503494263, + "step": 519 + }, + { + "epoch": 0.18325991189427313, + "grad_norm": 1.4066812868889107, + "learning_rate": 1.990641050184727e-05, + "loss": 0.8297367095947266, + "step": 520 + }, + { + "epoch": 0.18361233480176212, + "grad_norm": 1.4158335601181062, + "learning_rate": 1.9905613558808262e-05, + "loss": 0.7918041348457336, + "step": 521 + }, + { + "epoch": 0.1839647577092511, + "grad_norm": 1.3066639133280875, + "learning_rate": 1.9904813253121727e-05, + "loss": 0.8322931528091431, + "step": 522 + }, + { + "epoch": 0.1843171806167401, + "grad_norm": 1.600997340162295, + "learning_rate": 1.990400958505934e-05, + "loss": 0.6822292804718018, + "step": 523 + }, + { + "epoch": 0.18466960352422906, + "grad_norm": 1.344951810567012, + "learning_rate": 1.9903202554893925e-05, + "loss": 0.8989835977554321, + "step": 524 + }, + { + "epoch": 0.18502202643171806, + "grad_norm": 1.4644963211452282, + "learning_rate": 1.990239216289944e-05, + "loss": 0.671294093132019, + "step": 525 + }, + { + "epoch": 0.18537444933920705, + "grad_norm": 1.3104672306859468, + "learning_rate": 1.990157840935099e-05, + "loss": 0.9045379161834717, + "step": 526 + }, + { + "epoch": 0.18572687224669604, + "grad_norm": 1.2000125993399395, + "learning_rate": 
1.990076129452482e-05, + "loss": 0.7117471694946289, + "step": 527 + }, + { + "epoch": 0.18607929515418503, + "grad_norm": 1.406356072194557, + "learning_rate": 1.9899940818698315e-05, + "loss": 0.890752911567688, + "step": 528 + }, + { + "epoch": 0.186431718061674, + "grad_norm": 1.3199977159633904, + "learning_rate": 1.9899116982149994e-05, + "loss": 0.7209222316741943, + "step": 529 + }, + { + "epoch": 0.186784140969163, + "grad_norm": 1.4346812218183875, + "learning_rate": 1.9898289785159534e-05, + "loss": 0.6912863254547119, + "step": 530 + }, + { + "epoch": 0.18713656387665198, + "grad_norm": 1.4271479463954384, + "learning_rate": 1.9897459228007736e-05, + "loss": 0.7060319185256958, + "step": 531 + }, + { + "epoch": 0.18748898678414097, + "grad_norm": 1.2685161281492263, + "learning_rate": 1.9896625310976553e-05, + "loss": 0.6975364685058594, + "step": 532 + }, + { + "epoch": 0.18784140969162996, + "grad_norm": 1.513572022269192, + "learning_rate": 1.989578803434907e-05, + "loss": 0.8576006293296814, + "step": 533 + }, + { + "epoch": 0.18819383259911895, + "grad_norm": 1.3324082350150075, + "learning_rate": 1.9894947398409516e-05, + "loss": 0.7182095646858215, + "step": 534 + }, + { + "epoch": 0.18854625550660792, + "grad_norm": 1.4808328503550712, + "learning_rate": 1.9894103403443265e-05, + "loss": 0.7546031475067139, + "step": 535 + }, + { + "epoch": 0.1888986784140969, + "grad_norm": 1.4412494852286755, + "learning_rate": 1.9893256049736824e-05, + "loss": 0.7083312273025513, + "step": 536 + }, + { + "epoch": 0.1892511013215859, + "grad_norm": 1.178231291011438, + "learning_rate": 1.9892405337577846e-05, + "loss": 0.5614915490150452, + "step": 537 + }, + { + "epoch": 0.1896035242290749, + "grad_norm": 1.440292679191453, + "learning_rate": 1.9891551267255114e-05, + "loss": 0.7647485733032227, + "step": 538 + }, + { + "epoch": 0.18995594713656389, + "grad_norm": 1.2459192275692494, + "learning_rate": 1.9890693839058566e-05, + "loss": 0.776042103767395, + 
"step": 539 + }, + { + "epoch": 0.19030837004405288, + "grad_norm": 1.4553672353845373, + "learning_rate": 1.9889833053279268e-05, + "loss": 0.7694810628890991, + "step": 540 + }, + { + "epoch": 0.19066079295154184, + "grad_norm": 1.3521577159056863, + "learning_rate": 1.9888968910209433e-05, + "loss": 0.6935995817184448, + "step": 541 + }, + { + "epoch": 0.19101321585903083, + "grad_norm": 1.7084028073476007, + "learning_rate": 1.988810141014241e-05, + "loss": 0.7538039088249207, + "step": 542 + }, + { + "epoch": 0.19136563876651982, + "grad_norm": 1.305324041919721, + "learning_rate": 1.9887230553372686e-05, + "loss": 0.8149158954620361, + "step": 543 + }, + { + "epoch": 0.19171806167400882, + "grad_norm": 1.6378712299065388, + "learning_rate": 1.988635634019589e-05, + "loss": 0.7776780128479004, + "step": 544 + }, + { + "epoch": 0.1920704845814978, + "grad_norm": 1.48919568324374, + "learning_rate": 1.9885478770908793e-05, + "loss": 0.8527307510375977, + "step": 545 + }, + { + "epoch": 0.19242290748898677, + "grad_norm": 1.1804269388923583, + "learning_rate": 1.98845978458093e-05, + "loss": 0.7239484190940857, + "step": 546 + }, + { + "epoch": 0.19277533039647576, + "grad_norm": 1.4096556213691402, + "learning_rate": 1.9883713565196462e-05, + "loss": 0.6937836408615112, + "step": 547 + }, + { + "epoch": 0.19312775330396476, + "grad_norm": 1.3705456771921078, + "learning_rate": 1.9882825929370456e-05, + "loss": 0.8567923903465271, + "step": 548 + }, + { + "epoch": 0.19348017621145375, + "grad_norm": 1.2851787163283013, + "learning_rate": 1.9881934938632615e-05, + "loss": 0.7948861122131348, + "step": 549 + }, + { + "epoch": 0.19383259911894274, + "grad_norm": 1.2883171824741761, + "learning_rate": 1.9881040593285398e-05, + "loss": 0.6808983087539673, + "step": 550 + }, + { + "epoch": 0.19418502202643173, + "grad_norm": 1.349988333670182, + "learning_rate": 1.9880142893632412e-05, + "loss": 0.9089908599853516, + "step": 551 + }, + { + "epoch": 0.1945374449339207, 
+ "grad_norm": 1.373721278775904, + "learning_rate": 1.9879241839978393e-05, + "loss": 0.7947918176651001, + "step": 552 + }, + { + "epoch": 0.1948898678414097, + "grad_norm": 1.1679807856929723, + "learning_rate": 1.9878337432629224e-05, + "loss": 0.880418598651886, + "step": 553 + }, + { + "epoch": 0.19524229074889868, + "grad_norm": 1.4208435552970164, + "learning_rate": 1.9877429671891917e-05, + "loss": 0.8845832347869873, + "step": 554 + }, + { + "epoch": 0.19559471365638767, + "grad_norm": 1.4905396338040395, + "learning_rate": 1.9876518558074638e-05, + "loss": 0.7635341286659241, + "step": 555 + }, + { + "epoch": 0.19594713656387666, + "grad_norm": 1.3799865187677636, + "learning_rate": 1.9875604091486678e-05, + "loss": 0.9301069974899292, + "step": 556 + }, + { + "epoch": 0.19629955947136563, + "grad_norm": 1.255573262915276, + "learning_rate": 1.9874686272438467e-05, + "loss": 0.8788589239120483, + "step": 557 + }, + { + "epoch": 0.19665198237885462, + "grad_norm": 1.2418539833380446, + "learning_rate": 1.987376510124158e-05, + "loss": 0.7452565431594849, + "step": 558 + }, + { + "epoch": 0.1970044052863436, + "grad_norm": 1.5278788646328887, + "learning_rate": 1.9872840578208722e-05, + "loss": 0.819628119468689, + "step": 559 + }, + { + "epoch": 0.1973568281938326, + "grad_norm": 1.4844496784402743, + "learning_rate": 1.9871912703653744e-05, + "loss": 0.7807571291923523, + "step": 560 + }, + { + "epoch": 0.1977092511013216, + "grad_norm": 1.2611555469759475, + "learning_rate": 1.9870981477891626e-05, + "loss": 0.7091392278671265, + "step": 561 + }, + { + "epoch": 0.19806167400881058, + "grad_norm": 1.2433638561435678, + "learning_rate": 1.9870046901238496e-05, + "loss": 0.8174105882644653, + "step": 562 + }, + { + "epoch": 0.19841409691629955, + "grad_norm": 1.2352337461151273, + "learning_rate": 1.9869108974011607e-05, + "loss": 0.696865439414978, + "step": 563 + }, + { + "epoch": 0.19876651982378854, + "grad_norm": 1.4794543945089762, + "learning_rate": 
1.986816769652936e-05, + "loss": 0.914303183555603, + "step": 564 + }, + { + "epoch": 0.19911894273127753, + "grad_norm": 1.2869835497381619, + "learning_rate": 1.986722306911129e-05, + "loss": 0.8397856950759888, + "step": 565 + }, + { + "epoch": 0.19947136563876652, + "grad_norm": 1.216082157504287, + "learning_rate": 1.9866275092078066e-05, + "loss": 0.7206380367279053, + "step": 566 + }, + { + "epoch": 0.19982378854625552, + "grad_norm": 1.3259146958291776, + "learning_rate": 1.98653237657515e-05, + "loss": 0.7017316818237305, + "step": 567 + }, + { + "epoch": 0.2001762114537445, + "grad_norm": 1.6365100661152858, + "learning_rate": 1.9864369090454538e-05, + "loss": 0.8797772526741028, + "step": 568 + }, + { + "epoch": 0.20052863436123347, + "grad_norm": 1.3948984288943356, + "learning_rate": 1.9863411066511257e-05, + "loss": 0.6643391847610474, + "step": 569 + }, + { + "epoch": 0.20088105726872246, + "grad_norm": 1.4631143705399865, + "learning_rate": 1.9862449694246878e-05, + "loss": 0.8662393093109131, + "step": 570 + }, + { + "epoch": 0.20123348017621145, + "grad_norm": 1.4103722629610054, + "learning_rate": 1.9861484973987762e-05, + "loss": 0.7766140699386597, + "step": 571 + }, + { + "epoch": 0.20158590308370045, + "grad_norm": 1.4422501075340284, + "learning_rate": 1.9860516906061397e-05, + "loss": 0.8582239151000977, + "step": 572 + }, + { + "epoch": 0.20193832599118944, + "grad_norm": 1.2359229208879663, + "learning_rate": 1.9859545490796414e-05, + "loss": 0.5838385820388794, + "step": 573 + }, + { + "epoch": 0.2022907488986784, + "grad_norm": 1.4256083108556754, + "learning_rate": 1.9858570728522573e-05, + "loss": 0.6715164184570312, + "step": 574 + }, + { + "epoch": 0.2026431718061674, + "grad_norm": 1.604413564730453, + "learning_rate": 1.9857592619570783e-05, + "loss": 0.7665218114852905, + "step": 575 + }, + { + "epoch": 0.20299559471365639, + "grad_norm": 1.3992633216102752, + "learning_rate": 1.985661116427308e-05, + "loss": 0.8060458898544312, 
+ "step": 576 + }, + { + "epoch": 0.20334801762114538, + "grad_norm": 1.3647027340900928, + "learning_rate": 1.985562636296264e-05, + "loss": 0.8354060649871826, + "step": 577 + }, + { + "epoch": 0.20370044052863437, + "grad_norm": 1.61178503454425, + "learning_rate": 1.985463821597376e-05, + "loss": 0.8814351558685303, + "step": 578 + }, + { + "epoch": 0.20405286343612336, + "grad_norm": 1.3581614903846795, + "learning_rate": 1.9853646723641895e-05, + "loss": 0.9068918228149414, + "step": 579 + }, + { + "epoch": 0.20440528634361232, + "grad_norm": 1.4217775001953692, + "learning_rate": 1.9852651886303624e-05, + "loss": 0.7671997547149658, + "step": 580 + }, + { + "epoch": 0.20475770925110132, + "grad_norm": 1.2987191699893856, + "learning_rate": 1.9851653704296664e-05, + "loss": 0.7906886339187622, + "step": 581 + }, + { + "epoch": 0.2051101321585903, + "grad_norm": 1.4550942850887114, + "learning_rate": 1.985065217795987e-05, + "loss": 0.8424232006072998, + "step": 582 + }, + { + "epoch": 0.2054625550660793, + "grad_norm": 1.2767538498679667, + "learning_rate": 1.984964730763322e-05, + "loss": 0.8335819244384766, + "step": 583 + }, + { + "epoch": 0.2058149779735683, + "grad_norm": 1.2913652769028938, + "learning_rate": 1.9848639093657844e-05, + "loss": 0.8340694308280945, + "step": 584 + }, + { + "epoch": 0.20616740088105726, + "grad_norm": 1.3161255240413319, + "learning_rate": 1.9847627536376e-05, + "loss": 0.9228274822235107, + "step": 585 + }, + { + "epoch": 0.20651982378854625, + "grad_norm": 1.548405161064148, + "learning_rate": 1.984661263613107e-05, + "loss": 0.7843449115753174, + "step": 586 + }, + { + "epoch": 0.20687224669603524, + "grad_norm": 1.3039537503613003, + "learning_rate": 1.9845594393267594e-05, + "loss": 0.7411990165710449, + "step": 587 + }, + { + "epoch": 0.20722466960352423, + "grad_norm": 1.3644443695047568, + "learning_rate": 1.9844572808131228e-05, + "loss": 0.7520540356636047, + "step": 588 + }, + { + "epoch": 0.20757709251101322, + 
"grad_norm": 1.2894133104841217, + "learning_rate": 1.9843547881068763e-05, + "loss": 0.795365571975708, + "step": 589 + }, + { + "epoch": 0.20792951541850221, + "grad_norm": 1.280356655308606, + "learning_rate": 1.984251961242814e-05, + "loss": 0.8415528535842896, + "step": 590 + }, + { + "epoch": 0.20828193832599118, + "grad_norm": 1.4654647998731167, + "learning_rate": 1.9841488002558416e-05, + "loss": 0.8555570244789124, + "step": 591 + }, + { + "epoch": 0.20863436123348017, + "grad_norm": 1.314593410908928, + "learning_rate": 1.9840453051809792e-05, + "loss": 0.8214600086212158, + "step": 592 + }, + { + "epoch": 0.20898678414096916, + "grad_norm": 1.2598900623176714, + "learning_rate": 1.9839414760533607e-05, + "loss": 0.7746415138244629, + "step": 593 + }, + { + "epoch": 0.20933920704845815, + "grad_norm": 1.6285440778435663, + "learning_rate": 1.9838373129082325e-05, + "loss": 1.0861419439315796, + "step": 594 + }, + { + "epoch": 0.20969162995594715, + "grad_norm": 1.327372383451943, + "learning_rate": 1.9837328157809547e-05, + "loss": 0.7530953884124756, + "step": 595 + }, + { + "epoch": 0.21004405286343614, + "grad_norm": 1.420023169388647, + "learning_rate": 1.9836279847070004e-05, + "loss": 0.8811959624290466, + "step": 596 + }, + { + "epoch": 0.2103964757709251, + "grad_norm": 1.2274254083036087, + "learning_rate": 1.9835228197219573e-05, + "loss": 0.7956523299217224, + "step": 597 + }, + { + "epoch": 0.2107488986784141, + "grad_norm": 1.306015861681406, + "learning_rate": 1.9834173208615253e-05, + "loss": 0.8710414171218872, + "step": 598 + }, + { + "epoch": 0.21110132158590308, + "grad_norm": 1.303850147164254, + "learning_rate": 1.983311488161518e-05, + "loss": 0.9057297706604004, + "step": 599 + }, + { + "epoch": 0.21145374449339208, + "grad_norm": 1.2517049783711822, + "learning_rate": 1.983205321657862e-05, + "loss": 0.7531988024711609, + "step": 600 + }, + { + "epoch": 0.21180616740088107, + "grad_norm": 1.392455719061042, + "learning_rate": 
1.983098821386598e-05, + "loss": 0.6508063077926636, + "step": 601 + }, + { + "epoch": 0.21215859030837003, + "grad_norm": 1.238668234857589, + "learning_rate": 1.9829919873838796e-05, + "loss": 0.7267025709152222, + "step": 602 + }, + { + "epoch": 0.21251101321585902, + "grad_norm": 1.2232739877442529, + "learning_rate": 1.9828848196859727e-05, + "loss": 0.6930510997772217, + "step": 603 + }, + { + "epoch": 0.21286343612334802, + "grad_norm": 1.4104259448916805, + "learning_rate": 1.9827773183292583e-05, + "loss": 0.7613120079040527, + "step": 604 + }, + { + "epoch": 0.213215859030837, + "grad_norm": 1.2586328753898472, + "learning_rate": 1.9826694833502295e-05, + "loss": 0.763299822807312, + "step": 605 + }, + { + "epoch": 0.213568281938326, + "grad_norm": 1.4431352363644856, + "learning_rate": 1.9825613147854928e-05, + "loss": 0.7599194049835205, + "step": 606 + }, + { + "epoch": 0.213920704845815, + "grad_norm": 1.3487971590690426, + "learning_rate": 1.9824528126717687e-05, + "loss": 0.869399905204773, + "step": 607 + }, + { + "epoch": 0.21427312775330395, + "grad_norm": 1.3853231700631432, + "learning_rate": 1.9823439770458893e-05, + "loss": 0.733409583568573, + "step": 608 + }, + { + "epoch": 0.21462555066079295, + "grad_norm": 1.2766333009964275, + "learning_rate": 1.9822348079448014e-05, + "loss": 0.8302386999130249, + "step": 609 + }, + { + "epoch": 0.21497797356828194, + "grad_norm": 1.1872454682531661, + "learning_rate": 1.9821253054055645e-05, + "loss": 0.8234561681747437, + "step": 610 + }, + { + "epoch": 0.21533039647577093, + "grad_norm": 1.336729476582052, + "learning_rate": 1.9820154694653514e-05, + "loss": 0.81988525390625, + "step": 611 + }, + { + "epoch": 0.21568281938325992, + "grad_norm": 1.1619766622665528, + "learning_rate": 1.9819053001614478e-05, + "loss": 0.6437678933143616, + "step": 612 + }, + { + "epoch": 0.21603524229074889, + "grad_norm": 1.398835884660331, + "learning_rate": 1.9817947975312527e-05, + "loss": 0.8256562948226929, + 
"step": 613 + }, + { + "epoch": 0.21638766519823788, + "grad_norm": 1.4423824320045469, + "learning_rate": 1.9816839616122787e-05, + "loss": 0.8204725980758667, + "step": 614 + }, + { + "epoch": 0.21674008810572687, + "grad_norm": 1.4648639859051293, + "learning_rate": 1.9815727924421507e-05, + "loss": 0.7492775917053223, + "step": 615 + }, + { + "epoch": 0.21709251101321586, + "grad_norm": 1.4585481343848268, + "learning_rate": 1.9814612900586075e-05, + "loss": 0.629736065864563, + "step": 616 + }, + { + "epoch": 0.21744493392070485, + "grad_norm": 1.3908853161597456, + "learning_rate": 1.9813494544995e-05, + "loss": 0.7974159717559814, + "step": 617 + }, + { + "epoch": 0.21779735682819384, + "grad_norm": 1.4158223772493663, + "learning_rate": 1.981237285802794e-05, + "loss": 0.8367668390274048, + "step": 618 + }, + { + "epoch": 0.2181497797356828, + "grad_norm": 1.2051753792883582, + "learning_rate": 1.9811247840065667e-05, + "loss": 0.7942521572113037, + "step": 619 + }, + { + "epoch": 0.2185022026431718, + "grad_norm": 1.3827277102573685, + "learning_rate": 1.981011949149009e-05, + "loss": 0.7863545417785645, + "step": 620 + }, + { + "epoch": 0.2188546255506608, + "grad_norm": 1.3809343727942922, + "learning_rate": 1.9808987812684247e-05, + "loss": 0.8667019605636597, + "step": 621 + }, + { + "epoch": 0.21920704845814978, + "grad_norm": 1.5738475739563456, + "learning_rate": 1.9807852804032306e-05, + "loss": 0.8555353283882141, + "step": 622 + }, + { + "epoch": 0.21955947136563878, + "grad_norm": 1.244926951925701, + "learning_rate": 1.9806714465919573e-05, + "loss": 0.8170013427734375, + "step": 623 + }, + { + "epoch": 0.21991189427312777, + "grad_norm": 1.331256668600172, + "learning_rate": 1.9805572798732475e-05, + "loss": 0.9277342557907104, + "step": 624 + }, + { + "epoch": 0.22026431718061673, + "grad_norm": 1.4090219105247375, + "learning_rate": 1.980442780285857e-05, + "loss": 0.6536964178085327, + "step": 625 + }, + { + "epoch": 0.22061674008810572, + 
"grad_norm": 1.4088256669280743, + "learning_rate": 1.980327947868655e-05, + "loss": 0.7197799682617188, + "step": 626 + }, + { + "epoch": 0.22096916299559471, + "grad_norm": 1.1381025512945977, + "learning_rate": 1.980212782660624e-05, + "loss": 0.7558401823043823, + "step": 627 + }, + { + "epoch": 0.2213215859030837, + "grad_norm": 1.4031284519802554, + "learning_rate": 1.9800972847008586e-05, + "loss": 0.7918291091918945, + "step": 628 + }, + { + "epoch": 0.2216740088105727, + "grad_norm": 1.4810910878326864, + "learning_rate": 1.979981454028567e-05, + "loss": 0.7159492373466492, + "step": 629 + }, + { + "epoch": 0.22202643171806166, + "grad_norm": 1.5418605472416471, + "learning_rate": 1.9798652906830694e-05, + "loss": 0.854686439037323, + "step": 630 + }, + { + "epoch": 0.22237885462555065, + "grad_norm": 1.6329149097762432, + "learning_rate": 1.9797487947038007e-05, + "loss": 0.736785888671875, + "step": 631 + }, + { + "epoch": 0.22273127753303965, + "grad_norm": 1.2749674694710476, + "learning_rate": 1.9796319661303065e-05, + "loss": 0.7092996835708618, + "step": 632 + }, + { + "epoch": 0.22308370044052864, + "grad_norm": 1.4592836621170417, + "learning_rate": 1.9795148050022477e-05, + "loss": 0.8890455961227417, + "step": 633 + }, + { + "epoch": 0.22343612334801763, + "grad_norm": 1.2618947600836363, + "learning_rate": 1.979397311359396e-05, + "loss": 0.7476855516433716, + "step": 634 + }, + { + "epoch": 0.22378854625550662, + "grad_norm": 1.4307363207113668, + "learning_rate": 1.979279485241637e-05, + "loss": 0.7810029983520508, + "step": 635 + }, + { + "epoch": 0.22414096916299558, + "grad_norm": 1.2070666788938549, + "learning_rate": 1.9791613266889688e-05, + "loss": 0.6679891347885132, + "step": 636 + }, + { + "epoch": 0.22449339207048458, + "grad_norm": 1.6320710320094325, + "learning_rate": 1.979042835741503e-05, + "loss": 0.809790849685669, + "step": 637 + }, + { + "epoch": 0.22484581497797357, + "grad_norm": 1.6737967848633384, + "learning_rate": 
1.9789240124394638e-05, + "loss": 0.8347213268280029, + "step": 638 + }, + { + "epoch": 0.22519823788546256, + "grad_norm": 1.1935958187808327, + "learning_rate": 1.9788048568231875e-05, + "loss": 0.6620997190475464, + "step": 639 + }, + { + "epoch": 0.22555066079295155, + "grad_norm": 1.2898316066784317, + "learning_rate": 1.9786853689331235e-05, + "loss": 0.7727694511413574, + "step": 640 + }, + { + "epoch": 0.22590308370044052, + "grad_norm": 1.2854878709867101, + "learning_rate": 1.9785655488098348e-05, + "loss": 0.7433278560638428, + "step": 641 + }, + { + "epoch": 0.2262555066079295, + "grad_norm": 1.3523753090224933, + "learning_rate": 1.9784453964939966e-05, + "loss": 0.7375571727752686, + "step": 642 + }, + { + "epoch": 0.2266079295154185, + "grad_norm": 1.3285668366741343, + "learning_rate": 1.9783249120263962e-05, + "loss": 0.5838407874107361, + "step": 643 + }, + { + "epoch": 0.2269603524229075, + "grad_norm": 1.3906475095958148, + "learning_rate": 1.978204095447935e-05, + "loss": 0.7120088934898376, + "step": 644 + }, + { + "epoch": 0.22731277533039648, + "grad_norm": 1.5058740006044322, + "learning_rate": 1.9780829467996262e-05, + "loss": 0.7668102383613586, + "step": 645 + }, + { + "epoch": 0.22766519823788547, + "grad_norm": 1.2993959173766831, + "learning_rate": 1.977961466122596e-05, + "loss": 0.748942494392395, + "step": 646 + }, + { + "epoch": 0.22801762114537444, + "grad_norm": 1.476253609353715, + "learning_rate": 1.9778396534580836e-05, + "loss": 0.7569374442100525, + "step": 647 + }, + { + "epoch": 0.22837004405286343, + "grad_norm": 1.352884217242173, + "learning_rate": 1.97771750884744e-05, + "loss": 0.7981363534927368, + "step": 648 + }, + { + "epoch": 0.22872246696035242, + "grad_norm": 1.5069792289976334, + "learning_rate": 1.97759503233213e-05, + "loss": 0.7501301765441895, + "step": 649 + }, + { + "epoch": 0.2290748898678414, + "grad_norm": 1.4079968546467614, + "learning_rate": 1.9774722239537305e-05, + "loss": 0.7880003452301025, + 
"step": 650 + }, + { + "epoch": 0.2294273127753304, + "grad_norm": 1.3141024886679253, + "learning_rate": 1.977349083753931e-05, + "loss": 0.9007930755615234, + "step": 651 + }, + { + "epoch": 0.2297797356828194, + "grad_norm": 1.1634171776911992, + "learning_rate": 1.9772256117745335e-05, + "loss": 0.6291126012802124, + "step": 652 + }, + { + "epoch": 0.23013215859030836, + "grad_norm": 1.1487631323898542, + "learning_rate": 1.9771018080574534e-05, + "loss": 0.8155031204223633, + "step": 653 + }, + { + "epoch": 0.23048458149779735, + "grad_norm": 1.2941785819245946, + "learning_rate": 1.976977672644718e-05, + "loss": 0.7103240489959717, + "step": 654 + }, + { + "epoch": 0.23083700440528634, + "grad_norm": 1.4170836267106273, + "learning_rate": 1.9768532055784678e-05, + "loss": 0.8590278625488281, + "step": 655 + }, + { + "epoch": 0.23118942731277534, + "grad_norm": 1.6156852038452685, + "learning_rate": 1.9767284069009545e-05, + "loss": 0.7729001641273499, + "step": 656 + }, + { + "epoch": 0.23154185022026433, + "grad_norm": 1.543950265697803, + "learning_rate": 1.9766032766545445e-05, + "loss": 0.8287409543991089, + "step": 657 + }, + { + "epoch": 0.2318942731277533, + "grad_norm": 1.327581925526745, + "learning_rate": 1.9764778148817147e-05, + "loss": 0.8651477098464966, + "step": 658 + }, + { + "epoch": 0.23224669603524228, + "grad_norm": 1.3954780395501065, + "learning_rate": 1.976352021625056e-05, + "loss": 0.7582576274871826, + "step": 659 + }, + { + "epoch": 0.23259911894273128, + "grad_norm": 1.2510605377459358, + "learning_rate": 1.976225896927271e-05, + "loss": 0.6579675078392029, + "step": 660 + }, + { + "epoch": 0.23295154185022027, + "grad_norm": 1.4130234326235036, + "learning_rate": 1.9760994408311757e-05, + "loss": 0.8817700147628784, + "step": 661 + }, + { + "epoch": 0.23330396475770926, + "grad_norm": 1.3799441341137708, + "learning_rate": 1.9759726533796976e-05, + "loss": 0.7241606712341309, + "step": 662 + }, + { + "epoch": 0.23365638766519825, 
+ "grad_norm": 1.2880787484904483, + "learning_rate": 1.9758455346158768e-05, + "loss": 0.7434183359146118, + "step": 663 + }, + { + "epoch": 0.23400881057268721, + "grad_norm": 1.3406860649308125, + "learning_rate": 1.9757180845828663e-05, + "loss": 0.632422685623169, + "step": 664 + }, + { + "epoch": 0.2343612334801762, + "grad_norm": 1.394213400542702, + "learning_rate": 1.9755903033239318e-05, + "loss": 0.7276040315628052, + "step": 665 + }, + { + "epoch": 0.2347136563876652, + "grad_norm": 1.4191729622512466, + "learning_rate": 1.975462190882451e-05, + "loss": 0.8070325255393982, + "step": 666 + }, + { + "epoch": 0.2350660792951542, + "grad_norm": 1.505939347053283, + "learning_rate": 1.9753337473019133e-05, + "loss": 0.867915689945221, + "step": 667 + }, + { + "epoch": 0.23541850220264318, + "grad_norm": 1.2080841146883634, + "learning_rate": 1.9752049726259223e-05, + "loss": 0.7905307412147522, + "step": 668 + }, + { + "epoch": 0.23577092511013215, + "grad_norm": 1.3166867899458456, + "learning_rate": 1.9750758668981925e-05, + "loss": 0.7721420526504517, + "step": 669 + }, + { + "epoch": 0.23612334801762114, + "grad_norm": 1.3746426458674128, + "learning_rate": 1.9749464301625515e-05, + "loss": 0.7926005125045776, + "step": 670 + }, + { + "epoch": 0.23647577092511013, + "grad_norm": 1.387001164209418, + "learning_rate": 1.974816662462939e-05, + "loss": 0.7651785612106323, + "step": 671 + }, + { + "epoch": 0.23682819383259912, + "grad_norm": 1.3285492717471519, + "learning_rate": 1.974686563843407e-05, + "loss": 0.7548795938491821, + "step": 672 + }, + { + "epoch": 0.2371806167400881, + "grad_norm": 1.256836928643264, + "learning_rate": 1.9745561343481197e-05, + "loss": 0.5405399799346924, + "step": 673 + }, + { + "epoch": 0.2375330396475771, + "grad_norm": 1.429166434081011, + "learning_rate": 1.9744253740213542e-05, + "loss": 0.7561137080192566, + "step": 674 + }, + { + "epoch": 0.23788546255506607, + "grad_norm": 1.2880562459402407, + "learning_rate": 
1.9742942829074993e-05, + "loss": 0.8809534907341003, + "step": 675 + }, + { + "epoch": 0.23823788546255506, + "grad_norm": 1.4170174919214424, + "learning_rate": 1.974162861051057e-05, + "loss": 0.750350832939148, + "step": 676 + }, + { + "epoch": 0.23859030837004405, + "grad_norm": 1.629083058939835, + "learning_rate": 1.9740311084966398e-05, + "loss": 0.89476478099823, + "step": 677 + }, + { + "epoch": 0.23894273127753304, + "grad_norm": 1.2576348651951754, + "learning_rate": 1.9738990252889748e-05, + "loss": 0.8647176027297974, + "step": 678 + }, + { + "epoch": 0.23929515418502204, + "grad_norm": 1.4086313229573832, + "learning_rate": 1.9737666114728996e-05, + "loss": 0.7331727743148804, + "step": 679 + }, + { + "epoch": 0.239647577092511, + "grad_norm": 1.471872239566745, + "learning_rate": 1.9736338670933642e-05, + "loss": 0.7714364528656006, + "step": 680 + }, + { + "epoch": 0.24, + "grad_norm": 1.2246586432486557, + "learning_rate": 1.973500792195432e-05, + "loss": 0.7840908765792847, + "step": 681 + }, + { + "epoch": 0.24035242290748898, + "grad_norm": 1.5714154435783916, + "learning_rate": 1.9733673868242767e-05, + "loss": 0.8723878860473633, + "step": 682 + }, + { + "epoch": 0.24070484581497797, + "grad_norm": 1.3325473695906174, + "learning_rate": 1.9732336510251864e-05, + "loss": 0.782090425491333, + "step": 683 + }, + { + "epoch": 0.24105726872246697, + "grad_norm": 1.4114017797446734, + "learning_rate": 1.9730995848435594e-05, + "loss": 0.8000990152359009, + "step": 684 + }, + { + "epoch": 0.24140969162995596, + "grad_norm": 1.2098442989857856, + "learning_rate": 1.9729651883249075e-05, + "loss": 0.7499237060546875, + "step": 685 + }, + { + "epoch": 0.24176211453744492, + "grad_norm": 1.376086425817015, + "learning_rate": 1.972830461514854e-05, + "loss": 0.8786858916282654, + "step": 686 + }, + { + "epoch": 0.2421145374449339, + "grad_norm": 1.2058295584451697, + "learning_rate": 1.972695404459134e-05, + "loss": 0.7039557695388794, + "step": 687 + }, 
+ { + "epoch": 0.2424669603524229, + "grad_norm": 1.2391412724176054, + "learning_rate": 1.9725600172035962e-05, + "loss": 0.6699448823928833, + "step": 688 + }, + { + "epoch": 0.2428193832599119, + "grad_norm": 1.4984585662906706, + "learning_rate": 1.9724242997941995e-05, + "loss": 0.6753977537155151, + "step": 689 + }, + { + "epoch": 0.2431718061674009, + "grad_norm": 1.465232022987203, + "learning_rate": 1.9722882522770163e-05, + "loss": 0.7139854431152344, + "step": 690 + }, + { + "epoch": 0.24352422907488988, + "grad_norm": 1.2814158831499989, + "learning_rate": 1.9721518746982296e-05, + "loss": 0.7894896864891052, + "step": 691 + }, + { + "epoch": 0.24387665198237884, + "grad_norm": 1.2615077213285395, + "learning_rate": 1.972015167104136e-05, + "loss": 0.5663755536079407, + "step": 692 + }, + { + "epoch": 0.24422907488986784, + "grad_norm": 2.058599574246893, + "learning_rate": 1.971878129541144e-05, + "loss": 0.8607856035232544, + "step": 693 + }, + { + "epoch": 0.24458149779735683, + "grad_norm": 1.351791839280567, + "learning_rate": 1.9717407620557724e-05, + "loss": 0.7384383678436279, + "step": 694 + }, + { + "epoch": 0.24493392070484582, + "grad_norm": 1.3580988060863546, + "learning_rate": 1.971603064694654e-05, + "loss": 0.6145502328872681, + "step": 695 + }, + { + "epoch": 0.2452863436123348, + "grad_norm": 1.216736398001555, + "learning_rate": 1.9714650375045328e-05, + "loss": 0.6758620738983154, + "step": 696 + }, + { + "epoch": 0.24563876651982378, + "grad_norm": 1.4471588548341505, + "learning_rate": 1.9713266805322643e-05, + "loss": 0.7416598200798035, + "step": 697 + }, + { + "epoch": 0.24599118942731277, + "grad_norm": 1.5476710427855191, + "learning_rate": 1.9711879938248163e-05, + "loss": 0.7603555917739868, + "step": 698 + }, + { + "epoch": 0.24634361233480176, + "grad_norm": 1.442293220466076, + "learning_rate": 1.9710489774292692e-05, + "loss": 0.9119949340820312, + "step": 699 + }, + { + "epoch": 0.24669603524229075, + "grad_norm": 
1.3843099449438452, + "learning_rate": 1.9709096313928144e-05, + "loss": 0.6884537935256958, + "step": 700 + }, + { + "epoch": 0.24704845814977974, + "grad_norm": 1.618333940643818, + "learning_rate": 1.9707699557627554e-05, + "loss": 0.7928721904754639, + "step": 701 + }, + { + "epoch": 0.24740088105726873, + "grad_norm": 1.593414442103489, + "learning_rate": 1.970629950586508e-05, + "loss": 0.888218104839325, + "step": 702 + }, + { + "epoch": 0.2477533039647577, + "grad_norm": 1.484965940613647, + "learning_rate": 1.9704896159115997e-05, + "loss": 0.7949875593185425, + "step": 703 + }, + { + "epoch": 0.2481057268722467, + "grad_norm": 1.5094809465076762, + "learning_rate": 1.970348951785669e-05, + "loss": 0.9031823873519897, + "step": 704 + }, + { + "epoch": 0.24845814977973568, + "grad_norm": 1.4099687182713576, + "learning_rate": 1.9702079582564682e-05, + "loss": 0.636865496635437, + "step": 705 + }, + { + "epoch": 0.24881057268722467, + "grad_norm": 1.5392719282626255, + "learning_rate": 1.9700666353718593e-05, + "loss": 0.731717586517334, + "step": 706 + }, + { + "epoch": 0.24916299559471367, + "grad_norm": 1.5878589631749256, + "learning_rate": 1.9699249831798172e-05, + "loss": 0.7571220397949219, + "step": 707 + }, + { + "epoch": 0.24951541850220263, + "grad_norm": 1.5180992539956903, + "learning_rate": 1.969783001728429e-05, + "loss": 0.6112762689590454, + "step": 708 + }, + { + "epoch": 0.24986784140969162, + "grad_norm": 1.3651864060041954, + "learning_rate": 1.9696406910658918e-05, + "loss": 0.6737902164459229, + "step": 709 + }, + { + "epoch": 0.25022026431718064, + "grad_norm": 1.328645038543607, + "learning_rate": 1.9694980512405167e-05, + "loss": 0.6525848507881165, + "step": 710 + }, + { + "epoch": 0.2505726872246696, + "grad_norm": 1.302186292631501, + "learning_rate": 1.9693550823007248e-05, + "loss": 0.9107403755187988, + "step": 711 + }, + { + "epoch": 0.25092511013215857, + "grad_norm": 1.5423262639437814, + "learning_rate": 
1.96921178429505e-05, + "loss": 0.7373934984207153, + "step": 712 + }, + { + "epoch": 0.25127753303964756, + "grad_norm": 1.4043304459804222, + "learning_rate": 1.9690681572721377e-05, + "loss": 0.6383399963378906, + "step": 713 + }, + { + "epoch": 0.25162995594713655, + "grad_norm": 1.3203935888344693, + "learning_rate": 1.9689242012807442e-05, + "loss": 0.6600236296653748, + "step": 714 + }, + { + "epoch": 0.25198237885462554, + "grad_norm": 1.6489156261044324, + "learning_rate": 1.9687799163697386e-05, + "loss": 0.9195891618728638, + "step": 715 + }, + { + "epoch": 0.25233480176211454, + "grad_norm": 1.300868905936819, + "learning_rate": 1.968635302588101e-05, + "loss": 0.7122433185577393, + "step": 716 + }, + { + "epoch": 0.2526872246696035, + "grad_norm": 1.467731789065586, + "learning_rate": 1.968490359984923e-05, + "loss": 0.7601606845855713, + "step": 717 + }, + { + "epoch": 0.2530396475770925, + "grad_norm": 1.2967441771844141, + "learning_rate": 1.9683450886094087e-05, + "loss": 0.8216352462768555, + "step": 718 + }, + { + "epoch": 0.2533920704845815, + "grad_norm": 1.4134852768930402, + "learning_rate": 1.9681994885108727e-05, + "loss": 0.8783165216445923, + "step": 719 + }, + { + "epoch": 0.2537444933920705, + "grad_norm": 1.5566095938184208, + "learning_rate": 1.9680535597387416e-05, + "loss": 0.7323269844055176, + "step": 720 + }, + { + "epoch": 0.2540969162995595, + "grad_norm": 1.6250423495927373, + "learning_rate": 1.9679073023425542e-05, + "loss": 0.93906170129776, + "step": 721 + }, + { + "epoch": 0.25444933920704843, + "grad_norm": 1.3857164700730882, + "learning_rate": 1.96776071637196e-05, + "loss": 0.774397611618042, + "step": 722 + }, + { + "epoch": 0.2548017621145374, + "grad_norm": 1.3653604324598565, + "learning_rate": 1.9676138018767204e-05, + "loss": 0.6634535789489746, + "step": 723 + }, + { + "epoch": 0.2551541850220264, + "grad_norm": 1.3364894441034205, + "learning_rate": 1.9674665589067082e-05, + "loss": 0.7705625295639038, + 
"step": 724 + }, + { + "epoch": 0.2555066079295154, + "grad_norm": 1.5708708799323368, + "learning_rate": 1.9673189875119082e-05, + "loss": 0.706364631652832, + "step": 725 + }, + { + "epoch": 0.2558590308370044, + "grad_norm": 1.2599963014034798, + "learning_rate": 1.9671710877424158e-05, + "loss": 0.7295894622802734, + "step": 726 + }, + { + "epoch": 0.2562114537444934, + "grad_norm": 1.6926806599843667, + "learning_rate": 1.9670228596484383e-05, + "loss": 0.8135089874267578, + "step": 727 + }, + { + "epoch": 0.2565638766519824, + "grad_norm": 1.5978181657651334, + "learning_rate": 1.966874303280295e-05, + "loss": 0.801734209060669, + "step": 728 + }, + { + "epoch": 0.2569162995594714, + "grad_norm": 1.728546952239603, + "learning_rate": 1.9667254186884164e-05, + "loss": 0.8405104875564575, + "step": 729 + }, + { + "epoch": 0.25726872246696036, + "grad_norm": 1.2523029350782668, + "learning_rate": 1.9665762059233434e-05, + "loss": 0.8320014476776123, + "step": 730 + }, + { + "epoch": 0.25762114537444936, + "grad_norm": 1.2667340666882572, + "learning_rate": 1.96642666503573e-05, + "loss": 0.8701308965682983, + "step": 731 + }, + { + "epoch": 0.25797356828193835, + "grad_norm": 1.1982399130470203, + "learning_rate": 1.9662767960763394e-05, + "loss": 0.7980693578720093, + "step": 732 + }, + { + "epoch": 0.25832599118942734, + "grad_norm": 1.3765503313855298, + "learning_rate": 1.9661265990960486e-05, + "loss": 0.7258214950561523, + "step": 733 + }, + { + "epoch": 0.2586784140969163, + "grad_norm": 1.1683887680739682, + "learning_rate": 1.9659760741458444e-05, + "loss": 0.6860172748565674, + "step": 734 + }, + { + "epoch": 0.25903083700440527, + "grad_norm": 1.4034749748766104, + "learning_rate": 1.9658252212768252e-05, + "loss": 0.7438071370124817, + "step": 735 + }, + { + "epoch": 0.25938325991189426, + "grad_norm": 1.6140837506314978, + "learning_rate": 1.9656740405402007e-05, + "loss": 0.8680309057235718, + "step": 736 + }, + { + "epoch": 0.25973568281938325, + 
"grad_norm": 1.5365221656010954, + "learning_rate": 1.9655225319872925e-05, + "loss": 0.933163046836853, + "step": 737 + }, + { + "epoch": 0.26008810572687224, + "grad_norm": 1.3636194628802456, + "learning_rate": 1.9653706956695333e-05, + "loss": 0.8746597170829773, + "step": 738 + }, + { + "epoch": 0.26044052863436123, + "grad_norm": 1.31799671460777, + "learning_rate": 1.965218531638466e-05, + "loss": 0.857211709022522, + "step": 739 + }, + { + "epoch": 0.2607929515418502, + "grad_norm": 1.313241643085953, + "learning_rate": 1.965066039945746e-05, + "loss": 0.7837733030319214, + "step": 740 + }, + { + "epoch": 0.2611453744493392, + "grad_norm": 1.3527479757495662, + "learning_rate": 1.9649132206431395e-05, + "loss": 0.8401491641998291, + "step": 741 + }, + { + "epoch": 0.2614977973568282, + "grad_norm": 1.14302378839197, + "learning_rate": 1.9647600737825235e-05, + "loss": 0.7070307731628418, + "step": 742 + }, + { + "epoch": 0.2618502202643172, + "grad_norm": 1.756317003631787, + "learning_rate": 1.9646065994158873e-05, + "loss": 0.7649509310722351, + "step": 743 + }, + { + "epoch": 0.2622026431718062, + "grad_norm": 1.5152987231460182, + "learning_rate": 1.9644527975953302e-05, + "loss": 0.7759182453155518, + "step": 744 + }, + { + "epoch": 0.26255506607929513, + "grad_norm": 1.5151017458848213, + "learning_rate": 1.9642986683730626e-05, + "loss": 0.8176295757293701, + "step": 745 + }, + { + "epoch": 0.2629074889867841, + "grad_norm": 1.2974538182792636, + "learning_rate": 1.9641442118014078e-05, + "loss": 0.8406162261962891, + "step": 746 + }, + { + "epoch": 0.2632599118942731, + "grad_norm": 1.3410871141615202, + "learning_rate": 1.9639894279327985e-05, + "loss": 0.8064795732498169, + "step": 747 + }, + { + "epoch": 0.2636123348017621, + "grad_norm": 1.2769637989850176, + "learning_rate": 1.9638343168197784e-05, + "loss": 0.6662956476211548, + "step": 748 + }, + { + "epoch": 0.2639647577092511, + "grad_norm": 1.5105008685571195, + "learning_rate": 
1.9636788785150037e-05, + "loss": 0.8747783899307251, + "step": 749 + }, + { + "epoch": 0.2643171806167401, + "grad_norm": 1.4261291763421449, + "learning_rate": 1.9635231130712406e-05, + "loss": 0.7893349528312683, + "step": 750 + }, + { + "epoch": 0.2646696035242291, + "grad_norm": 1.2907133964100823, + "learning_rate": 1.9633670205413665e-05, + "loss": 0.7380903959274292, + "step": 751 + }, + { + "epoch": 0.26502202643171807, + "grad_norm": 1.5293000163357584, + "learning_rate": 1.96321060097837e-05, + "loss": 0.9164873957633972, + "step": 752 + }, + { + "epoch": 0.26537444933920706, + "grad_norm": 1.5448314355627197, + "learning_rate": 1.9630538544353505e-05, + "loss": 0.7664264440536499, + "step": 753 + }, + { + "epoch": 0.26572687224669606, + "grad_norm": 1.4037068281656377, + "learning_rate": 1.9628967809655187e-05, + "loss": 0.8117275238037109, + "step": 754 + }, + { + "epoch": 0.26607929515418505, + "grad_norm": 1.3044642797371147, + "learning_rate": 1.9627393806221967e-05, + "loss": 0.6203808784484863, + "step": 755 + }, + { + "epoch": 0.266431718061674, + "grad_norm": 1.5462507455011187, + "learning_rate": 1.9625816534588163e-05, + "loss": 0.8777878284454346, + "step": 756 + }, + { + "epoch": 0.266784140969163, + "grad_norm": 1.2883365910622429, + "learning_rate": 1.9624235995289212e-05, + "loss": 0.6984438300132751, + "step": 757 + }, + { + "epoch": 0.26713656387665197, + "grad_norm": 1.5746997664717406, + "learning_rate": 1.962265218886166e-05, + "loss": 0.7806228399276733, + "step": 758 + }, + { + "epoch": 0.26748898678414096, + "grad_norm": 1.318579751564355, + "learning_rate": 1.9621065115843155e-05, + "loss": 0.6924373507499695, + "step": 759 + }, + { + "epoch": 0.26784140969162995, + "grad_norm": 1.2867883287922122, + "learning_rate": 1.9619474776772462e-05, + "loss": 0.6809841394424438, + "step": 760 + }, + { + "epoch": 0.26819383259911894, + "grad_norm": 1.3766475304418688, + "learning_rate": 1.961788117218945e-05, + "loss": 0.8346723318099976, 
+ "step": 761 + }, + { + "epoch": 0.26854625550660793, + "grad_norm": 1.3717126814625271, + "learning_rate": 1.96162843026351e-05, + "loss": 0.8000205755233765, + "step": 762 + }, + { + "epoch": 0.2688986784140969, + "grad_norm": 1.256040752163899, + "learning_rate": 1.9614684168651504e-05, + "loss": 0.8026692271232605, + "step": 763 + }, + { + "epoch": 0.2692511013215859, + "grad_norm": 1.4850412299335856, + "learning_rate": 1.961308077078185e-05, + "loss": 0.921292781829834, + "step": 764 + }, + { + "epoch": 0.2696035242290749, + "grad_norm": 1.6577133714061814, + "learning_rate": 1.9611474109570446e-05, + "loss": 0.8018487095832825, + "step": 765 + }, + { + "epoch": 0.2699559471365639, + "grad_norm": 1.2440268554728864, + "learning_rate": 1.9609864185562698e-05, + "loss": 0.7400588989257812, + "step": 766 + }, + { + "epoch": 0.27030837004405284, + "grad_norm": 1.2094824954459686, + "learning_rate": 1.960825099930513e-05, + "loss": 0.6243399977684021, + "step": 767 + }, + { + "epoch": 0.27066079295154183, + "grad_norm": 1.415024134390762, + "learning_rate": 1.9606634551345373e-05, + "loss": 0.7680903673171997, + "step": 768 + }, + { + "epoch": 0.2710132158590308, + "grad_norm": 1.3126349106428246, + "learning_rate": 1.960501484223215e-05, + "loss": 0.8783930540084839, + "step": 769 + }, + { + "epoch": 0.2713656387665198, + "grad_norm": 1.4964756858010921, + "learning_rate": 1.9603391872515308e-05, + "loss": 0.7910561561584473, + "step": 770 + }, + { + "epoch": 0.2717180616740088, + "grad_norm": 1.4400527227532898, + "learning_rate": 1.9601765642745795e-05, + "loss": 0.7325295209884644, + "step": 771 + }, + { + "epoch": 0.2720704845814978, + "grad_norm": 1.3018158119605838, + "learning_rate": 1.9600136153475666e-05, + "loss": 0.7017170190811157, + "step": 772 + }, + { + "epoch": 0.2724229074889868, + "grad_norm": 1.5395904311410002, + "learning_rate": 1.959850340525808e-05, + "loss": 0.9281908273696899, + "step": 773 + }, + { + "epoch": 0.2727753303964758, + 
"grad_norm": 1.256408104414643, + "learning_rate": 1.95968673986473e-05, + "loss": 0.7421029806137085, + "step": 774 + }, + { + "epoch": 0.27312775330396477, + "grad_norm": 1.3171523536350294, + "learning_rate": 1.9595228134198708e-05, + "loss": 0.7474848031997681, + "step": 775 + }, + { + "epoch": 0.27348017621145376, + "grad_norm": 1.3683438241049553, + "learning_rate": 1.9593585612468776e-05, + "loss": 0.7267760038375854, + "step": 776 + }, + { + "epoch": 0.27383259911894275, + "grad_norm": 1.4883233103137832, + "learning_rate": 1.9591939834015096e-05, + "loss": 0.739683985710144, + "step": 777 + }, + { + "epoch": 0.2741850220264317, + "grad_norm": 1.2437408403604437, + "learning_rate": 1.9590290799396353e-05, + "loss": 0.6615399122238159, + "step": 778 + }, + { + "epoch": 0.2745374449339207, + "grad_norm": 1.5863201035209105, + "learning_rate": 1.9588638509172343e-05, + "loss": 0.8045977354049683, + "step": 779 + }, + { + "epoch": 0.2748898678414097, + "grad_norm": 1.5522608295626732, + "learning_rate": 1.958698296390397e-05, + "loss": 0.8760169744491577, + "step": 780 + }, + { + "epoch": 0.27524229074889867, + "grad_norm": 1.5297761597873432, + "learning_rate": 1.9585324164153236e-05, + "loss": 0.6676662564277649, + "step": 781 + }, + { + "epoch": 0.27559471365638766, + "grad_norm": 1.1706549585314092, + "learning_rate": 1.958366211048326e-05, + "loss": 0.6650630235671997, + "step": 782 + }, + { + "epoch": 0.27594713656387665, + "grad_norm": 1.157826702613003, + "learning_rate": 1.9581996803458248e-05, + "loss": 0.7399466037750244, + "step": 783 + }, + { + "epoch": 0.27629955947136564, + "grad_norm": 1.316772401506473, + "learning_rate": 1.9580328243643528e-05, + "loss": 0.6121753454208374, + "step": 784 + }, + { + "epoch": 0.27665198237885463, + "grad_norm": 1.334268754223845, + "learning_rate": 1.9578656431605515e-05, + "loss": 0.8562870025634766, + "step": 785 + }, + { + "epoch": 0.2770044052863436, + "grad_norm": 1.441421130314338, + "learning_rate": 
1.9576981367911746e-05, + "loss": 0.717842161655426, + "step": 786 + }, + { + "epoch": 0.2773568281938326, + "grad_norm": 1.582720247126145, + "learning_rate": 1.9575303053130847e-05, + "loss": 0.802294135093689, + "step": 787 + }, + { + "epoch": 0.2777092511013216, + "grad_norm": 1.2234390313515955, + "learning_rate": 1.957362148783256e-05, + "loss": 0.6636664867401123, + "step": 788 + }, + { + "epoch": 0.2780616740088106, + "grad_norm": 1.3850288575091645, + "learning_rate": 1.9571936672587718e-05, + "loss": 0.7177780866622925, + "step": 789 + }, + { + "epoch": 0.27841409691629954, + "grad_norm": 1.6181532263095582, + "learning_rate": 1.957024860796826e-05, + "loss": 0.8263623714447021, + "step": 790 + }, + { + "epoch": 0.27876651982378853, + "grad_norm": 1.4470113515398348, + "learning_rate": 1.9568557294547244e-05, + "loss": 0.7620534896850586, + "step": 791 + }, + { + "epoch": 0.2791189427312775, + "grad_norm": 1.5101791429302596, + "learning_rate": 1.956686273289881e-05, + "loss": 0.812814474105835, + "step": 792 + }, + { + "epoch": 0.2794713656387665, + "grad_norm": 1.2812553609430264, + "learning_rate": 1.956516492359821e-05, + "loss": 0.6494747400283813, + "step": 793 + }, + { + "epoch": 0.2798237885462555, + "grad_norm": 1.3413487769011592, + "learning_rate": 1.9563463867221793e-05, + "loss": 0.7152044773101807, + "step": 794 + }, + { + "epoch": 0.2801762114537445, + "grad_norm": 1.5718962936709213, + "learning_rate": 1.956175956434702e-05, + "loss": 0.7607219815254211, + "step": 795 + }, + { + "epoch": 0.2805286343612335, + "grad_norm": 1.4483911078118432, + "learning_rate": 1.9560052015552455e-05, + "loss": 0.8793845176696777, + "step": 796 + }, + { + "epoch": 0.2808810572687225, + "grad_norm": 1.509282266928049, + "learning_rate": 1.9558341221417744e-05, + "loss": 0.8314816951751709, + "step": 797 + }, + { + "epoch": 0.28123348017621147, + "grad_norm": 1.2634078342185056, + "learning_rate": 1.9556627182523656e-05, + "loss": 0.8195264339447021, + 
"step": 798 + }, + { + "epoch": 0.28158590308370046, + "grad_norm": 1.2808718319688324, + "learning_rate": 1.9554909899452055e-05, + "loss": 0.8079999685287476, + "step": 799 + }, + { + "epoch": 0.28193832599118945, + "grad_norm": 1.6867283155032318, + "learning_rate": 1.9553189372785903e-05, + "loss": 0.7614034414291382, + "step": 800 + }, + { + "epoch": 0.2822907488986784, + "grad_norm": 1.13179227916607, + "learning_rate": 1.9551465603109263e-05, + "loss": 0.6271458268165588, + "step": 801 + }, + { + "epoch": 0.2826431718061674, + "grad_norm": 1.5872328004173855, + "learning_rate": 1.9549738591007302e-05, + "loss": 0.8061915040016174, + "step": 802 + }, + { + "epoch": 0.2829955947136564, + "grad_norm": 1.3494758196376249, + "learning_rate": 1.9548008337066294e-05, + "loss": 0.663912296295166, + "step": 803 + }, + { + "epoch": 0.28334801762114536, + "grad_norm": 1.503624867364233, + "learning_rate": 1.9546274841873597e-05, + "loss": 0.7582170963287354, + "step": 804 + }, + { + "epoch": 0.28370044052863436, + "grad_norm": 1.3181032025931252, + "learning_rate": 1.9544538106017682e-05, + "loss": 0.7855465412139893, + "step": 805 + }, + { + "epoch": 0.28405286343612335, + "grad_norm": 1.4462567272335825, + "learning_rate": 1.9542798130088116e-05, + "loss": 0.6976481676101685, + "step": 806 + }, + { + "epoch": 0.28440528634361234, + "grad_norm": 1.8291330426153005, + "learning_rate": 1.954105491467557e-05, + "loss": 0.7678342461585999, + "step": 807 + }, + { + "epoch": 0.28475770925110133, + "grad_norm": 1.2407582020259869, + "learning_rate": 1.9539308460371812e-05, + "loss": 0.6238858699798584, + "step": 808 + }, + { + "epoch": 0.2851101321585903, + "grad_norm": 1.239785831064125, + "learning_rate": 1.95375587677697e-05, + "loss": 0.7756681442260742, + "step": 809 + }, + { + "epoch": 0.2854625550660793, + "grad_norm": 1.462836813794646, + "learning_rate": 1.953580583746321e-05, + "loss": 0.8908202648162842, + "step": 810 + }, + { + "epoch": 0.2858149779735683, + 
"grad_norm": 1.1795831445984086, + "learning_rate": 1.9534049670047402e-05, + "loss": 0.6769838929176331, + "step": 811 + }, + { + "epoch": 0.28616740088105724, + "grad_norm": 1.2674126564024601, + "learning_rate": 1.953229026611844e-05, + "loss": 0.8452527523040771, + "step": 812 + }, + { + "epoch": 0.28651982378854624, + "grad_norm": 1.1830287106246784, + "learning_rate": 1.9530527626273592e-05, + "loss": 0.7494348287582397, + "step": 813 + }, + { + "epoch": 0.2868722466960352, + "grad_norm": 1.399665317775642, + "learning_rate": 1.9528761751111215e-05, + "loss": 0.7691028714179993, + "step": 814 + }, + { + "epoch": 0.2872246696035242, + "grad_norm": 1.2077153417445161, + "learning_rate": 1.9526992641230768e-05, + "loss": 0.6854703426361084, + "step": 815 + }, + { + "epoch": 0.2875770925110132, + "grad_norm": 1.4832887577463363, + "learning_rate": 1.9525220297232815e-05, + "loss": 0.7520424127578735, + "step": 816 + }, + { + "epoch": 0.2879295154185022, + "grad_norm": 1.499896401145914, + "learning_rate": 1.9523444719719003e-05, + "loss": 0.7894444465637207, + "step": 817 + }, + { + "epoch": 0.2882819383259912, + "grad_norm": 1.4246285223246848, + "learning_rate": 1.952166590929209e-05, + "loss": 0.7835032939910889, + "step": 818 + }, + { + "epoch": 0.2886343612334802, + "grad_norm": 1.4284322886298129, + "learning_rate": 1.9519883866555928e-05, + "loss": 0.7932062149047852, + "step": 819 + }, + { + "epoch": 0.2889867841409692, + "grad_norm": 1.3689283839888975, + "learning_rate": 1.951809859211546e-05, + "loss": 0.7917006015777588, + "step": 820 + }, + { + "epoch": 0.28933920704845817, + "grad_norm": 1.1579342690806749, + "learning_rate": 1.9516310086576734e-05, + "loss": 0.5330606698989868, + "step": 821 + }, + { + "epoch": 0.28969162995594716, + "grad_norm": 1.3284680872988386, + "learning_rate": 1.9514518350546893e-05, + "loss": 0.7243788242340088, + "step": 822 + }, + { + "epoch": 0.2900440528634361, + "grad_norm": 1.5494348453743318, + "learning_rate": 
1.9512723384634175e-05, + "loss": 0.7692278623580933, + "step": 823 + }, + { + "epoch": 0.2903964757709251, + "grad_norm": 1.4784711521599085, + "learning_rate": 1.9510925189447916e-05, + "loss": 0.7537804841995239, + "step": 824 + }, + { + "epoch": 0.2907488986784141, + "grad_norm": 1.6724318756605505, + "learning_rate": 1.9509123765598545e-05, + "loss": 0.9168751239776611, + "step": 825 + }, + { + "epoch": 0.2911013215859031, + "grad_norm": 1.3269710873120673, + "learning_rate": 1.9507319113697592e-05, + "loss": 0.7863682508468628, + "step": 826 + }, + { + "epoch": 0.29145374449339206, + "grad_norm": 1.3629585622585614, + "learning_rate": 1.9505511234357677e-05, + "loss": 0.7119239568710327, + "step": 827 + }, + { + "epoch": 0.29180616740088106, + "grad_norm": 1.3362093903256012, + "learning_rate": 1.950370012819252e-05, + "loss": 0.6071019172668457, + "step": 828 + }, + { + "epoch": 0.29215859030837005, + "grad_norm": 1.479432309492857, + "learning_rate": 1.9501885795816937e-05, + "loss": 0.9750580787658691, + "step": 829 + }, + { + "epoch": 0.29251101321585904, + "grad_norm": 1.3797663030734688, + "learning_rate": 1.9500068237846837e-05, + "loss": 0.7465370297431946, + "step": 830 + }, + { + "epoch": 0.29286343612334803, + "grad_norm": 1.3385246660479724, + "learning_rate": 1.949824745489922e-05, + "loss": 0.7821183204650879, + "step": 831 + }, + { + "epoch": 0.293215859030837, + "grad_norm": 1.4671979426722186, + "learning_rate": 1.949642344759219e-05, + "loss": 0.7555009126663208, + "step": 832 + }, + { + "epoch": 0.293568281938326, + "grad_norm": 1.4661056896012497, + "learning_rate": 1.9494596216544942e-05, + "loss": 0.841058075428009, + "step": 833 + }, + { + "epoch": 0.29392070484581495, + "grad_norm": 1.4108089015351581, + "learning_rate": 1.9492765762377762e-05, + "loss": 0.737910270690918, + "step": 834 + }, + { + "epoch": 0.29427312775330394, + "grad_norm": 1.430769526790491, + "learning_rate": 1.9490932085712027e-05, + "loss": 0.6817367076873779, + 
"step": 835 + }, + { + "epoch": 0.29462555066079293, + "grad_norm": 1.4513808156166068, + "learning_rate": 1.9489095187170218e-05, + "loss": 0.6739218235015869, + "step": 836 + }, + { + "epoch": 0.2949779735682819, + "grad_norm": 1.3318980758968664, + "learning_rate": 1.9487255067375907e-05, + "loss": 0.8632504940032959, + "step": 837 + }, + { + "epoch": 0.2953303964757709, + "grad_norm": 1.4421404890889282, + "learning_rate": 1.9485411726953753e-05, + "loss": 0.6615850925445557, + "step": 838 + }, + { + "epoch": 0.2956828193832599, + "grad_norm": 1.5159755088266125, + "learning_rate": 1.9483565166529515e-05, + "loss": 0.8647087812423706, + "step": 839 + }, + { + "epoch": 0.2960352422907489, + "grad_norm": 1.0418453596772383, + "learning_rate": 1.9481715386730044e-05, + "loss": 0.5152087807655334, + "step": 840 + }, + { + "epoch": 0.2963876651982379, + "grad_norm": 1.265898832717726, + "learning_rate": 1.9479862388183283e-05, + "loss": 0.7942806482315063, + "step": 841 + }, + { + "epoch": 0.2967400881057269, + "grad_norm": 1.4728087242398047, + "learning_rate": 1.947800617151826e-05, + "loss": 0.6364283561706543, + "step": 842 + }, + { + "epoch": 0.2970925110132159, + "grad_norm": 1.322764194763318, + "learning_rate": 1.9476146737365112e-05, + "loss": 0.8278179168701172, + "step": 843 + }, + { + "epoch": 0.29744493392070487, + "grad_norm": 1.4629329991948483, + "learning_rate": 1.9474284086355057e-05, + "loss": 0.7369956970214844, + "step": 844 + }, + { + "epoch": 0.29779735682819386, + "grad_norm": 1.314820640789224, + "learning_rate": 1.9472418219120403e-05, + "loss": 0.6879928112030029, + "step": 845 + }, + { + "epoch": 0.2981497797356828, + "grad_norm": 1.357319714737328, + "learning_rate": 1.9470549136294554e-05, + "loss": 0.8312973976135254, + "step": 846 + }, + { + "epoch": 0.2985022026431718, + "grad_norm": 1.3920317025034097, + "learning_rate": 1.946867683851201e-05, + "loss": 0.8102964162826538, + "step": 847 + }, + { + "epoch": 0.2988546255506608, + 
"grad_norm": 1.502648854525568, + "learning_rate": 1.9466801326408355e-05, + "loss": 0.6136792898178101, + "step": 848 + }, + { + "epoch": 0.29920704845814977, + "grad_norm": 1.4028028409959845, + "learning_rate": 1.946492260062027e-05, + "loss": 0.6388760805130005, + "step": 849 + }, + { + "epoch": 0.29955947136563876, + "grad_norm": 1.2376129930975477, + "learning_rate": 1.9463040661785516e-05, + "loss": 0.6443628072738647, + "step": 850 + }, + { + "epoch": 0.29991189427312775, + "grad_norm": 1.4811436209967876, + "learning_rate": 1.9461155510542962e-05, + "loss": 0.7763667702674866, + "step": 851 + }, + { + "epoch": 0.30026431718061675, + "grad_norm": 1.3770267697185403, + "learning_rate": 1.9459267147532555e-05, + "loss": 0.8040921688079834, + "step": 852 + }, + { + "epoch": 0.30061674008810574, + "grad_norm": 1.5848801035694915, + "learning_rate": 1.9457375573395334e-05, + "loss": 0.6271079778671265, + "step": 853 + }, + { + "epoch": 0.30096916299559473, + "grad_norm": 1.4490523944014555, + "learning_rate": 1.945548078877343e-05, + "loss": 0.6970022916793823, + "step": 854 + }, + { + "epoch": 0.3013215859030837, + "grad_norm": 1.5989299247861681, + "learning_rate": 1.9453582794310063e-05, + "loss": 0.8283002972602844, + "step": 855 + }, + { + "epoch": 0.3016740088105727, + "grad_norm": 1.3183590658260465, + "learning_rate": 1.9451681590649545e-05, + "loss": 0.7989551424980164, + "step": 856 + }, + { + "epoch": 0.30202643171806165, + "grad_norm": 1.6791390781024222, + "learning_rate": 1.9449777178437274e-05, + "loss": 0.7000687122344971, + "step": 857 + }, + { + "epoch": 0.30237885462555064, + "grad_norm": 1.8022925697135672, + "learning_rate": 1.944786955831974e-05, + "loss": 0.8005126714706421, + "step": 858 + }, + { + "epoch": 0.30273127753303963, + "grad_norm": 1.4999207234472591, + "learning_rate": 1.9445958730944515e-05, + "loss": 0.7060712575912476, + "step": 859 + }, + { + "epoch": 0.3030837004405286, + "grad_norm": 1.4072429406012825, + 
"learning_rate": 1.9444044696960277e-05, + "loss": 0.6979726552963257, + "step": 860 + }, + { + "epoch": 0.3034361233480176, + "grad_norm": 1.4515999764557612, + "learning_rate": 1.9442127457016768e-05, + "loss": 0.7916465401649475, + "step": 861 + }, + { + "epoch": 0.3037885462555066, + "grad_norm": 1.1660322947657744, + "learning_rate": 1.944020701176484e-05, + "loss": 0.6980502009391785, + "step": 862 + }, + { + "epoch": 0.3041409691629956, + "grad_norm": 1.7481448087127538, + "learning_rate": 1.943828336185642e-05, + "loss": 0.8479218482971191, + "step": 863 + }, + { + "epoch": 0.3044933920704846, + "grad_norm": 1.6916771358958562, + "learning_rate": 1.9436356507944532e-05, + "loss": 0.8374297022819519, + "step": 864 + }, + { + "epoch": 0.3048458149779736, + "grad_norm": 1.3059238044039985, + "learning_rate": 1.943442645068328e-05, + "loss": 0.6871248483657837, + "step": 865 + }, + { + "epoch": 0.3051982378854626, + "grad_norm": 1.4668202087885096, + "learning_rate": 1.9432493190727854e-05, + "loss": 0.92267906665802, + "step": 866 + }, + { + "epoch": 0.30555066079295157, + "grad_norm": 1.7147503992363287, + "learning_rate": 1.9430556728734543e-05, + "loss": 0.7068654298782349, + "step": 867 + }, + { + "epoch": 0.3059030837004405, + "grad_norm": 1.354783765213683, + "learning_rate": 1.942861706536071e-05, + "loss": 0.830272912979126, + "step": 868 + }, + { + "epoch": 0.3062555066079295, + "grad_norm": 1.5223972366721212, + "learning_rate": 1.9426674201264814e-05, + "loss": 0.7996113300323486, + "step": 869 + }, + { + "epoch": 0.3066079295154185, + "grad_norm": 1.7576483396811688, + "learning_rate": 1.9424728137106398e-05, + "loss": 0.7519441843032837, + "step": 870 + }, + { + "epoch": 0.3069603524229075, + "grad_norm": 1.92300313533063, + "learning_rate": 1.9422778873546084e-05, + "loss": 0.5812790393829346, + "step": 871 + }, + { + "epoch": 0.30731277533039647, + "grad_norm": 1.058437605318741, + "learning_rate": 1.9420826411245595e-05, + "loss": 
0.5953323841094971, + "step": 872 + }, + { + "epoch": 0.30766519823788546, + "grad_norm": 1.3954802825469448, + "learning_rate": 1.941887075086772e-05, + "loss": 0.8307937979698181, + "step": 873 + }, + { + "epoch": 0.30801762114537445, + "grad_norm": 1.5122839417773277, + "learning_rate": 1.9416911893076358e-05, + "loss": 0.7753443121910095, + "step": 874 + }, + { + "epoch": 0.30837004405286345, + "grad_norm": 1.3848386830658772, + "learning_rate": 1.9414949838536468e-05, + "loss": 0.8803520798683167, + "step": 875 + }, + { + "epoch": 0.30872246696035244, + "grad_norm": 1.3111930018969615, + "learning_rate": 1.9412984587914115e-05, + "loss": 0.6811587810516357, + "step": 876 + }, + { + "epoch": 0.30907488986784143, + "grad_norm": 1.3880932208512609, + "learning_rate": 1.9411016141876438e-05, + "loss": 0.802099347114563, + "step": 877 + }, + { + "epoch": 0.3094273127753304, + "grad_norm": 1.560285458084049, + "learning_rate": 1.940904450109166e-05, + "loss": 0.7325229644775391, + "step": 878 + }, + { + "epoch": 0.30977973568281936, + "grad_norm": 1.5126812875374416, + "learning_rate": 1.9407069666229097e-05, + "loss": 0.6515973210334778, + "step": 879 + }, + { + "epoch": 0.31013215859030835, + "grad_norm": 1.2990709527675965, + "learning_rate": 1.9405091637959138e-05, + "loss": 0.7314589619636536, + "step": 880 + }, + { + "epoch": 0.31048458149779734, + "grad_norm": 1.2146229290292494, + "learning_rate": 1.9403110416953267e-05, + "loss": 0.6668078303337097, + "step": 881 + }, + { + "epoch": 0.31083700440528633, + "grad_norm": 1.4214853985415763, + "learning_rate": 1.9401126003884047e-05, + "loss": 0.693236231803894, + "step": 882 + }, + { + "epoch": 0.3111894273127753, + "grad_norm": 2.210010730425174, + "learning_rate": 1.939913839942512e-05, + "loss": 0.8242754936218262, + "step": 883 + }, + { + "epoch": 0.3115418502202643, + "grad_norm": 1.4121001226290237, + "learning_rate": 1.939714760425122e-05, + "loss": 0.7776592373847961, + "step": 884 + }, + { + "epoch": 
0.3118942731277533, + "grad_norm": 1.6297557283357365, + "learning_rate": 1.9395153619038158e-05, + "loss": 0.7023555636405945, + "step": 885 + }, + { + "epoch": 0.3122466960352423, + "grad_norm": 1.333512905730993, + "learning_rate": 1.939315644446283e-05, + "loss": 0.690382182598114, + "step": 886 + }, + { + "epoch": 0.3125991189427313, + "grad_norm": 1.4632969046362096, + "learning_rate": 1.9391156081203214e-05, + "loss": 0.7590082287788391, + "step": 887 + }, + { + "epoch": 0.3129515418502203, + "grad_norm": 1.3672878296080273, + "learning_rate": 1.9389152529938377e-05, + "loss": 0.7378168702125549, + "step": 888 + }, + { + "epoch": 0.3133039647577093, + "grad_norm": 1.3616414763479574, + "learning_rate": 1.938714579134845e-05, + "loss": 0.7036890983581543, + "step": 889 + }, + { + "epoch": 0.3136563876651982, + "grad_norm": 1.4808362954559244, + "learning_rate": 1.938513586611467e-05, + "loss": 0.8881829977035522, + "step": 890 + }, + { + "epoch": 0.3140088105726872, + "grad_norm": 1.5370313355999317, + "learning_rate": 1.9383122754919342e-05, + "loss": 0.7467600107192993, + "step": 891 + }, + { + "epoch": 0.3143612334801762, + "grad_norm": 1.6168044285318155, + "learning_rate": 1.938110645844585e-05, + "loss": 0.9358077049255371, + "step": 892 + }, + { + "epoch": 0.3147136563876652, + "grad_norm": 1.3982581442164577, + "learning_rate": 1.9379086977378664e-05, + "loss": 0.7751256227493286, + "step": 893 + }, + { + "epoch": 0.3150660792951542, + "grad_norm": 1.3071717433837386, + "learning_rate": 1.9377064312403338e-05, + "loss": 0.8020666837692261, + "step": 894 + }, + { + "epoch": 0.31541850220264317, + "grad_norm": 1.2076526617304193, + "learning_rate": 1.9375038464206507e-05, + "loss": 0.7251513004302979, + "step": 895 + }, + { + "epoch": 0.31577092511013216, + "grad_norm": 1.3323484110232422, + "learning_rate": 1.9373009433475874e-05, + "loss": 0.7163990139961243, + "step": 896 + }, + { + "epoch": 0.31612334801762115, + "grad_norm": 1.7368098259899396, + 
"learning_rate": 1.937097722090024e-05, + "loss": 0.7208842039108276, + "step": 897 + }, + { + "epoch": 0.31647577092511014, + "grad_norm": 1.3890083085574685, + "learning_rate": 1.9368941827169475e-05, + "loss": 0.7660849690437317, + "step": 898 + }, + { + "epoch": 0.31682819383259914, + "grad_norm": 1.4598849060474621, + "learning_rate": 1.9366903252974532e-05, + "loss": 0.7017598152160645, + "step": 899 + }, + { + "epoch": 0.31718061674008813, + "grad_norm": 1.1578269588811556, + "learning_rate": 1.9364861499007443e-05, + "loss": 0.6831692457199097, + "step": 900 + }, + { + "epoch": 0.3175330396475771, + "grad_norm": 1.5110843884258551, + "learning_rate": 1.936281656596132e-05, + "loss": 0.6555520296096802, + "step": 901 + }, + { + "epoch": 0.31788546255506606, + "grad_norm": 1.5455350998398028, + "learning_rate": 1.9360768454530356e-05, + "loss": 0.7401334047317505, + "step": 902 + }, + { + "epoch": 0.31823788546255505, + "grad_norm": 1.445337217541868, + "learning_rate": 1.935871716540982e-05, + "loss": 0.7415893077850342, + "step": 903 + }, + { + "epoch": 0.31859030837004404, + "grad_norm": 1.280838808592201, + "learning_rate": 1.935666269929606e-05, + "loss": 0.8254752159118652, + "step": 904 + }, + { + "epoch": 0.31894273127753303, + "grad_norm": 1.4164353369528349, + "learning_rate": 1.9354605056886505e-05, + "loss": 0.708149254322052, + "step": 905 + }, + { + "epoch": 0.319295154185022, + "grad_norm": 5.868993531178127, + "learning_rate": 1.9352544238879654e-05, + "loss": 0.8084006905555725, + "step": 906 + }, + { + "epoch": 0.319647577092511, + "grad_norm": 1.264327413823813, + "learning_rate": 1.93504802459751e-05, + "loss": 0.8039542436599731, + "step": 907 + }, + { + "epoch": 0.32, + "grad_norm": 1.3552380315038073, + "learning_rate": 1.93484130788735e-05, + "loss": 0.7563241720199585, + "step": 908 + }, + { + "epoch": 0.320352422907489, + "grad_norm": 1.4802258000623036, + "learning_rate": 1.9346342738276593e-05, + "loss": 0.7972971200942993, + 
"step": 909 + }, + { + "epoch": 0.320704845814978, + "grad_norm": 1.2978401429696003, + "learning_rate": 1.93442692248872e-05, + "loss": 0.6693121790885925, + "step": 910 + }, + { + "epoch": 0.321057268722467, + "grad_norm": 1.567978048588056, + "learning_rate": 1.9342192539409203e-05, + "loss": 0.6597858667373657, + "step": 911 + }, + { + "epoch": 0.321409691629956, + "grad_norm": 1.368700143265877, + "learning_rate": 1.934011268254758e-05, + "loss": 0.6771499514579773, + "step": 912 + }, + { + "epoch": 0.3217621145374449, + "grad_norm": 1.2365987861589656, + "learning_rate": 1.9338029655008375e-05, + "loss": 0.6903397440910339, + "step": 913 + }, + { + "epoch": 0.3221145374449339, + "grad_norm": 1.1408319382533163, + "learning_rate": 1.9335943457498717e-05, + "loss": 0.6287999153137207, + "step": 914 + }, + { + "epoch": 0.3224669603524229, + "grad_norm": 1.6382789883498257, + "learning_rate": 1.93338540907268e-05, + "loss": 0.7199264764785767, + "step": 915 + }, + { + "epoch": 0.3228193832599119, + "grad_norm": 1.3951711845041654, + "learning_rate": 1.9331761555401896e-05, + "loss": 0.6960160732269287, + "step": 916 + }, + { + "epoch": 0.3231718061674009, + "grad_norm": 1.4692108732272398, + "learning_rate": 1.932966585223436e-05, + "loss": 0.8981958627700806, + "step": 917 + }, + { + "epoch": 0.32352422907488987, + "grad_norm": 1.5685980092664367, + "learning_rate": 1.932756698193562e-05, + "loss": 0.786432147026062, + "step": 918 + }, + { + "epoch": 0.32387665198237886, + "grad_norm": 1.5208274085752962, + "learning_rate": 1.9325464945218172e-05, + "loss": 0.7260904312133789, + "step": 919 + }, + { + "epoch": 0.32422907488986785, + "grad_norm": 1.5076861367086136, + "learning_rate": 1.9323359742795595e-05, + "loss": 0.715835452079773, + "step": 920 + }, + { + "epoch": 0.32458149779735684, + "grad_norm": 1.5022880591009429, + "learning_rate": 1.932125137538254e-05, + "loss": 0.6312157511711121, + "step": 921 + }, + { + "epoch": 0.32493392070484584, + 
"grad_norm": 1.3825485581433186, + "learning_rate": 1.931913984369473e-05, + "loss": 0.7565821409225464, + "step": 922 + }, + { + "epoch": 0.3252863436123348, + "grad_norm": 1.3787375139479208, + "learning_rate": 1.931702514844896e-05, + "loss": 0.6866531372070312, + "step": 923 + }, + { + "epoch": 0.32563876651982376, + "grad_norm": 2.06933203374066, + "learning_rate": 1.9314907290363117e-05, + "loss": 0.879021167755127, + "step": 924 + }, + { + "epoch": 0.32599118942731276, + "grad_norm": 1.4876230584538193, + "learning_rate": 1.9312786270156135e-05, + "loss": 0.6972150802612305, + "step": 925 + }, + { + "epoch": 0.32634361233480175, + "grad_norm": 1.5939077112190465, + "learning_rate": 1.9310662088548042e-05, + "loss": 0.8735189437866211, + "step": 926 + }, + { + "epoch": 0.32669603524229074, + "grad_norm": 1.4550040646675775, + "learning_rate": 1.930853474625993e-05, + "loss": 0.6114254593849182, + "step": 927 + }, + { + "epoch": 0.32704845814977973, + "grad_norm": 1.5850836788259668, + "learning_rate": 1.930640424401396e-05, + "loss": 0.8032322525978088, + "step": 928 + }, + { + "epoch": 0.3274008810572687, + "grad_norm": 1.3666090686487828, + "learning_rate": 1.9304270582533376e-05, + "loss": 0.7391160726547241, + "step": 929 + }, + { + "epoch": 0.3277533039647577, + "grad_norm": 1.3744182830455962, + "learning_rate": 1.930213376254249e-05, + "loss": 0.7055366039276123, + "step": 930 + }, + { + "epoch": 0.3281057268722467, + "grad_norm": 1.3717314993069374, + "learning_rate": 1.9299993784766684e-05, + "loss": 0.671670138835907, + "step": 931 + }, + { + "epoch": 0.3284581497797357, + "grad_norm": 1.4961694507376992, + "learning_rate": 1.9297850649932416e-05, + "loss": 0.7486976385116577, + "step": 932 + }, + { + "epoch": 0.3288105726872247, + "grad_norm": 1.3777653583239398, + "learning_rate": 1.929570435876721e-05, + "loss": 0.8767625093460083, + "step": 933 + }, + { + "epoch": 0.3291629955947137, + "grad_norm": 1.5767252427705674, + "learning_rate": 
1.929355491199967e-05, + "loss": 0.6841862797737122, + "step": 934 + }, + { + "epoch": 0.3295154185022026, + "grad_norm": 1.4985001262879563, + "learning_rate": 1.929140231035946e-05, + "loss": 0.7745054960250854, + "step": 935 + }, + { + "epoch": 0.3298678414096916, + "grad_norm": 1.4538548583561628, + "learning_rate": 1.928924655457733e-05, + "loss": 0.5879434943199158, + "step": 936 + }, + { + "epoch": 0.3302202643171806, + "grad_norm": 1.4292680321712006, + "learning_rate": 1.9287087645385084e-05, + "loss": 0.8484170436859131, + "step": 937 + }, + { + "epoch": 0.3305726872246696, + "grad_norm": 1.3383126778675687, + "learning_rate": 1.9284925583515604e-05, + "loss": 0.6518877148628235, + "step": 938 + }, + { + "epoch": 0.3309251101321586, + "grad_norm": 1.3496744406534642, + "learning_rate": 1.928276036970285e-05, + "loss": 0.7694787383079529, + "step": 939 + }, + { + "epoch": 0.3312775330396476, + "grad_norm": 1.475669634065235, + "learning_rate": 1.928059200468184e-05, + "loss": 0.6893239617347717, + "step": 940 + }, + { + "epoch": 0.33162995594713657, + "grad_norm": 1.9386710613485005, + "learning_rate": 1.927842048918867e-05, + "loss": 0.7731181383132935, + "step": 941 + }, + { + "epoch": 0.33198237885462556, + "grad_norm": 1.2730945433300995, + "learning_rate": 1.9276245823960495e-05, + "loss": 0.652579665184021, + "step": 942 + }, + { + "epoch": 0.33233480176211455, + "grad_norm": 1.4590802585162193, + "learning_rate": 1.927406800973555e-05, + "loss": 0.7504575252532959, + "step": 943 + }, + { + "epoch": 0.33268722466960354, + "grad_norm": 1.2636242756085148, + "learning_rate": 1.927188704725314e-05, + "loss": 0.6199444532394409, + "step": 944 + }, + { + "epoch": 0.33303964757709253, + "grad_norm": 1.3381297141173314, + "learning_rate": 1.9269702937253623e-05, + "loss": 0.7452073693275452, + "step": 945 + }, + { + "epoch": 0.33339207048458147, + "grad_norm": 1.6220831494484687, + "learning_rate": 1.926751568047845e-05, + "loss": 0.7538012266159058, + 
"step": 946 + }, + { + "epoch": 0.33374449339207046, + "grad_norm": 1.298282312930767, + "learning_rate": 1.9265325277670114e-05, + "loss": 0.6670408248901367, + "step": 947 + }, + { + "epoch": 0.33409691629955945, + "grad_norm": 1.3861711594873305, + "learning_rate": 1.926313172957219e-05, + "loss": 0.8060495853424072, + "step": 948 + }, + { + "epoch": 0.33444933920704845, + "grad_norm": 1.497135036962013, + "learning_rate": 1.926093503692933e-05, + "loss": 0.7494044303894043, + "step": 949 + }, + { + "epoch": 0.33480176211453744, + "grad_norm": 1.4954420855155135, + "learning_rate": 1.9258735200487235e-05, + "loss": 0.5751914978027344, + "step": 950 + }, + { + "epoch": 0.33515418502202643, + "grad_norm": 1.3135496972020755, + "learning_rate": 1.9256532220992683e-05, + "loss": 0.7234281301498413, + "step": 951 + }, + { + "epoch": 0.3355066079295154, + "grad_norm": 1.648299384166419, + "learning_rate": 1.9254326099193515e-05, + "loss": 0.7721251249313354, + "step": 952 + }, + { + "epoch": 0.3358590308370044, + "grad_norm": 1.5273494870998061, + "learning_rate": 1.925211683583864e-05, + "loss": 0.7240835428237915, + "step": 953 + }, + { + "epoch": 0.3362114537444934, + "grad_norm": 1.5101195617398009, + "learning_rate": 1.9249904431678037e-05, + "loss": 0.6622776985168457, + "step": 954 + }, + { + "epoch": 0.3365638766519824, + "grad_norm": 1.7484785330432984, + "learning_rate": 1.9247688887462747e-05, + "loss": 0.9682766199111938, + "step": 955 + }, + { + "epoch": 0.3369162995594714, + "grad_norm": 1.5743447413941896, + "learning_rate": 1.9245470203944878e-05, + "loss": 0.8363134860992432, + "step": 956 + }, + { + "epoch": 0.3372687224669604, + "grad_norm": 1.4500608043156524, + "learning_rate": 1.9243248381877605e-05, + "loss": 0.6530857086181641, + "step": 957 + }, + { + "epoch": 0.3376211453744493, + "grad_norm": 1.2035108561422267, + "learning_rate": 1.924102342201517e-05, + "loss": 0.5186585187911987, + "step": 958 + }, + { + "epoch": 0.3379735682819383, + 
"grad_norm": 1.3827408215949344, + "learning_rate": 1.9238795325112867e-05, + "loss": 0.6729516983032227, + "step": 959 + }, + { + "epoch": 0.3383259911894273, + "grad_norm": 4.587971824519282, + "learning_rate": 1.9236564091927083e-05, + "loss": 0.6991842985153198, + "step": 960 + }, + { + "epoch": 0.3386784140969163, + "grad_norm": 1.516889979226708, + "learning_rate": 1.9234329723215235e-05, + "loss": 0.7738245725631714, + "step": 961 + }, + { + "epoch": 0.3390308370044053, + "grad_norm": 1.4574207335379696, + "learning_rate": 1.923209221973583e-05, + "loss": 0.7027466893196106, + "step": 962 + }, + { + "epoch": 0.3393832599118943, + "grad_norm": 1.401098486802875, + "learning_rate": 1.922985158224843e-05, + "loss": 0.7868508696556091, + "step": 963 + }, + { + "epoch": 0.33973568281938327, + "grad_norm": 1.3325223534105368, + "learning_rate": 1.9227607811513662e-05, + "loss": 0.7499512434005737, + "step": 964 + }, + { + "epoch": 0.34008810572687226, + "grad_norm": 1.3198116129339372, + "learning_rate": 1.9225360908293217e-05, + "loss": 0.6662228107452393, + "step": 965 + }, + { + "epoch": 0.34044052863436125, + "grad_norm": 1.4854956624988247, + "learning_rate": 1.9223110873349847e-05, + "loss": 0.8570939302444458, + "step": 966 + }, + { + "epoch": 0.34079295154185024, + "grad_norm": 1.3385040645698225, + "learning_rate": 1.9220857707447372e-05, + "loss": 0.7497669458389282, + "step": 967 + }, + { + "epoch": 0.34114537444933923, + "grad_norm": 1.2753268813313299, + "learning_rate": 1.9218601411350663e-05, + "loss": 0.7356737852096558, + "step": 968 + }, + { + "epoch": 0.34149779735682817, + "grad_norm": 2.3286924006274896, + "learning_rate": 1.9216341985825672e-05, + "loss": 0.7880491018295288, + "step": 969 + }, + { + "epoch": 0.34185022026431716, + "grad_norm": 1.4677269303314853, + "learning_rate": 1.92140794316394e-05, + "loss": 0.734922468662262, + "step": 970 + }, + { + "epoch": 0.34220264317180615, + "grad_norm": 1.5501144518696521, + "learning_rate": 
1.9211813749559916e-05, + "loss": 0.6710363626480103, + "step": 971 + }, + { + "epoch": 0.34255506607929515, + "grad_norm": 1.256856073477316, + "learning_rate": 1.920954494035634e-05, + "loss": 0.7300584316253662, + "step": 972 + }, + { + "epoch": 0.34290748898678414, + "grad_norm": 1.5351698758546528, + "learning_rate": 1.9207273004798873e-05, + "loss": 0.8584152460098267, + "step": 973 + }, + { + "epoch": 0.34325991189427313, + "grad_norm": 1.4115351274616093, + "learning_rate": 1.9204997943658764e-05, + "loss": 0.7307419776916504, + "step": 974 + }, + { + "epoch": 0.3436123348017621, + "grad_norm": 1.434441373942747, + "learning_rate": 1.920271975770832e-05, + "loss": 0.6004960536956787, + "step": 975 + }, + { + "epoch": 0.3439647577092511, + "grad_norm": 1.4015679334218965, + "learning_rate": 1.920043844772092e-05, + "loss": 0.7951763868331909, + "step": 976 + }, + { + "epoch": 0.3443171806167401, + "grad_norm": 1.6575061104845086, + "learning_rate": 1.919815401447099e-05, + "loss": 0.6835082769393921, + "step": 977 + }, + { + "epoch": 0.3446696035242291, + "grad_norm": 1.3790503468449504, + "learning_rate": 1.9195866458734034e-05, + "loss": 0.7556526064872742, + "step": 978 + }, + { + "epoch": 0.3450220264317181, + "grad_norm": 1.2642410643718298, + "learning_rate": 1.91935757812866e-05, + "loss": 0.6918114423751831, + "step": 979 + }, + { + "epoch": 0.345374449339207, + "grad_norm": 1.6331863015088222, + "learning_rate": 1.9191281982906304e-05, + "loss": 0.8197037577629089, + "step": 980 + }, + { + "epoch": 0.345726872246696, + "grad_norm": 1.9820857497043596, + "learning_rate": 1.9188985064371818e-05, + "loss": 0.833138644695282, + "step": 981 + }, + { + "epoch": 0.346079295154185, + "grad_norm": 1.3877421520016489, + "learning_rate": 1.9186685026462874e-05, + "loss": 0.6593397855758667, + "step": 982 + }, + { + "epoch": 0.346431718061674, + "grad_norm": 1.750501089720619, + "learning_rate": 1.918438186996026e-05, + "loss": 0.7535643577575684, + "step": 983 
+ }, + { + "epoch": 0.346784140969163, + "grad_norm": 1.5295833510904033, + "learning_rate": 1.9182075595645836e-05, + "loss": 0.6959745287895203, + "step": 984 + }, + { + "epoch": 0.347136563876652, + "grad_norm": 1.4045200992789866, + "learning_rate": 1.91797662043025e-05, + "loss": 0.7349518537521362, + "step": 985 + }, + { + "epoch": 0.347488986784141, + "grad_norm": 1.3769518272852244, + "learning_rate": 1.9177453696714224e-05, + "loss": 0.7677974700927734, + "step": 986 + }, + { + "epoch": 0.34784140969162997, + "grad_norm": 1.4486626509256493, + "learning_rate": 1.917513807366603e-05, + "loss": 0.7302255630493164, + "step": 987 + }, + { + "epoch": 0.34819383259911896, + "grad_norm": 1.477891236612788, + "learning_rate": 1.9172819335944003e-05, + "loss": 0.838138222694397, + "step": 988 + }, + { + "epoch": 0.34854625550660795, + "grad_norm": 1.555345260078333, + "learning_rate": 1.9170497484335276e-05, + "loss": 0.8018180131912231, + "step": 989 + }, + { + "epoch": 0.34889867841409694, + "grad_norm": 1.4299439839627417, + "learning_rate": 1.9168172519628056e-05, + "loss": 0.8085787296295166, + "step": 990 + }, + { + "epoch": 0.3492511013215859, + "grad_norm": 1.407734167007011, + "learning_rate": 1.9165844442611584e-05, + "loss": 0.8419004082679749, + "step": 991 + }, + { + "epoch": 0.34960352422907487, + "grad_norm": 1.485093259368171, + "learning_rate": 1.916351325407618e-05, + "loss": 0.8255139589309692, + "step": 992 + }, + { + "epoch": 0.34995594713656386, + "grad_norm": 1.3581016847128187, + "learning_rate": 1.9161178954813203e-05, + "loss": 0.7588528990745544, + "step": 993 + }, + { + "epoch": 0.35030837004405285, + "grad_norm": 1.3722258517458088, + "learning_rate": 1.9158841545615076e-05, + "loss": 0.7057096362113953, + "step": 994 + }, + { + "epoch": 0.35066079295154184, + "grad_norm": 1.3264479954648483, + "learning_rate": 1.915650102727528e-05, + "loss": 0.6913125514984131, + "step": 995 + }, + { + "epoch": 0.35101321585903084, + "grad_norm": 
1.4277288783882767, + "learning_rate": 1.9154157400588348e-05, + "loss": 0.7622898817062378, + "step": 996 + }, + { + "epoch": 0.3513656387665198, + "grad_norm": 1.3345359637809249, + "learning_rate": 1.915181066634986e-05, + "loss": 0.6918702125549316, + "step": 997 + }, + { + "epoch": 0.3517180616740088, + "grad_norm": 1.4330955991310976, + "learning_rate": 1.914946082535647e-05, + "loss": 0.8801462650299072, + "step": 998 + }, + { + "epoch": 0.3520704845814978, + "grad_norm": 1.6364104196010791, + "learning_rate": 1.9147107878405873e-05, + "loss": 0.7901172637939453, + "step": 999 + }, + { + "epoch": 0.3524229074889868, + "grad_norm": 1.3202428944557627, + "learning_rate": 1.9144751826296818e-05, + "loss": 0.7308447360992432, + "step": 1000 + }, + { + "epoch": 0.3527753303964758, + "grad_norm": 1.3152547105893029, + "learning_rate": 1.9142392669829114e-05, + "loss": 0.5733275413513184, + "step": 1001 + }, + { + "epoch": 0.35312775330396473, + "grad_norm": 1.4327185784306546, + "learning_rate": 1.9140030409803622e-05, + "loss": 0.7251306772232056, + "step": 1002 + }, + { + "epoch": 0.3534801762114537, + "grad_norm": 1.3492122584167072, + "learning_rate": 1.913766504702225e-05, + "loss": 0.7983027696609497, + "step": 1003 + }, + { + "epoch": 0.3538325991189427, + "grad_norm": 1.5284478719025472, + "learning_rate": 1.9135296582287973e-05, + "loss": 0.7464017868041992, + "step": 1004 + }, + { + "epoch": 0.3541850220264317, + "grad_norm": 1.3377291300677683, + "learning_rate": 1.9132925016404805e-05, + "loss": 0.7333002686500549, + "step": 1005 + }, + { + "epoch": 0.3545374449339207, + "grad_norm": 1.4170618275882645, + "learning_rate": 1.9130550350177823e-05, + "loss": 0.729085385799408, + "step": 1006 + }, + { + "epoch": 0.3548898678414097, + "grad_norm": 1.1531700234964573, + "learning_rate": 1.9128172584413148e-05, + "loss": 0.7599227428436279, + "step": 1007 + }, + { + "epoch": 0.3552422907488987, + "grad_norm": 1.3499603875621307, + "learning_rate": 
1.9125791719917962e-05, + "loss": 0.8110464811325073, + "step": 1008 + }, + { + "epoch": 0.3555947136563877, + "grad_norm": 1.443391069493257, + "learning_rate": 1.912340775750049e-05, + "loss": 0.7431697845458984, + "step": 1009 + }, + { + "epoch": 0.35594713656387666, + "grad_norm": 1.3353700802371913, + "learning_rate": 1.9121020697970016e-05, + "loss": 0.7833640575408936, + "step": 1010 + }, + { + "epoch": 0.35629955947136566, + "grad_norm": 1.2927496434698726, + "learning_rate": 1.9118630542136874e-05, + "loss": 0.7693058252334595, + "step": 1011 + }, + { + "epoch": 0.35665198237885465, + "grad_norm": 1.3593779388270224, + "learning_rate": 1.9116237290812445e-05, + "loss": 0.7724676132202148, + "step": 1012 + }, + { + "epoch": 0.3570044052863436, + "grad_norm": 1.3849928303091037, + "learning_rate": 1.911384094480916e-05, + "loss": 0.6024055480957031, + "step": 1013 + }, + { + "epoch": 0.3573568281938326, + "grad_norm": 1.254237630036734, + "learning_rate": 1.9111441504940514e-05, + "loss": 0.7710703611373901, + "step": 1014 + }, + { + "epoch": 0.35770925110132157, + "grad_norm": 1.3917926832468532, + "learning_rate": 1.910903897202103e-05, + "loss": 0.7591651678085327, + "step": 1015 + }, + { + "epoch": 0.35806167400881056, + "grad_norm": 1.3250641662724636, + "learning_rate": 1.9106633346866302e-05, + "loss": 0.7721874713897705, + "step": 1016 + }, + { + "epoch": 0.35841409691629955, + "grad_norm": 1.3837097156983347, + "learning_rate": 1.910422463029296e-05, + "loss": 0.6767420172691345, + "step": 1017 + }, + { + "epoch": 0.35876651982378854, + "grad_norm": 1.5808312779065312, + "learning_rate": 1.910181282311869e-05, + "loss": 0.6704902648925781, + "step": 1018 + }, + { + "epoch": 0.35911894273127754, + "grad_norm": 1.3288966146848866, + "learning_rate": 1.9099397926162227e-05, + "loss": 0.8871079683303833, + "step": 1019 + }, + { + "epoch": 0.3594713656387665, + "grad_norm": 1.5716465127646195, + "learning_rate": 1.909697994024335e-05, + "loss": 
0.7222549319267273, + "step": 1020 + }, + { + "epoch": 0.3598237885462555, + "grad_norm": 1.4050103839828958, + "learning_rate": 1.9094558866182892e-05, + "loss": 0.7443021535873413, + "step": 1021 + }, + { + "epoch": 0.3601762114537445, + "grad_norm": 1.3877313570980134, + "learning_rate": 1.9092134704802735e-05, + "loss": 0.7698349952697754, + "step": 1022 + }, + { + "epoch": 0.3605286343612335, + "grad_norm": 1.9010750041325926, + "learning_rate": 1.9089707456925798e-05, + "loss": 0.863248348236084, + "step": 1023 + }, + { + "epoch": 0.3608810572687225, + "grad_norm": 1.1572981545597187, + "learning_rate": 1.9087277123376068e-05, + "loss": 0.7036338448524475, + "step": 1024 + }, + { + "epoch": 0.36123348017621143, + "grad_norm": 1.5140044810060398, + "learning_rate": 1.9084843704978558e-05, + "loss": 0.7427274584770203, + "step": 1025 + }, + { + "epoch": 0.3615859030837004, + "grad_norm": 1.5903685422277276, + "learning_rate": 1.908240720255934e-05, + "loss": 0.6548313498497009, + "step": 1026 + }, + { + "epoch": 0.3619383259911894, + "grad_norm": 1.3326463394362358, + "learning_rate": 1.9079967616945534e-05, + "loss": 0.7586454749107361, + "step": 1027 + }, + { + "epoch": 0.3622907488986784, + "grad_norm": 1.45389698507953, + "learning_rate": 1.90775249489653e-05, + "loss": 0.6954889297485352, + "step": 1028 + }, + { + "epoch": 0.3626431718061674, + "grad_norm": 1.6543950271160617, + "learning_rate": 1.907507919944785e-05, + "loss": 0.8798770904541016, + "step": 1029 + }, + { + "epoch": 0.3629955947136564, + "grad_norm": 1.3815054682339305, + "learning_rate": 1.9072630369223433e-05, + "loss": 0.6600694060325623, + "step": 1030 + }, + { + "epoch": 0.3633480176211454, + "grad_norm": 1.5776995405913148, + "learning_rate": 1.9070178459123366e-05, + "loss": 0.6830897927284241, + "step": 1031 + }, + { + "epoch": 0.36370044052863437, + "grad_norm": 1.1973844620945089, + "learning_rate": 1.906772346997998e-05, + "loss": 0.6283613443374634, + "step": 1032 + }, + { + 
"epoch": 0.36405286343612336, + "grad_norm": 1.2892968799675324, + "learning_rate": 1.9065265402626676e-05, + "loss": 0.6451754570007324, + "step": 1033 + }, + { + "epoch": 0.36440528634361236, + "grad_norm": 1.4387559441313162, + "learning_rate": 1.9062804257897887e-05, + "loss": 0.7949883937835693, + "step": 1034 + }, + { + "epoch": 0.36475770925110135, + "grad_norm": 1.4366893391590683, + "learning_rate": 1.90603400366291e-05, + "loss": 0.625343918800354, + "step": 1035 + }, + { + "epoch": 0.3651101321585903, + "grad_norm": 1.5716897663583798, + "learning_rate": 1.9057872739656843e-05, + "loss": 0.8398839235305786, + "step": 1036 + }, + { + "epoch": 0.3654625550660793, + "grad_norm": 1.6515297053174456, + "learning_rate": 1.9055402367818673e-05, + "loss": 0.8628166913986206, + "step": 1037 + }, + { + "epoch": 0.36581497797356827, + "grad_norm": 1.6000244306696312, + "learning_rate": 1.905292892195322e-05, + "loss": 0.7494110465049744, + "step": 1038 + }, + { + "epoch": 0.36616740088105726, + "grad_norm": 1.6358981860019415, + "learning_rate": 1.9050452402900134e-05, + "loss": 0.7695099115371704, + "step": 1039 + }, + { + "epoch": 0.36651982378854625, + "grad_norm": 1.3948395289772064, + "learning_rate": 1.904797281150012e-05, + "loss": 0.8067067861557007, + "step": 1040 + }, + { + "epoch": 0.36687224669603524, + "grad_norm": 1.5430196098026252, + "learning_rate": 1.9045490148594917e-05, + "loss": 0.7542074918746948, + "step": 1041 + }, + { + "epoch": 0.36722466960352423, + "grad_norm": 1.4232871422135234, + "learning_rate": 1.9043004415027314e-05, + "loss": 0.7027335166931152, + "step": 1042 + }, + { + "epoch": 0.3675770925110132, + "grad_norm": 1.2842638834648272, + "learning_rate": 1.9040515611641142e-05, + "loss": 0.7779253721237183, + "step": 1043 + }, + { + "epoch": 0.3679295154185022, + "grad_norm": 1.4713589430159515, + "learning_rate": 1.9038023739281275e-05, + "loss": 0.6840049028396606, + "step": 1044 + }, + { + "epoch": 0.3682819383259912, + 
"grad_norm": 1.2252786450532585, + "learning_rate": 1.903552879879362e-05, + "loss": 0.6183794736862183, + "step": 1045 + }, + { + "epoch": 0.3686343612334802, + "grad_norm": 1.3239395642180716, + "learning_rate": 1.9033030791025127e-05, + "loss": 0.7770168781280518, + "step": 1046 + }, + { + "epoch": 0.36898678414096914, + "grad_norm": 1.5646813675557831, + "learning_rate": 1.9030529716823806e-05, + "loss": 0.7192036509513855, + "step": 1047 + }, + { + "epoch": 0.36933920704845813, + "grad_norm": 1.3179369082607764, + "learning_rate": 1.9028025577038688e-05, + "loss": 0.6604419946670532, + "step": 1048 + }, + { + "epoch": 0.3696916299559471, + "grad_norm": 1.7088212085954357, + "learning_rate": 1.9025518372519847e-05, + "loss": 0.7999060153961182, + "step": 1049 + }, + { + "epoch": 0.3700440528634361, + "grad_norm": 1.6369356635778263, + "learning_rate": 1.9023008104118404e-05, + "loss": 0.7487536668777466, + "step": 1050 + }, + { + "epoch": 0.3703964757709251, + "grad_norm": 1.4534592079598474, + "learning_rate": 1.9020494772686513e-05, + "loss": 0.7786455154418945, + "step": 1051 + }, + { + "epoch": 0.3707488986784141, + "grad_norm": 1.5556124976221868, + "learning_rate": 1.9017978379077378e-05, + "loss": 0.7592626214027405, + "step": 1052 + }, + { + "epoch": 0.3711013215859031, + "grad_norm": 1.3193440168525459, + "learning_rate": 1.901545892414523e-05, + "loss": 0.774850606918335, + "step": 1053 + }, + { + "epoch": 0.3714537444933921, + "grad_norm": 1.4859587321900767, + "learning_rate": 1.901293640874535e-05, + "loss": 0.5430009365081787, + "step": 1054 + }, + { + "epoch": 0.37180616740088107, + "grad_norm": 1.4541817899150224, + "learning_rate": 1.9010410833734053e-05, + "loss": 0.7459923624992371, + "step": 1055 + }, + { + "epoch": 0.37215859030837006, + "grad_norm": 1.6269332982530442, + "learning_rate": 1.9007882199968692e-05, + "loss": 0.6372017860412598, + "step": 1056 + }, + { + "epoch": 0.37251101321585905, + "grad_norm": 1.6522112420188226, + 
"learning_rate": 1.900535050830766e-05, + "loss": 0.6773583292961121, + "step": 1057 + }, + { + "epoch": 0.372863436123348, + "grad_norm": 1.7342256392022233, + "learning_rate": 1.900281575961039e-05, + "loss": 0.8431004285812378, + "step": 1058 + }, + { + "epoch": 0.373215859030837, + "grad_norm": 1.4085085883480681, + "learning_rate": 1.9000277954737342e-05, + "loss": 0.6361340284347534, + "step": 1059 + }, + { + "epoch": 0.373568281938326, + "grad_norm": 1.3793359019510345, + "learning_rate": 1.8997737094550033e-05, + "loss": 0.7153787612915039, + "step": 1060 + }, + { + "epoch": 0.37392070484581497, + "grad_norm": 1.4220392348844544, + "learning_rate": 1.8995193179911e-05, + "loss": 0.7244935631752014, + "step": 1061 + }, + { + "epoch": 0.37427312775330396, + "grad_norm": 1.4061330426818142, + "learning_rate": 1.8992646211683817e-05, + "loss": 0.6648202538490295, + "step": 1062 + }, + { + "epoch": 0.37462555066079295, + "grad_norm": 1.4217807346058315, + "learning_rate": 1.8990096190733113e-05, + "loss": 0.6528836488723755, + "step": 1063 + }, + { + "epoch": 0.37497797356828194, + "grad_norm": 1.4695679092519263, + "learning_rate": 1.8987543117924532e-05, + "loss": 0.6749341487884521, + "step": 1064 + }, + { + "epoch": 0.37533039647577093, + "grad_norm": 1.3287092803608218, + "learning_rate": 1.8984986994124766e-05, + "loss": 0.7402256727218628, + "step": 1065 + }, + { + "epoch": 0.3756828193832599, + "grad_norm": 1.2181513754192281, + "learning_rate": 1.898242782020154e-05, + "loss": 0.5638695955276489, + "step": 1066 + }, + { + "epoch": 0.3760352422907489, + "grad_norm": 1.5457056768133204, + "learning_rate": 1.897986559702361e-05, + "loss": 0.829822838306427, + "step": 1067 + }, + { + "epoch": 0.3763876651982379, + "grad_norm": 1.3351440834834858, + "learning_rate": 1.8977300325460774e-05, + "loss": 0.6796025037765503, + "step": 1068 + }, + { + "epoch": 0.37674008810572684, + "grad_norm": 1.3611135527247238, + "learning_rate": 1.897473200638386e-05, + 
"loss": 0.8584038615226746, + "step": 1069 + }, + { + "epoch": 0.37709251101321584, + "grad_norm": 1.4622377307020165, + "learning_rate": 1.897216064066474e-05, + "loss": 0.8069149255752563, + "step": 1070 + }, + { + "epoch": 0.37744493392070483, + "grad_norm": 1.2194173424769332, + "learning_rate": 1.89695862291763e-05, + "loss": 0.5762223601341248, + "step": 1071 + }, + { + "epoch": 0.3777973568281938, + "grad_norm": 1.3827918624348656, + "learning_rate": 1.8967008772792483e-05, + "loss": 0.6626466512680054, + "step": 1072 + }, + { + "epoch": 0.3781497797356828, + "grad_norm": 1.15359758590964, + "learning_rate": 1.896442827238825e-05, + "loss": 0.6260244250297546, + "step": 1073 + }, + { + "epoch": 0.3785022026431718, + "grad_norm": 1.8994686915407593, + "learning_rate": 1.8961844728839602e-05, + "loss": 0.8090343475341797, + "step": 1074 + }, + { + "epoch": 0.3788546255506608, + "grad_norm": 1.4116056126096472, + "learning_rate": 1.8959258143023575e-05, + "loss": 0.66957026720047, + "step": 1075 + }, + { + "epoch": 0.3792070484581498, + "grad_norm": 1.308974606662818, + "learning_rate": 1.8956668515818223e-05, + "loss": 0.7103087306022644, + "step": 1076 + }, + { + "epoch": 0.3795594713656388, + "grad_norm": 1.468914156940793, + "learning_rate": 1.895407584810266e-05, + "loss": 0.7469112277030945, + "step": 1077 + }, + { + "epoch": 0.37991189427312777, + "grad_norm": 1.624950928787921, + "learning_rate": 1.8951480140757003e-05, + "loss": 0.8252213001251221, + "step": 1078 + }, + { + "epoch": 0.38026431718061676, + "grad_norm": 1.4238044077341658, + "learning_rate": 1.8948881394662417e-05, + "loss": 0.7204562425613403, + "step": 1079 + }, + { + "epoch": 0.38061674008810575, + "grad_norm": 1.5659608304591812, + "learning_rate": 1.89462796107011e-05, + "loss": 0.7325669527053833, + "step": 1080 + }, + { + "epoch": 0.3809691629955947, + "grad_norm": 1.2964480504204927, + "learning_rate": 1.8943674789756276e-05, + "loss": 0.738972008228302, + "step": 1081 + }, + { + 
"epoch": 0.3813215859030837, + "grad_norm": 1.5892566433984823, + "learning_rate": 1.8941066932712194e-05, + "loss": 0.7468631267547607, + "step": 1082 + }, + { + "epoch": 0.3816740088105727, + "grad_norm": 1.6145182365902104, + "learning_rate": 1.893845604045415e-05, + "loss": 0.6479831337928772, + "step": 1083 + }, + { + "epoch": 0.38202643171806167, + "grad_norm": 1.3615750017210906, + "learning_rate": 1.893584211386845e-05, + "loss": 0.7615871429443359, + "step": 1084 + }, + { + "epoch": 0.38237885462555066, + "grad_norm": 1.8901071385329251, + "learning_rate": 1.8933225153842446e-05, + "loss": 0.6934449076652527, + "step": 1085 + }, + { + "epoch": 0.38273127753303965, + "grad_norm": 1.2384833194245852, + "learning_rate": 1.8930605161264517e-05, + "loss": 0.5267079472541809, + "step": 1086 + }, + { + "epoch": 0.38308370044052864, + "grad_norm": 1.524832028509735, + "learning_rate": 1.892798213702407e-05, + "loss": 0.7309125661849976, + "step": 1087 + }, + { + "epoch": 0.38343612334801763, + "grad_norm": 1.3743253361073855, + "learning_rate": 1.892535608201153e-05, + "loss": 0.8133678436279297, + "step": 1088 + }, + { + "epoch": 0.3837885462555066, + "grad_norm": 1.3915725940468886, + "learning_rate": 1.892272699711837e-05, + "loss": 0.6097027063369751, + "step": 1089 + }, + { + "epoch": 0.3841409691629956, + "grad_norm": 1.548287022579551, + "learning_rate": 1.8920094883237082e-05, + "loss": 0.70456862449646, + "step": 1090 + }, + { + "epoch": 0.3844933920704846, + "grad_norm": 1.2952569165029428, + "learning_rate": 1.8917459741261183e-05, + "loss": 0.7236523628234863, + "step": 1091 + }, + { + "epoch": 0.38484581497797354, + "grad_norm": 1.5039785189114319, + "learning_rate": 1.8914821572085224e-05, + "loss": 0.7251272201538086, + "step": 1092 + }, + { + "epoch": 0.38519823788546254, + "grad_norm": 1.271767676796452, + "learning_rate": 1.8912180376604777e-05, + "loss": 0.7381070852279663, + "step": 1093 + }, + { + "epoch": 0.3855506607929515, + "grad_norm": 
1.6023999081974447, + "learning_rate": 1.8909536155716458e-05, + "loss": 0.6654129028320312, + "step": 1094 + }, + { + "epoch": 0.3859030837004405, + "grad_norm": 1.4351957388528893, + "learning_rate": 1.8906888910317883e-05, + "loss": 0.7823128700256348, + "step": 1095 + }, + { + "epoch": 0.3862555066079295, + "grad_norm": 1.2302320218391962, + "learning_rate": 1.8904238641307718e-05, + "loss": 0.5988126993179321, + "step": 1096 + }, + { + "epoch": 0.3866079295154185, + "grad_norm": 1.6745614533481283, + "learning_rate": 1.8901585349585643e-05, + "loss": 0.7671465873718262, + "step": 1097 + }, + { + "epoch": 0.3869603524229075, + "grad_norm": 1.4027982600434907, + "learning_rate": 1.889892903605237e-05, + "loss": 0.7878838777542114, + "step": 1098 + }, + { + "epoch": 0.3873127753303965, + "grad_norm": 1.2802181437962392, + "learning_rate": 1.8896269701609634e-05, + "loss": 0.72254878282547, + "step": 1099 + }, + { + "epoch": 0.3876651982378855, + "grad_norm": 1.4183908379879375, + "learning_rate": 1.8893607347160198e-05, + "loss": 0.6796868443489075, + "step": 1100 + }, + { + "epoch": 0.38801762114537447, + "grad_norm": 1.510469064523606, + "learning_rate": 1.8890941973607843e-05, + "loss": 0.6378471851348877, + "step": 1101 + }, + { + "epoch": 0.38837004405286346, + "grad_norm": 1.327169163711753, + "learning_rate": 1.888827358185739e-05, + "loss": 0.8473032712936401, + "step": 1102 + }, + { + "epoch": 0.3887224669603524, + "grad_norm": 1.4704779902492213, + "learning_rate": 1.8885602172814667e-05, + "loss": 0.8272742033004761, + "step": 1103 + }, + { + "epoch": 0.3890748898678414, + "grad_norm": 1.45593169268278, + "learning_rate": 1.8882927747386533e-05, + "loss": 0.7244507670402527, + "step": 1104 + }, + { + "epoch": 0.3894273127753304, + "grad_norm": 1.3081271484466186, + "learning_rate": 1.888025030648088e-05, + "loss": 0.5764014720916748, + "step": 1105 + }, + { + "epoch": 0.3897797356828194, + "grad_norm": 1.230279760550168, + "learning_rate": 
1.887756985100661e-05, + "loss": 0.6944009065628052, + "step": 1106 + }, + { + "epoch": 0.39013215859030836, + "grad_norm": 1.381963017332696, + "learning_rate": 1.8874886381873657e-05, + "loss": 0.7096902132034302, + "step": 1107 + }, + { + "epoch": 0.39048458149779736, + "grad_norm": 1.6526795986169043, + "learning_rate": 1.8872199899992973e-05, + "loss": 0.6304805278778076, + "step": 1108 + }, + { + "epoch": 0.39083700440528635, + "grad_norm": 1.3081643743142675, + "learning_rate": 1.8869510406276538e-05, + "loss": 0.7091327905654907, + "step": 1109 + }, + { + "epoch": 0.39118942731277534, + "grad_norm": 1.4257979117717376, + "learning_rate": 1.886681790163735e-05, + "loss": 0.6575565338134766, + "step": 1110 + }, + { + "epoch": 0.39154185022026433, + "grad_norm": 1.6155582257297172, + "learning_rate": 1.8864122386989426e-05, + "loss": 0.837468147277832, + "step": 1111 + }, + { + "epoch": 0.3918942731277533, + "grad_norm": 1.4395330206284223, + "learning_rate": 1.8861423863247816e-05, + "loss": 0.6861380338668823, + "step": 1112 + }, + { + "epoch": 0.3922466960352423, + "grad_norm": 1.3206140573248442, + "learning_rate": 1.8858722331328582e-05, + "loss": 0.7421156167984009, + "step": 1113 + }, + { + "epoch": 0.39259911894273125, + "grad_norm": 1.4106532753820455, + "learning_rate": 1.8856017792148807e-05, + "loss": 0.8037575483322144, + "step": 1114 + }, + { + "epoch": 0.39295154185022024, + "grad_norm": 1.34412494732323, + "learning_rate": 1.8853310246626608e-05, + "loss": 0.6530179381370544, + "step": 1115 + }, + { + "epoch": 0.39330396475770923, + "grad_norm": 1.7480111733406445, + "learning_rate": 1.88505996956811e-05, + "loss": 0.9039478302001953, + "step": 1116 + }, + { + "epoch": 0.3936563876651982, + "grad_norm": 1.2556675250098766, + "learning_rate": 1.8847886140232438e-05, + "loss": 0.7734917998313904, + "step": 1117 + }, + { + "epoch": 0.3940088105726872, + "grad_norm": 1.4809117769611548, + "learning_rate": 1.8845169581201786e-05, + "loss": 
0.7146204113960266, + "step": 1118 + }, + { + "epoch": 0.3943612334801762, + "grad_norm": 1.4108388267740644, + "learning_rate": 1.8842450019511337e-05, + "loss": 0.6427414417266846, + "step": 1119 + }, + { + "epoch": 0.3947136563876652, + "grad_norm": 1.462443026711516, + "learning_rate": 1.883972745608429e-05, + "loss": 0.7241504192352295, + "step": 1120 + }, + { + "epoch": 0.3950660792951542, + "grad_norm": 1.5796197427651677, + "learning_rate": 1.8837001891844875e-05, + "loss": 0.7085466384887695, + "step": 1121 + }, + { + "epoch": 0.3954185022026432, + "grad_norm": 1.220037664049328, + "learning_rate": 1.8834273327718334e-05, + "loss": 0.6099711656570435, + "step": 1122 + }, + { + "epoch": 0.3957709251101322, + "grad_norm": 1.7637467057266936, + "learning_rate": 1.8831541764630936e-05, + "loss": 0.9153809547424316, + "step": 1123 + }, + { + "epoch": 0.39612334801762117, + "grad_norm": 1.432058114739846, + "learning_rate": 1.8828807203509953e-05, + "loss": 0.7025514841079712, + "step": 1124 + }, + { + "epoch": 0.3964757709251101, + "grad_norm": 1.3170228531933665, + "learning_rate": 1.882606964528369e-05, + "loss": 0.8254855275154114, + "step": 1125 + }, + { + "epoch": 0.3968281938325991, + "grad_norm": 1.3015643549096694, + "learning_rate": 1.8823329090881457e-05, + "loss": 0.6812278032302856, + "step": 1126 + }, + { + "epoch": 0.3971806167400881, + "grad_norm": 1.4379402990614556, + "learning_rate": 1.8820585541233592e-05, + "loss": 0.6570114493370056, + "step": 1127 + }, + { + "epoch": 0.3975330396475771, + "grad_norm": 1.4245448514304093, + "learning_rate": 1.881783899727144e-05, + "loss": 0.636163592338562, + "step": 1128 + }, + { + "epoch": 0.39788546255506607, + "grad_norm": 1.4535684365173425, + "learning_rate": 1.8815089459927373e-05, + "loss": 0.6744807958602905, + "step": 1129 + }, + { + "epoch": 0.39823788546255506, + "grad_norm": 1.2654983836452696, + "learning_rate": 1.8812336930134768e-05, + "loss": 0.6739502549171448, + "step": 1130 + }, + { + 
"epoch": 0.39859030837004406, + "grad_norm": 1.5274150360278067, + "learning_rate": 1.8809581408828026e-05, + "loss": 0.800058126449585, + "step": 1131 + }, + { + "epoch": 0.39894273127753305, + "grad_norm": 1.293199138820765, + "learning_rate": 1.880682289694256e-05, + "loss": 0.7158734798431396, + "step": 1132 + }, + { + "epoch": 0.39929515418502204, + "grad_norm": 1.426620948967722, + "learning_rate": 1.8804061395414795e-05, + "loss": 0.7142150402069092, + "step": 1133 + }, + { + "epoch": 0.39964757709251103, + "grad_norm": 1.5712220679274596, + "learning_rate": 1.8801296905182184e-05, + "loss": 0.7830438613891602, + "step": 1134 + }, + { + "epoch": 0.4, + "grad_norm": 1.3789411964854812, + "learning_rate": 1.879852942718318e-05, + "loss": 0.7037091255187988, + "step": 1135 + }, + { + "epoch": 0.400352422907489, + "grad_norm": 1.5410576826642701, + "learning_rate": 1.8795758962357254e-05, + "loss": 0.7634316682815552, + "step": 1136 + }, + { + "epoch": 0.40070484581497795, + "grad_norm": 1.3380525485574057, + "learning_rate": 1.8792985511644895e-05, + "loss": 0.8569636344909668, + "step": 1137 + }, + { + "epoch": 0.40105726872246694, + "grad_norm": 1.4697640342217926, + "learning_rate": 1.8790209075987603e-05, + "loss": 0.8589881062507629, + "step": 1138 + }, + { + "epoch": 0.40140969162995593, + "grad_norm": 1.4119711578026037, + "learning_rate": 1.8787429656327892e-05, + "loss": 0.6667177677154541, + "step": 1139 + }, + { + "epoch": 0.4017621145374449, + "grad_norm": 1.5302691962759787, + "learning_rate": 1.8784647253609286e-05, + "loss": 0.8272922039031982, + "step": 1140 + }, + { + "epoch": 0.4021145374449339, + "grad_norm": 1.4934073596410382, + "learning_rate": 1.8781861868776328e-05, + "loss": 0.735906720161438, + "step": 1141 + }, + { + "epoch": 0.4024669603524229, + "grad_norm": 1.6214826290901958, + "learning_rate": 1.8779073502774567e-05, + "loss": 0.7496200799942017, + "step": 1142 + }, + { + "epoch": 0.4028193832599119, + "grad_norm": 
1.5379634398249482, + "learning_rate": 1.8776282156550563e-05, + "loss": 0.741244912147522, + "step": 1143 + }, + { + "epoch": 0.4031718061674009, + "grad_norm": 1.6175484470841388, + "learning_rate": 1.87734878310519e-05, + "loss": 0.6074572205543518, + "step": 1144 + }, + { + "epoch": 0.4035242290748899, + "grad_norm": 1.5403137415943102, + "learning_rate": 1.8770690527227156e-05, + "loss": 0.7852963805198669, + "step": 1145 + }, + { + "epoch": 0.4038766519823789, + "grad_norm": 1.3167947695811832, + "learning_rate": 1.8767890246025934e-05, + "loss": 0.8041664361953735, + "step": 1146 + }, + { + "epoch": 0.40422907488986787, + "grad_norm": 1.2847896666293108, + "learning_rate": 1.876508698839884e-05, + "loss": 0.6014564037322998, + "step": 1147 + }, + { + "epoch": 0.4045814977973568, + "grad_norm": 1.6737775020761936, + "learning_rate": 1.876228075529749e-05, + "loss": 0.7389206886291504, + "step": 1148 + }, + { + "epoch": 0.4049339207048458, + "grad_norm": 1.5291026740622409, + "learning_rate": 1.875947154767452e-05, + "loss": 0.7540062665939331, + "step": 1149 + }, + { + "epoch": 0.4052863436123348, + "grad_norm": 1.5780731113626183, + "learning_rate": 1.8756659366483564e-05, + "loss": 0.6953487396240234, + "step": 1150 + }, + { + "epoch": 0.4056387665198238, + "grad_norm": 1.8069469411894516, + "learning_rate": 1.875384421267927e-05, + "loss": 0.6715666055679321, + "step": 1151 + }, + { + "epoch": 0.40599118942731277, + "grad_norm": 1.6113428960633331, + "learning_rate": 1.8751026087217294e-05, + "loss": 0.7763206362724304, + "step": 1152 + }, + { + "epoch": 0.40634361233480176, + "grad_norm": 1.7227531605547286, + "learning_rate": 1.8748204991054304e-05, + "loss": 0.8445626497268677, + "step": 1153 + }, + { + "epoch": 0.40669603524229075, + "grad_norm": 1.4170830085508515, + "learning_rate": 1.8745380925147976e-05, + "loss": 0.6789584159851074, + "step": 1154 + }, + { + "epoch": 0.40704845814977975, + "grad_norm": 1.403092590323935, + "learning_rate": 
1.8742553890456986e-05, + "loss": 0.6301349401473999, + "step": 1155 + }, + { + "epoch": 0.40740088105726874, + "grad_norm": 1.243923442253091, + "learning_rate": 1.873972388794103e-05, + "loss": 0.5968909859657288, + "step": 1156 + }, + { + "epoch": 0.40775330396475773, + "grad_norm": 1.489269903668207, + "learning_rate": 1.873689091856081e-05, + "loss": 0.759127676486969, + "step": 1157 + }, + { + "epoch": 0.4081057268722467, + "grad_norm": 1.7062525426103168, + "learning_rate": 1.873405498327802e-05, + "loss": 0.8113895654678345, + "step": 1158 + }, + { + "epoch": 0.40845814977973566, + "grad_norm": 2.2841166697739266, + "learning_rate": 1.8731216083055373e-05, + "loss": 0.6294944286346436, + "step": 1159 + }, + { + "epoch": 0.40881057268722465, + "grad_norm": 1.7643300465666825, + "learning_rate": 1.87283742188566e-05, + "loss": 0.7024469375610352, + "step": 1160 + }, + { + "epoch": 0.40916299559471364, + "grad_norm": 1.6192702903054457, + "learning_rate": 1.8725529391646413e-05, + "loss": 0.6593793034553528, + "step": 1161 + }, + { + "epoch": 0.40951541850220263, + "grad_norm": 1.491465083071803, + "learning_rate": 1.8722681602390548e-05, + "loss": 0.72177654504776, + "step": 1162 + }, + { + "epoch": 0.4098678414096916, + "grad_norm": 1.5089448151062697, + "learning_rate": 1.8719830852055736e-05, + "loss": 0.7099393606185913, + "step": 1163 + }, + { + "epoch": 0.4102202643171806, + "grad_norm": 1.3870038981594819, + "learning_rate": 1.871697714160972e-05, + "loss": 0.6221687197685242, + "step": 1164 + }, + { + "epoch": 0.4105726872246696, + "grad_norm": 1.6034975452453926, + "learning_rate": 1.8714120472021252e-05, + "loss": 0.7236911058425903, + "step": 1165 + }, + { + "epoch": 0.4109251101321586, + "grad_norm": 1.6733335742616042, + "learning_rate": 1.8711260844260072e-05, + "loss": 0.6777583360671997, + "step": 1166 + }, + { + "epoch": 0.4112775330396476, + "grad_norm": 1.2685396486773262, + "learning_rate": 1.870839825929694e-05, + "loss": 
0.6408713459968567, + "step": 1167 + }, + { + "epoch": 0.4116299559471366, + "grad_norm": 1.5501797457897155, + "learning_rate": 1.870553271810362e-05, + "loss": 0.6081968545913696, + "step": 1168 + }, + { + "epoch": 0.4119823788546256, + "grad_norm": 1.324315376857478, + "learning_rate": 1.8702664221652864e-05, + "loss": 0.7269757986068726, + "step": 1169 + }, + { + "epoch": 0.4123348017621145, + "grad_norm": 1.359571395974998, + "learning_rate": 1.8699792770918443e-05, + "loss": 0.6563149094581604, + "step": 1170 + }, + { + "epoch": 0.4126872246696035, + "grad_norm": 1.412304869808958, + "learning_rate": 1.8696918366875123e-05, + "loss": 0.6900039911270142, + "step": 1171 + }, + { + "epoch": 0.4130396475770925, + "grad_norm": 1.6666238046463622, + "learning_rate": 1.869404101049868e-05, + "loss": 0.6575014591217041, + "step": 1172 + }, + { + "epoch": 0.4133920704845815, + "grad_norm": 1.7453316480937289, + "learning_rate": 1.8691160702765878e-05, + "loss": 0.8178410530090332, + "step": 1173 + }, + { + "epoch": 0.4137444933920705, + "grad_norm": 1.2369225358107252, + "learning_rate": 1.8688277444654495e-05, + "loss": 0.6247331500053406, + "step": 1174 + }, + { + "epoch": 0.41409691629955947, + "grad_norm": 1.4809443864869283, + "learning_rate": 1.868539123714331e-05, + "loss": 0.7220792770385742, + "step": 1175 + }, + { + "epoch": 0.41444933920704846, + "grad_norm": 1.3133478143499064, + "learning_rate": 1.8682502081212104e-05, + "loss": 0.6279594302177429, + "step": 1176 + }, + { + "epoch": 0.41480176211453745, + "grad_norm": 1.9965951061666904, + "learning_rate": 1.8679609977841646e-05, + "loss": 0.8814467787742615, + "step": 1177 + }, + { + "epoch": 0.41515418502202645, + "grad_norm": 1.337413771448709, + "learning_rate": 1.867671492801372e-05, + "loss": 0.6601974368095398, + "step": 1178 + }, + { + "epoch": 0.41550660792951544, + "grad_norm": 1.5188708939818696, + "learning_rate": 1.8673816932711107e-05, + "loss": 0.7004785537719727, + "step": 1179 + }, + { + 
"epoch": 0.41585903083700443, + "grad_norm": 1.5057078901191085, + "learning_rate": 1.8670915992917586e-05, + "loss": 0.7409330606460571, + "step": 1180 + }, + { + "epoch": 0.41621145374449336, + "grad_norm": 1.4232223858260633, + "learning_rate": 1.8668012109617933e-05, + "loss": 0.6698065996170044, + "step": 1181 + }, + { + "epoch": 0.41656387665198236, + "grad_norm": 1.5925482634189316, + "learning_rate": 1.8665105283797927e-05, + "loss": 0.7420671582221985, + "step": 1182 + }, + { + "epoch": 0.41691629955947135, + "grad_norm": 1.5560634478711484, + "learning_rate": 1.8662195516444345e-05, + "loss": 0.7719774842262268, + "step": 1183 + }, + { + "epoch": 0.41726872246696034, + "grad_norm": 1.4792437797078573, + "learning_rate": 1.8659282808544966e-05, + "loss": 0.6206108331680298, + "step": 1184 + }, + { + "epoch": 0.41762114537444933, + "grad_norm": 1.3470893025550628, + "learning_rate": 1.865636716108856e-05, + "loss": 0.799741268157959, + "step": 1185 + }, + { + "epoch": 0.4179735682819383, + "grad_norm": 1.419455186886867, + "learning_rate": 1.8653448575064893e-05, + "loss": 0.6839771866798401, + "step": 1186 + }, + { + "epoch": 0.4183259911894273, + "grad_norm": 1.4763673797370565, + "learning_rate": 1.8650527051464744e-05, + "loss": 0.7937930822372437, + "step": 1187 + }, + { + "epoch": 0.4186784140969163, + "grad_norm": 2.8190993538517524, + "learning_rate": 1.8647602591279873e-05, + "loss": 0.6819020509719849, + "step": 1188 + }, + { + "epoch": 0.4190308370044053, + "grad_norm": 1.3567646132379503, + "learning_rate": 1.864467519550305e-05, + "loss": 0.75614994764328, + "step": 1189 + }, + { + "epoch": 0.4193832599118943, + "grad_norm": 1.567742841021855, + "learning_rate": 1.864174486512803e-05, + "loss": 0.6966177225112915, + "step": 1190 + }, + { + "epoch": 0.4197356828193833, + "grad_norm": 1.7710714107881367, + "learning_rate": 1.8638811601149568e-05, + "loss": 0.821509838104248, + "step": 1191 + }, + { + "epoch": 0.4200881057268723, + "grad_norm": 
1.2328562386437087, + "learning_rate": 1.8635875404563414e-05, + "loss": 0.5905138254165649, + "step": 1192 + }, + { + "epoch": 0.4204405286343612, + "grad_norm": 1.4647056442197128, + "learning_rate": 1.8632936276366323e-05, + "loss": 0.6856247186660767, + "step": 1193 + }, + { + "epoch": 0.4207929515418502, + "grad_norm": 1.4886760353067057, + "learning_rate": 1.862999421755603e-05, + "loss": 0.745036244392395, + "step": 1194 + }, + { + "epoch": 0.4211453744493392, + "grad_norm": 1.1750279689329006, + "learning_rate": 1.8627049229131276e-05, + "loss": 0.6503005027770996, + "step": 1195 + }, + { + "epoch": 0.4214977973568282, + "grad_norm": 1.5431880343600168, + "learning_rate": 1.86241013120918e-05, + "loss": 0.7498307228088379, + "step": 1196 + }, + { + "epoch": 0.4218502202643172, + "grad_norm": 1.3468463845976426, + "learning_rate": 1.862115046743831e-05, + "loss": 0.7928652763366699, + "step": 1197 + }, + { + "epoch": 0.42220264317180617, + "grad_norm": 1.2342083264732957, + "learning_rate": 1.861819669617254e-05, + "loss": 0.6854137182235718, + "step": 1198 + }, + { + "epoch": 0.42255506607929516, + "grad_norm": 1.2078818370142543, + "learning_rate": 1.86152399992972e-05, + "loss": 0.6196715831756592, + "step": 1199 + }, + { + "epoch": 0.42290748898678415, + "grad_norm": 1.3970249114344502, + "learning_rate": 1.8612280377816e-05, + "loss": 0.6937464475631714, + "step": 1200 + }, + { + "epoch": 0.42325991189427314, + "grad_norm": 1.68603514212184, + "learning_rate": 1.860931783273363e-05, + "loss": 0.7681070566177368, + "step": 1201 + }, + { + "epoch": 0.42361233480176214, + "grad_norm": 1.1472443629032707, + "learning_rate": 1.860635236505579e-05, + "loss": 0.676302969455719, + "step": 1202 + }, + { + "epoch": 0.4239647577092511, + "grad_norm": 1.3856112594345633, + "learning_rate": 1.8603383975789168e-05, + "loss": 0.6533253192901611, + "step": 1203 + }, + { + "epoch": 0.42431718061674006, + "grad_norm": 1.3469284337535972, + "learning_rate": 
1.860041266594143e-05, + "loss": 0.689995288848877, + "step": 1204 + }, + { + "epoch": 0.42466960352422906, + "grad_norm": 1.5007772835228577, + "learning_rate": 1.859743843652124e-05, + "loss": 0.8129922747612, + "step": 1205 + }, + { + "epoch": 0.42502202643171805, + "grad_norm": 1.5410683437680426, + "learning_rate": 1.859446128853827e-05, + "loss": 0.8388077616691589, + "step": 1206 + }, + { + "epoch": 0.42537444933920704, + "grad_norm": 1.5558529097869003, + "learning_rate": 1.859148122300316e-05, + "loss": 0.8795225024223328, + "step": 1207 + }, + { + "epoch": 0.42572687224669603, + "grad_norm": 1.1213374735945745, + "learning_rate": 1.858849824092755e-05, + "loss": 0.7340251803398132, + "step": 1208 + }, + { + "epoch": 0.426079295154185, + "grad_norm": 1.4951423694810024, + "learning_rate": 1.8585512343324073e-05, + "loss": 0.8028355240821838, + "step": 1209 + }, + { + "epoch": 0.426431718061674, + "grad_norm": 1.4585659256901293, + "learning_rate": 1.8582523531206345e-05, + "loss": 0.8469998836517334, + "step": 1210 + }, + { + "epoch": 0.426784140969163, + "grad_norm": 1.5383443322846213, + "learning_rate": 1.857953180558898e-05, + "loss": 0.7562716007232666, + "step": 1211 + }, + { + "epoch": 0.427136563876652, + "grad_norm": 1.4113837543209433, + "learning_rate": 1.857653716748757e-05, + "loss": 0.7166177034378052, + "step": 1212 + }, + { + "epoch": 0.427488986784141, + "grad_norm": 1.5418199345701933, + "learning_rate": 1.85735396179187e-05, + "loss": 0.6946159601211548, + "step": 1213 + }, + { + "epoch": 0.42784140969163, + "grad_norm": 1.317478160039542, + "learning_rate": 1.8570539157899953e-05, + "loss": 0.5341482758522034, + "step": 1214 + }, + { + "epoch": 0.4281938325991189, + "grad_norm": 1.4287482623115888, + "learning_rate": 1.8567535788449886e-05, + "loss": 0.8128249645233154, + "step": 1215 + }, + { + "epoch": 0.4285462555066079, + "grad_norm": 1.34325298688053, + "learning_rate": 1.8564529510588046e-05, + "loss": 0.7136335372924805, + 
"step": 1216 + }, + { + "epoch": 0.4288986784140969, + "grad_norm": 1.358163949395023, + "learning_rate": 1.856152032533498e-05, + "loss": 0.6737562417984009, + "step": 1217 + }, + { + "epoch": 0.4292511013215859, + "grad_norm": 1.306172251281951, + "learning_rate": 1.855850823371221e-05, + "loss": 0.8102772235870361, + "step": 1218 + }, + { + "epoch": 0.4296035242290749, + "grad_norm": 1.4109010281873726, + "learning_rate": 1.855549323674224e-05, + "loss": 0.7389130592346191, + "step": 1219 + }, + { + "epoch": 0.4299559471365639, + "grad_norm": 1.6519920374913426, + "learning_rate": 1.8552475335448575e-05, + "loss": 0.9127305746078491, + "step": 1220 + }, + { + "epoch": 0.43030837004405287, + "grad_norm": 1.4401162301668198, + "learning_rate": 1.8549454530855697e-05, + "loss": 0.7599691152572632, + "step": 1221 + }, + { + "epoch": 0.43066079295154186, + "grad_norm": 1.59834239528244, + "learning_rate": 1.8546430823989075e-05, + "loss": 0.8343819379806519, + "step": 1222 + }, + { + "epoch": 0.43101321585903085, + "grad_norm": 1.7081796080725813, + "learning_rate": 1.8543404215875163e-05, + "loss": 0.7759256362915039, + "step": 1223 + }, + { + "epoch": 0.43136563876651984, + "grad_norm": 1.3364188660639875, + "learning_rate": 1.8540374707541398e-05, + "loss": 0.7803373336791992, + "step": 1224 + }, + { + "epoch": 0.43171806167400884, + "grad_norm": 1.4538494145578122, + "learning_rate": 1.8537342300016208e-05, + "loss": 0.6292921304702759, + "step": 1225 + }, + { + "epoch": 0.43207048458149777, + "grad_norm": 1.4521641959343445, + "learning_rate": 1.8534306994329e-05, + "loss": 0.8495175838470459, + "step": 1226 + }, + { + "epoch": 0.43242290748898676, + "grad_norm": 1.3062742481146943, + "learning_rate": 1.8531268791510167e-05, + "loss": 0.6141406297683716, + "step": 1227 + }, + { + "epoch": 0.43277533039647575, + "grad_norm": 1.576341879030456, + "learning_rate": 1.8528227692591076e-05, + "loss": 0.7087793350219727, + "step": 1228 + }, + { + "epoch": 
0.43312775330396475, + "grad_norm": 1.5442094308389636, + "learning_rate": 1.8525183698604098e-05, + "loss": 0.7919498682022095, + "step": 1229 + }, + { + "epoch": 0.43348017621145374, + "grad_norm": 1.317139155945084, + "learning_rate": 1.8522136810582563e-05, + "loss": 0.7408226728439331, + "step": 1230 + }, + { + "epoch": 0.43383259911894273, + "grad_norm": 1.407715848952146, + "learning_rate": 1.85190870295608e-05, + "loss": 0.7140083312988281, + "step": 1231 + }, + { + "epoch": 0.4341850220264317, + "grad_norm": 1.4117801977693214, + "learning_rate": 1.8516034356574118e-05, + "loss": 0.7211521863937378, + "step": 1232 + }, + { + "epoch": 0.4345374449339207, + "grad_norm": 1.1753876244240768, + "learning_rate": 1.85129787926588e-05, + "loss": 0.7103208303451538, + "step": 1233 + }, + { + "epoch": 0.4348898678414097, + "grad_norm": 1.4479636604064312, + "learning_rate": 1.850992033885211e-05, + "loss": 0.816985011100769, + "step": 1234 + }, + { + "epoch": 0.4352422907488987, + "grad_norm": 1.4368000528699751, + "learning_rate": 1.850685899619231e-05, + "loss": 0.6678498983383179, + "step": 1235 + }, + { + "epoch": 0.4355947136563877, + "grad_norm": 1.4259303259837681, + "learning_rate": 1.8503794765718622e-05, + "loss": 0.7895394563674927, + "step": 1236 + }, + { + "epoch": 0.4359471365638766, + "grad_norm": 1.4256180200365283, + "learning_rate": 1.8500727648471258e-05, + "loss": 0.7295971512794495, + "step": 1237 + }, + { + "epoch": 0.4362995594713656, + "grad_norm": 1.552299015894991, + "learning_rate": 1.849765764549141e-05, + "loss": 0.7216300964355469, + "step": 1238 + }, + { + "epoch": 0.4366519823788546, + "grad_norm": 2.585430848560662, + "learning_rate": 1.8494584757821252e-05, + "loss": 0.8088986873626709, + "step": 1239 + }, + { + "epoch": 0.4370044052863436, + "grad_norm": 1.3100612400703413, + "learning_rate": 1.8491508986503928e-05, + "loss": 0.7380663156509399, + "step": 1240 + }, + { + "epoch": 0.4373568281938326, + "grad_norm": 
1.6225248085666293, + "learning_rate": 1.8488430332583566e-05, + "loss": 0.8671622276306152, + "step": 1241 + }, + { + "epoch": 0.4377092511013216, + "grad_norm": 1.2548349586148027, + "learning_rate": 1.8485348797105277e-05, + "loss": 0.6649274826049805, + "step": 1242 + }, + { + "epoch": 0.4380616740088106, + "grad_norm": 1.3492988450242405, + "learning_rate": 1.848226438111515e-05, + "loss": 0.740972638130188, + "step": 1243 + }, + { + "epoch": 0.43841409691629957, + "grad_norm": 1.4062352938849376, + "learning_rate": 1.8479177085660237e-05, + "loss": 0.6593915820121765, + "step": 1244 + }, + { + "epoch": 0.43876651982378856, + "grad_norm": 1.567811244473075, + "learning_rate": 1.8476086911788588e-05, + "loss": 0.792604923248291, + "step": 1245 + }, + { + "epoch": 0.43911894273127755, + "grad_norm": 1.583820790059346, + "learning_rate": 1.8472993860549216e-05, + "loss": 0.7521885633468628, + "step": 1246 + }, + { + "epoch": 0.43947136563876654, + "grad_norm": 1.4520072830804587, + "learning_rate": 1.846989793299212e-05, + "loss": 0.7242246270179749, + "step": 1247 + }, + { + "epoch": 0.43982378854625553, + "grad_norm": 1.2892821056189339, + "learning_rate": 1.846679913016827e-05, + "loss": 0.7343394160270691, + "step": 1248 + }, + { + "epoch": 0.44017621145374447, + "grad_norm": 1.2525729631593605, + "learning_rate": 1.846369745312961e-05, + "loss": 0.747876763343811, + "step": 1249 + }, + { + "epoch": 0.44052863436123346, + "grad_norm": 1.428983542355963, + "learning_rate": 1.8460592902929064e-05, + "loss": 0.7280946969985962, + "step": 1250 + }, + { + "epoch": 0.44088105726872245, + "grad_norm": 1.4254243168735732, + "learning_rate": 1.845748548062053e-05, + "loss": 0.7288519144058228, + "step": 1251 + }, + { + "epoch": 0.44123348017621145, + "grad_norm": 1.4847519735948493, + "learning_rate": 1.8454375187258885e-05, + "loss": 0.6269914507865906, + "step": 1252 + }, + { + "epoch": 0.44158590308370044, + "grad_norm": 1.5355271633317282, + "learning_rate": 
1.8451262023899973e-05, + "loss": 0.7848949432373047, + "step": 1253 + }, + { + "epoch": 0.44193832599118943, + "grad_norm": 1.580356922946946, + "learning_rate": 1.8448145991600618e-05, + "loss": 0.7306517958641052, + "step": 1254 + }, + { + "epoch": 0.4422907488986784, + "grad_norm": 1.3971874565683924, + "learning_rate": 1.8445027091418614e-05, + "loss": 0.6933906078338623, + "step": 1255 + }, + { + "epoch": 0.4426431718061674, + "grad_norm": 1.2942221540854206, + "learning_rate": 1.8441905324412732e-05, + "loss": 0.8260579109191895, + "step": 1256 + }, + { + "epoch": 0.4429955947136564, + "grad_norm": 1.4276139754434451, + "learning_rate": 1.8438780691642712e-05, + "loss": 0.6818344593048096, + "step": 1257 + }, + { + "epoch": 0.4433480176211454, + "grad_norm": 1.5571344695334373, + "learning_rate": 1.8435653194169274e-05, + "loss": 0.5980014801025391, + "step": 1258 + }, + { + "epoch": 0.4437004405286344, + "grad_norm": 1.6363647319534165, + "learning_rate": 1.8432522833054102e-05, + "loss": 0.7694655656814575, + "step": 1259 + }, + { + "epoch": 0.4440528634361233, + "grad_norm": 1.4888452953161495, + "learning_rate": 1.842938960935986e-05, + "loss": 0.6861646771430969, + "step": 1260 + }, + { + "epoch": 0.4444052863436123, + "grad_norm": 1.5245731543783476, + "learning_rate": 1.8426253524150176e-05, + "loss": 0.7346323728561401, + "step": 1261 + }, + { + "epoch": 0.4447577092511013, + "grad_norm": 1.5555183873270297, + "learning_rate": 1.8423114578489657e-05, + "loss": 0.7116265296936035, + "step": 1262 + }, + { + "epoch": 0.4451101321585903, + "grad_norm": 1.3587295641859045, + "learning_rate": 1.8419972773443877e-05, + "loss": 0.7148594856262207, + "step": 1263 + }, + { + "epoch": 0.4454625550660793, + "grad_norm": 1.4208610042885819, + "learning_rate": 1.8416828110079378e-05, + "loss": 0.6629737615585327, + "step": 1264 + }, + { + "epoch": 0.4458149779735683, + "grad_norm": 1.2215430932959532, + "learning_rate": 1.8413680589463677e-05, + "loss": 
0.5734454393386841, + "step": 1265 + }, + { + "epoch": 0.4461674008810573, + "grad_norm": 1.4728067026699625, + "learning_rate": 1.8410530212665258e-05, + "loss": 0.8129212260246277, + "step": 1266 + }, + { + "epoch": 0.44651982378854627, + "grad_norm": 1.5823039225136746, + "learning_rate": 1.8407376980753578e-05, + "loss": 0.7408754825592041, + "step": 1267 + }, + { + "epoch": 0.44687224669603526, + "grad_norm": 2.9520848026313633, + "learning_rate": 1.840422089479906e-05, + "loss": 0.7315034866333008, + "step": 1268 + }, + { + "epoch": 0.44722466960352425, + "grad_norm": 1.453693040198655, + "learning_rate": 1.8401061955873102e-05, + "loss": 0.6774684190750122, + "step": 1269 + }, + { + "epoch": 0.44757709251101324, + "grad_norm": 1.4189733125983666, + "learning_rate": 1.8397900165048055e-05, + "loss": 0.6615294814109802, + "step": 1270 + }, + { + "epoch": 0.4479295154185022, + "grad_norm": 1.465563156151872, + "learning_rate": 1.8394735523397258e-05, + "loss": 0.6757136583328247, + "step": 1271 + }, + { + "epoch": 0.44828193832599117, + "grad_norm": 1.3581337883847424, + "learning_rate": 1.8391568031995004e-05, + "loss": 0.6395466327667236, + "step": 1272 + }, + { + "epoch": 0.44863436123348016, + "grad_norm": 1.3957918327614203, + "learning_rate": 1.8388397691916556e-05, + "loss": 0.6436404585838318, + "step": 1273 + }, + { + "epoch": 0.44898678414096915, + "grad_norm": 1.2217258095016672, + "learning_rate": 1.838522450423815e-05, + "loss": 0.6280484199523926, + "step": 1274 + }, + { + "epoch": 0.44933920704845814, + "grad_norm": 1.3831470857016404, + "learning_rate": 1.8382048470036983e-05, + "loss": 0.7485225200653076, + "step": 1275 + }, + { + "epoch": 0.44969162995594714, + "grad_norm": 1.5437699808102354, + "learning_rate": 1.8378869590391217e-05, + "loss": 0.745079517364502, + "step": 1276 + }, + { + "epoch": 0.45004405286343613, + "grad_norm": 1.5902187054867891, + "learning_rate": 1.8375687866379988e-05, + "loss": 0.656510591506958, + "step": 1277 + }, 
+ { + "epoch": 0.4503964757709251, + "grad_norm": 1.542738255105748, + "learning_rate": 1.8372503299083392e-05, + "loss": 0.7122445106506348, + "step": 1278 + }, + { + "epoch": 0.4507488986784141, + "grad_norm": 1.5368544285826038, + "learning_rate": 1.8369315889582483e-05, + "loss": 0.7402621507644653, + "step": 1279 + }, + { + "epoch": 0.4511013215859031, + "grad_norm": 1.539047411882563, + "learning_rate": 1.8366125638959292e-05, + "loss": 0.79311203956604, + "step": 1280 + }, + { + "epoch": 0.4514537444933921, + "grad_norm": 1.5615114889746888, + "learning_rate": 1.8362932548296815e-05, + "loss": 0.7748456001281738, + "step": 1281 + }, + { + "epoch": 0.45180616740088103, + "grad_norm": 1.4203050333533118, + "learning_rate": 1.8359736618679e-05, + "loss": 0.8285728096961975, + "step": 1282 + }, + { + "epoch": 0.45215859030837, + "grad_norm": 1.5541412727714081, + "learning_rate": 1.835653785119076e-05, + "loss": 0.7874733209609985, + "step": 1283 + }, + { + "epoch": 0.452511013215859, + "grad_norm": 1.990742110424804, + "learning_rate": 1.8353336246917996e-05, + "loss": 0.8984566926956177, + "step": 1284 + }, + { + "epoch": 0.452863436123348, + "grad_norm": 1.5779572276747513, + "learning_rate": 1.8350131806947537e-05, + "loss": 0.7730413675308228, + "step": 1285 + }, + { + "epoch": 0.453215859030837, + "grad_norm": 1.7109096071986905, + "learning_rate": 1.8346924532367195e-05, + "loss": 0.6064612865447998, + "step": 1286 + }, + { + "epoch": 0.453568281938326, + "grad_norm": 1.2417304411100711, + "learning_rate": 1.8343714424265742e-05, + "loss": 0.6946402192115784, + "step": 1287 + }, + { + "epoch": 0.453920704845815, + "grad_norm": 1.4035686433407615, + "learning_rate": 1.8340501483732908e-05, + "loss": 0.6131751537322998, + "step": 1288 + }, + { + "epoch": 0.454273127753304, + "grad_norm": 1.5800587203565855, + "learning_rate": 1.833728571185938e-05, + "loss": 0.7251182794570923, + "step": 1289 + }, + { + "epoch": 0.45462555066079297, + "grad_norm": 
1.4036983560957499, + "learning_rate": 1.8334067109736826e-05, + "loss": 0.6548069715499878, + "step": 1290 + }, + { + "epoch": 0.45497797356828196, + "grad_norm": 1.3998869795024185, + "learning_rate": 1.833084567845785e-05, + "loss": 0.7416098117828369, + "step": 1291 + }, + { + "epoch": 0.45533039647577095, + "grad_norm": 2.508404695128388, + "learning_rate": 1.8327621419116034e-05, + "loss": 0.7320964932441711, + "step": 1292 + }, + { + "epoch": 0.4556828193832599, + "grad_norm": 1.3052290617356537, + "learning_rate": 1.8324394332805913e-05, + "loss": 0.5926196575164795, + "step": 1293 + }, + { + "epoch": 0.4560352422907489, + "grad_norm": 1.5674410721277312, + "learning_rate": 1.8321164420622977e-05, + "loss": 0.5294085741043091, + "step": 1294 + }, + { + "epoch": 0.45638766519823787, + "grad_norm": 1.2785938430138426, + "learning_rate": 1.8317931683663688e-05, + "loss": 0.6332723498344421, + "step": 1295 + }, + { + "epoch": 0.45674008810572686, + "grad_norm": 1.5962686180302166, + "learning_rate": 1.8314696123025456e-05, + "loss": 0.8361148834228516, + "step": 1296 + }, + { + "epoch": 0.45709251101321585, + "grad_norm": 1.4587382180744954, + "learning_rate": 1.8311457739806648e-05, + "loss": 0.8097354173660278, + "step": 1297 + }, + { + "epoch": 0.45744493392070484, + "grad_norm": 1.5247898400944095, + "learning_rate": 1.8308216535106606e-05, + "loss": 0.8619102239608765, + "step": 1298 + }, + { + "epoch": 0.45779735682819384, + "grad_norm": 1.7222438621078806, + "learning_rate": 1.8304972510025607e-05, + "loss": 0.8149014711380005, + "step": 1299 + }, + { + "epoch": 0.4581497797356828, + "grad_norm": 1.4821216839710079, + "learning_rate": 1.8301725665664904e-05, + "loss": 0.6217210292816162, + "step": 1300 + }, + { + "epoch": 0.4585022026431718, + "grad_norm": 1.3606031472973286, + "learning_rate": 1.8298476003126695e-05, + "loss": 0.7496612071990967, + "step": 1301 + }, + { + "epoch": 0.4588546255506608, + "grad_norm": 1.3221676149271377, + "learning_rate": 
1.8295223523514144e-05, + "loss": 0.743242084980011, + "step": 1302 + }, + { + "epoch": 0.4592070484581498, + "grad_norm": 1.3745674408132749, + "learning_rate": 1.829196822793136e-05, + "loss": 0.6425061821937561, + "step": 1303 + }, + { + "epoch": 0.4595594713656388, + "grad_norm": 1.6216951689157317, + "learning_rate": 1.828871011748342e-05, + "loss": 0.8274835348129272, + "step": 1304 + }, + { + "epoch": 0.45991189427312773, + "grad_norm": 1.2722833909738493, + "learning_rate": 1.828544919327635e-05, + "loss": 0.6403865814208984, + "step": 1305 + }, + { + "epoch": 0.4602643171806167, + "grad_norm": 1.234115960449283, + "learning_rate": 1.828218545641713e-05, + "loss": 0.6585257053375244, + "step": 1306 + }, + { + "epoch": 0.4606167400881057, + "grad_norm": 1.2325421263478973, + "learning_rate": 1.82789189080137e-05, + "loss": 0.6467862129211426, + "step": 1307 + }, + { + "epoch": 0.4609691629955947, + "grad_norm": 1.403654297681647, + "learning_rate": 1.827564954917495e-05, + "loss": 0.8656524419784546, + "step": 1308 + }, + { + "epoch": 0.4613215859030837, + "grad_norm": 1.449712147167455, + "learning_rate": 1.8272377381010726e-05, + "loss": 0.7298469543457031, + "step": 1309 + }, + { + "epoch": 0.4616740088105727, + "grad_norm": 1.575558340533703, + "learning_rate": 1.8269102404631826e-05, + "loss": 0.7342871427536011, + "step": 1310 + }, + { + "epoch": 0.4620264317180617, + "grad_norm": 1.4177026442874099, + "learning_rate": 1.8265824621150005e-05, + "loss": 0.7437269687652588, + "step": 1311 + }, + { + "epoch": 0.4623788546255507, + "grad_norm": 1.370008690924395, + "learning_rate": 1.8262544031677965e-05, + "loss": 0.6761496067047119, + "step": 1312 + }, + { + "epoch": 0.46273127753303966, + "grad_norm": 1.3488719018465838, + "learning_rate": 1.825926063732937e-05, + "loss": 0.6504565477371216, + "step": 1313 + }, + { + "epoch": 0.46308370044052866, + "grad_norm": 1.5002490307110308, + "learning_rate": 1.8255974439218826e-05, + "loss": 0.7058892250061035, 
+ "step": 1314 + }, + { + "epoch": 0.46343612334801765, + "grad_norm": 1.37061056314256, + "learning_rate": 1.8252685438461893e-05, + "loss": 0.704500675201416, + "step": 1315 + }, + { + "epoch": 0.4637885462555066, + "grad_norm": 1.3921050444029468, + "learning_rate": 1.824939363617509e-05, + "loss": 0.7438445091247559, + "step": 1316 + }, + { + "epoch": 0.4641409691629956, + "grad_norm": 1.4372002500080507, + "learning_rate": 1.8246099033475872e-05, + "loss": 0.6610915660858154, + "step": 1317 + }, + { + "epoch": 0.46449339207048457, + "grad_norm": 1.0745723869419856, + "learning_rate": 1.8242801631482666e-05, + "loss": 0.5868711471557617, + "step": 1318 + }, + { + "epoch": 0.46484581497797356, + "grad_norm": 1.192238188456442, + "learning_rate": 1.8239501431314828e-05, + "loss": 0.7403215765953064, + "step": 1319 + }, + { + "epoch": 0.46519823788546255, + "grad_norm": 1.2444894883495399, + "learning_rate": 1.823619843409268e-05, + "loss": 0.6836927533149719, + "step": 1320 + }, + { + "epoch": 0.46555066079295154, + "grad_norm": 1.4619703465719247, + "learning_rate": 1.8232892640937482e-05, + "loss": 0.744488537311554, + "step": 1321 + }, + { + "epoch": 0.46590308370044053, + "grad_norm": 1.6337099192848834, + "learning_rate": 1.822958405297145e-05, + "loss": 0.8203051090240479, + "step": 1322 + }, + { + "epoch": 0.4662555066079295, + "grad_norm": 1.184261838198034, + "learning_rate": 1.8226272671317747e-05, + "loss": 0.6452913284301758, + "step": 1323 + }, + { + "epoch": 0.4666079295154185, + "grad_norm": 1.6458345614686154, + "learning_rate": 1.8222958497100482e-05, + "loss": 0.7362639307975769, + "step": 1324 + }, + { + "epoch": 0.4669603524229075, + "grad_norm": 6.608293048647877, + "learning_rate": 1.8219641531444713e-05, + "loss": 0.8192600011825562, + "step": 1325 + }, + { + "epoch": 0.4673127753303965, + "grad_norm": 1.4257376230679313, + "learning_rate": 1.8216321775476452e-05, + "loss": 0.8391410112380981, + "step": 1326 + }, + { + "epoch": 
0.46766519823788544, + "grad_norm": 1.3133795307817668, + "learning_rate": 1.8212999230322648e-05, + "loss": 0.8723593354225159, + "step": 1327 + }, + { + "epoch": 0.46801762114537443, + "grad_norm": 1.4218119484201381, + "learning_rate": 1.8209673897111208e-05, + "loss": 0.6891233921051025, + "step": 1328 + }, + { + "epoch": 0.4683700440528634, + "grad_norm": 1.414801660380672, + "learning_rate": 1.820634577697097e-05, + "loss": 0.6585180759429932, + "step": 1329 + }, + { + "epoch": 0.4687224669603524, + "grad_norm": 1.503205293925671, + "learning_rate": 1.8203014871031732e-05, + "loss": 0.9556418657302856, + "step": 1330 + }, + { + "epoch": 0.4690748898678414, + "grad_norm": 1.491345239113851, + "learning_rate": 1.8199681180424234e-05, + "loss": 0.803380012512207, + "step": 1331 + }, + { + "epoch": 0.4694273127753304, + "grad_norm": 1.6217603270172032, + "learning_rate": 1.819634470628016e-05, + "loss": 0.7090115547180176, + "step": 1332 + }, + { + "epoch": 0.4697797356828194, + "grad_norm": 1.6705712009535991, + "learning_rate": 1.8193005449732134e-05, + "loss": 0.6314720511436462, + "step": 1333 + }, + { + "epoch": 0.4701321585903084, + "grad_norm": 1.4756439095691731, + "learning_rate": 1.8189663411913737e-05, + "loss": 0.72248375415802, + "step": 1334 + }, + { + "epoch": 0.47048458149779737, + "grad_norm": 1.2477075880097683, + "learning_rate": 1.818631859395948e-05, + "loss": 0.6192474961280823, + "step": 1335 + }, + { + "epoch": 0.47083700440528636, + "grad_norm": 1.4944381119847567, + "learning_rate": 1.818297099700483e-05, + "loss": 0.6354564428329468, + "step": 1336 + }, + { + "epoch": 0.47118942731277536, + "grad_norm": 1.3129251382794922, + "learning_rate": 1.817962062218619e-05, + "loss": 0.7577195167541504, + "step": 1337 + }, + { + "epoch": 0.4715418502202643, + "grad_norm": 1.5504293722974503, + "learning_rate": 1.8176267470640908e-05, + "loss": 0.8064994812011719, + "step": 1338 + }, + { + "epoch": 0.4718942731277533, + "grad_norm": 
1.211182925950848, + "learning_rate": 1.8172911543507276e-05, + "loss": 0.5994154214859009, + "step": 1339 + }, + { + "epoch": 0.4722466960352423, + "grad_norm": 1.701641381957404, + "learning_rate": 1.8169552841924524e-05, + "loss": 0.7483634948730469, + "step": 1340 + }, + { + "epoch": 0.47259911894273127, + "grad_norm": 1.5346948984560977, + "learning_rate": 1.8166191367032828e-05, + "loss": 0.817699134349823, + "step": 1341 + }, + { + "epoch": 0.47295154185022026, + "grad_norm": 1.4634504483386954, + "learning_rate": 1.8162827119973305e-05, + "loss": 0.7262923717498779, + "step": 1342 + }, + { + "epoch": 0.47330396475770925, + "grad_norm": 1.6796646988667925, + "learning_rate": 1.8159460101888013e-05, + "loss": 0.6097851991653442, + "step": 1343 + }, + { + "epoch": 0.47365638766519824, + "grad_norm": 1.3148094915971675, + "learning_rate": 1.8156090313919944e-05, + "loss": 0.7258971929550171, + "step": 1344 + }, + { + "epoch": 0.47400881057268723, + "grad_norm": 1.198607169385478, + "learning_rate": 1.8152717757213045e-05, + "loss": 0.6300361156463623, + "step": 1345 + }, + { + "epoch": 0.4743612334801762, + "grad_norm": 1.397827708634256, + "learning_rate": 1.8149342432912184e-05, + "loss": 0.7339942455291748, + "step": 1346 + }, + { + "epoch": 0.4747136563876652, + "grad_norm": 1.4524082687419129, + "learning_rate": 1.8145964342163188e-05, + "loss": 0.7520095109939575, + "step": 1347 + }, + { + "epoch": 0.4750660792951542, + "grad_norm": 1.6587168399408485, + "learning_rate": 1.814258348611281e-05, + "loss": 0.7276853322982788, + "step": 1348 + }, + { + "epoch": 0.47541850220264315, + "grad_norm": 1.4463166573664321, + "learning_rate": 1.8139199865908742e-05, + "loss": 0.8004029989242554, + "step": 1349 + }, + { + "epoch": 0.47577092511013214, + "grad_norm": 1.4508723815154267, + "learning_rate": 1.8135813482699623e-05, + "loss": 0.6932536363601685, + "step": 1350 + }, + { + "epoch": 0.47612334801762113, + "grad_norm": 1.8868515127553653, + "learning_rate": 
1.8132424337635026e-05, + "loss": 0.7697082161903381, + "step": 1351 + }, + { + "epoch": 0.4764757709251101, + "grad_norm": 1.246718000700102, + "learning_rate": 1.8129032431865453e-05, + "loss": 0.6472513675689697, + "step": 1352 + }, + { + "epoch": 0.4768281938325991, + "grad_norm": 1.413046013449196, + "learning_rate": 1.8125637766542353e-05, + "loss": 0.6483110785484314, + "step": 1353 + }, + { + "epoch": 0.4771806167400881, + "grad_norm": 1.4854860856809686, + "learning_rate": 1.8122240342818113e-05, + "loss": 0.5495485067367554, + "step": 1354 + }, + { + "epoch": 0.4775330396475771, + "grad_norm": 1.2801602602197804, + "learning_rate": 1.811884016184605e-05, + "loss": 0.5235577821731567, + "step": 1355 + }, + { + "epoch": 0.4778854625550661, + "grad_norm": 1.734412256759482, + "learning_rate": 1.811543722478042e-05, + "loss": 0.7852121591567993, + "step": 1356 + }, + { + "epoch": 0.4782378854625551, + "grad_norm": 1.3650060645350073, + "learning_rate": 1.811203153277641e-05, + "loss": 0.6704862713813782, + "step": 1357 + }, + { + "epoch": 0.47859030837004407, + "grad_norm": 1.6553040991032588, + "learning_rate": 1.8108623086990156e-05, + "loss": 0.5964453220367432, + "step": 1358 + }, + { + "epoch": 0.47894273127753306, + "grad_norm": 1.3936312619950861, + "learning_rate": 1.8105211888578708e-05, + "loss": 0.6697995066642761, + "step": 1359 + }, + { + "epoch": 0.479295154185022, + "grad_norm": 1.5031130965144783, + "learning_rate": 1.810179793870007e-05, + "loss": 0.6335821151733398, + "step": 1360 + }, + { + "epoch": 0.479647577092511, + "grad_norm": 1.5635708705560234, + "learning_rate": 1.8098381238513173e-05, + "loss": 0.7925145626068115, + "step": 1361 + }, + { + "epoch": 0.48, + "grad_norm": 1.3011545804458011, + "learning_rate": 1.809496178917787e-05, + "loss": 0.6567563414573669, + "step": 1362 + }, + { + "epoch": 0.480352422907489, + "grad_norm": 1.6816341182204335, + "learning_rate": 1.809153959185497e-05, + "loss": 0.6318811178207397, + "step": 
1363 + }, + { + "epoch": 0.48070484581497797, + "grad_norm": 1.6781349693525882, + "learning_rate": 1.8088114647706195e-05, + "loss": 0.7309727668762207, + "step": 1364 + }, + { + "epoch": 0.48105726872246696, + "grad_norm": 1.689289351270497, + "learning_rate": 1.8084686957894207e-05, + "loss": 0.7109836339950562, + "step": 1365 + }, + { + "epoch": 0.48140969162995595, + "grad_norm": 1.5638040238741844, + "learning_rate": 1.8081256523582604e-05, + "loss": 0.7475707530975342, + "step": 1366 + }, + { + "epoch": 0.48176211453744494, + "grad_norm": 1.492251829838995, + "learning_rate": 1.8077823345935904e-05, + "loss": 0.7149914503097534, + "step": 1367 + }, + { + "epoch": 0.48211453744493393, + "grad_norm": 1.5575297411632822, + "learning_rate": 1.8074387426119574e-05, + "loss": 0.7294478416442871, + "step": 1368 + }, + { + "epoch": 0.4824669603524229, + "grad_norm": 1.4689289799329066, + "learning_rate": 1.8070948765299995e-05, + "loss": 0.7115635871887207, + "step": 1369 + }, + { + "epoch": 0.4828193832599119, + "grad_norm": 1.5506146763507274, + "learning_rate": 1.806750736464449e-05, + "loss": 0.7046270966529846, + "step": 1370 + }, + { + "epoch": 0.4831718061674009, + "grad_norm": 1.42427078791196, + "learning_rate": 1.8064063225321305e-05, + "loss": 0.6206589937210083, + "step": 1371 + }, + { + "epoch": 0.48352422907488984, + "grad_norm": 1.2993997688945442, + "learning_rate": 1.8060616348499612e-05, + "loss": 0.7135940194129944, + "step": 1372 + }, + { + "epoch": 0.48387665198237884, + "grad_norm": 1.4643276104475023, + "learning_rate": 1.8057166735349533e-05, + "loss": 0.8360849618911743, + "step": 1373 + }, + { + "epoch": 0.4842290748898678, + "grad_norm": 1.541707883618089, + "learning_rate": 1.805371438704209e-05, + "loss": 0.6842360496520996, + "step": 1374 + }, + { + "epoch": 0.4845814977973568, + "grad_norm": 1.5840103891509227, + "learning_rate": 1.8050259304749254e-05, + "loss": 0.7615031003952026, + "step": 1375 + }, + { + "epoch": 
0.4849339207048458, + "grad_norm": 1.655426139564667, + "learning_rate": 1.804680148964392e-05, + "loss": 0.8019323348999023, + "step": 1376 + }, + { + "epoch": 0.4852863436123348, + "grad_norm": 1.4674909380859245, + "learning_rate": 1.8043340942899906e-05, + "loss": 0.7882958054542542, + "step": 1377 + }, + { + "epoch": 0.4856387665198238, + "grad_norm": 1.2708485815687132, + "learning_rate": 1.8039877665691955e-05, + "loss": 0.7504314184188843, + "step": 1378 + }, + { + "epoch": 0.4859911894273128, + "grad_norm": 1.4258268452315883, + "learning_rate": 1.803641165919575e-05, + "loss": 0.6634547710418701, + "step": 1379 + }, + { + "epoch": 0.4863436123348018, + "grad_norm": 1.5654330696713128, + "learning_rate": 1.803294292458789e-05, + "loss": 0.7744965553283691, + "step": 1380 + }, + { + "epoch": 0.48669603524229077, + "grad_norm": 1.4537322254817193, + "learning_rate": 1.8029471463045904e-05, + "loss": 0.6322098970413208, + "step": 1381 + }, + { + "epoch": 0.48704845814977976, + "grad_norm": 1.4410033770501562, + "learning_rate": 1.8025997275748237e-05, + "loss": 0.7675940990447998, + "step": 1382 + }, + { + "epoch": 0.4874008810572687, + "grad_norm": 1.4138967124963124, + "learning_rate": 1.8022520363874275e-05, + "loss": 0.805001974105835, + "step": 1383 + }, + { + "epoch": 0.4877533039647577, + "grad_norm": 1.4447058519334661, + "learning_rate": 1.8019040728604322e-05, + "loss": 0.7647902369499207, + "step": 1384 + }, + { + "epoch": 0.4881057268722467, + "grad_norm": 1.4676048165311881, + "learning_rate": 1.8015558371119604e-05, + "loss": 0.7267208099365234, + "step": 1385 + }, + { + "epoch": 0.4884581497797357, + "grad_norm": 1.698344162431053, + "learning_rate": 1.801207329260227e-05, + "loss": 0.9259899854660034, + "step": 1386 + }, + { + "epoch": 0.48881057268722466, + "grad_norm": 1.6007666753359713, + "learning_rate": 1.8008585494235398e-05, + "loss": 0.7127895951271057, + "step": 1387 + }, + { + "epoch": 0.48916299559471366, + "grad_norm": 
1.3612199688450533, + "learning_rate": 1.8005094977202987e-05, + "loss": 0.5890867710113525, + "step": 1388 + }, + { + "epoch": 0.48951541850220265, + "grad_norm": 1.355680060820382, + "learning_rate": 1.800160174268996e-05, + "loss": 0.9388052225112915, + "step": 1389 + }, + { + "epoch": 0.48986784140969164, + "grad_norm": 1.3938222391852138, + "learning_rate": 1.799810579188216e-05, + "loss": 0.7282747626304626, + "step": 1390 + }, + { + "epoch": 0.49022026431718063, + "grad_norm": 1.3481077360000804, + "learning_rate": 1.7994607125966354e-05, + "loss": 0.743558943271637, + "step": 1391 + }, + { + "epoch": 0.4905726872246696, + "grad_norm": 1.5830453320245632, + "learning_rate": 1.7991105746130234e-05, + "loss": 0.794719934463501, + "step": 1392 + }, + { + "epoch": 0.4909251101321586, + "grad_norm": 1.2758935421604947, + "learning_rate": 1.7987601653562402e-05, + "loss": 0.7320685982704163, + "step": 1393 + }, + { + "epoch": 0.49127753303964755, + "grad_norm": 1.7642547814838838, + "learning_rate": 1.798409484945239e-05, + "loss": 0.7376105785369873, + "step": 1394 + }, + { + "epoch": 0.49162995594713654, + "grad_norm": 1.2029848235346605, + "learning_rate": 1.7980585334990652e-05, + "loss": 0.7474706172943115, + "step": 1395 + }, + { + "epoch": 0.49198237885462553, + "grad_norm": 1.2018884579546327, + "learning_rate": 1.797707311136856e-05, + "loss": 0.5799805521965027, + "step": 1396 + }, + { + "epoch": 0.4923348017621145, + "grad_norm": 1.4260726798049534, + "learning_rate": 1.79735581797784e-05, + "loss": 0.7515959739685059, + "step": 1397 + }, + { + "epoch": 0.4926872246696035, + "grad_norm": 1.4843732287701579, + "learning_rate": 1.797004054141339e-05, + "loss": 0.6035799980163574, + "step": 1398 + }, + { + "epoch": 0.4930396475770925, + "grad_norm": 1.4699634461145672, + "learning_rate": 1.796652019746765e-05, + "loss": 0.7613668441772461, + "step": 1399 + }, + { + "epoch": 0.4933920704845815, + "grad_norm": 1.5395256627563776, + "learning_rate": 
1.7962997149136226e-05, + "loss": 0.8780882954597473, + "step": 1400 + }, + { + "epoch": 0.4937444933920705, + "grad_norm": 1.4849311758521768, + "learning_rate": 1.795947139761509e-05, + "loss": 0.8661091327667236, + "step": 1401 + }, + { + "epoch": 0.4940969162995595, + "grad_norm": 1.2531714361223334, + "learning_rate": 1.7955942944101124e-05, + "loss": 0.6893571019172668, + "step": 1402 + }, + { + "epoch": 0.4944493392070485, + "grad_norm": 1.4079915487364913, + "learning_rate": 1.7952411789792125e-05, + "loss": 0.787032961845398, + "step": 1403 + }, + { + "epoch": 0.49480176211453747, + "grad_norm": 1.3474472991478739, + "learning_rate": 1.7948877935886812e-05, + "loss": 0.5346347689628601, + "step": 1404 + }, + { + "epoch": 0.4951541850220264, + "grad_norm": 1.5512557601329955, + "learning_rate": 1.7945341383584818e-05, + "loss": 0.8090060949325562, + "step": 1405 + }, + { + "epoch": 0.4955066079295154, + "grad_norm": 1.4268796756971738, + "learning_rate": 1.7941802134086695e-05, + "loss": 0.6321496963500977, + "step": 1406 + }, + { + "epoch": 0.4958590308370044, + "grad_norm": 1.4602718850691796, + "learning_rate": 1.7938260188593903e-05, + "loss": 0.6405632495880127, + "step": 1407 + }, + { + "epoch": 0.4962114537444934, + "grad_norm": 1.3838752085896924, + "learning_rate": 1.7934715548308825e-05, + "loss": 0.7665356397628784, + "step": 1408 + }, + { + "epoch": 0.49656387665198237, + "grad_norm": 1.6983169415711221, + "learning_rate": 1.7931168214434757e-05, + "loss": 0.7960416078567505, + "step": 1409 + }, + { + "epoch": 0.49691629955947136, + "grad_norm": 1.3842449461014021, + "learning_rate": 1.7927618188175908e-05, + "loss": 0.8080639839172363, + "step": 1410 + }, + { + "epoch": 0.49726872246696036, + "grad_norm": 1.3034648934851016, + "learning_rate": 1.79240654707374e-05, + "loss": 0.6503266096115112, + "step": 1411 + }, + { + "epoch": 0.49762114537444935, + "grad_norm": 1.3378534420648176, + "learning_rate": 1.792051006332527e-05, + "loss": 
0.6063007116317749, + "step": 1412 + }, + { + "epoch": 0.49797356828193834, + "grad_norm": 1.849150255820523, + "learning_rate": 1.791695196714647e-05, + "loss": 0.6861660480499268, + "step": 1413 + }, + { + "epoch": 0.49832599118942733, + "grad_norm": 1.2217791382902905, + "learning_rate": 1.791339118340886e-05, + "loss": 0.7064980268478394, + "step": 1414 + }, + { + "epoch": 0.4986784140969163, + "grad_norm": 1.4370359000865323, + "learning_rate": 1.7909827713321214e-05, + "loss": 0.6102496981620789, + "step": 1415 + }, + { + "epoch": 0.49903083700440526, + "grad_norm": 1.446734818664789, + "learning_rate": 1.790626155809323e-05, + "loss": 0.7460618019104004, + "step": 1416 + }, + { + "epoch": 0.49938325991189425, + "grad_norm": 1.2988677548719765, + "learning_rate": 1.7902692718935496e-05, + "loss": 0.7124448418617249, + "step": 1417 + }, + { + "epoch": 0.49973568281938324, + "grad_norm": 1.322744101240627, + "learning_rate": 1.7899121197059525e-05, + "loss": 0.7194923162460327, + "step": 1418 + }, + { + "epoch": 0.5000881057268722, + "grad_norm": 1.4429377947794157, + "learning_rate": 1.7895546993677736e-05, + "loss": 0.6633901596069336, + "step": 1419 + }, + { + "epoch": 0.5004405286343613, + "grad_norm": 1.5531583469807302, + "learning_rate": 1.7891970110003463e-05, + "loss": 0.8554216623306274, + "step": 1420 + }, + { + "epoch": 0.5007929515418502, + "grad_norm": 1.4541421669927512, + "learning_rate": 1.7888390547250944e-05, + "loss": 0.7259502410888672, + "step": 1421 + }, + { + "epoch": 0.5011453744493392, + "grad_norm": 1.4299229413313208, + "learning_rate": 1.788480830663533e-05, + "loss": 0.7330816984176636, + "step": 1422 + }, + { + "epoch": 0.5014977973568282, + "grad_norm": 1.5727227347094554, + "learning_rate": 1.7881223389372678e-05, + "loss": 0.7793391346931458, + "step": 1423 + }, + { + "epoch": 0.5018502202643171, + "grad_norm": 1.5101282054621992, + "learning_rate": 1.787763579667996e-05, + "loss": 0.7387483716011047, + "step": 1424 + }, + { + 
"epoch": 0.5022026431718062, + "grad_norm": 1.2875272836020812, + "learning_rate": 1.787404552977505e-05, + "loss": 0.6665850877761841, + "step": 1425 + }, + { + "epoch": 0.5025550660792951, + "grad_norm": 1.6443234538305773, + "learning_rate": 1.7870452589876733e-05, + "loss": 0.7487791180610657, + "step": 1426 + }, + { + "epoch": 0.5029074889867842, + "grad_norm": 1.5494170755115177, + "learning_rate": 1.78668569782047e-05, + "loss": 0.6048247814178467, + "step": 1427 + }, + { + "epoch": 0.5032599118942731, + "grad_norm": 1.2664597501734751, + "learning_rate": 1.786325869597955e-05, + "loss": 0.7196261882781982, + "step": 1428 + }, + { + "epoch": 0.5036123348017622, + "grad_norm": 1.8296774166979555, + "learning_rate": 1.785965774442278e-05, + "loss": 0.6845135688781738, + "step": 1429 + }, + { + "epoch": 0.5039647577092511, + "grad_norm": 1.4157663102240734, + "learning_rate": 1.785605412475681e-05, + "loss": 0.7314398288726807, + "step": 1430 + }, + { + "epoch": 0.5043171806167401, + "grad_norm": 1.4666969447710358, + "learning_rate": 1.7852447838204957e-05, + "loss": 0.7171268463134766, + "step": 1431 + }, + { + "epoch": 0.5046696035242291, + "grad_norm": 1.33657009662446, + "learning_rate": 1.784883888599144e-05, + "loss": 0.8349916338920593, + "step": 1432 + }, + { + "epoch": 0.505022026431718, + "grad_norm": 1.3501942895276628, + "learning_rate": 1.7845227269341387e-05, + "loss": 0.6375530958175659, + "step": 1433 + }, + { + "epoch": 0.505374449339207, + "grad_norm": 1.42707048545369, + "learning_rate": 1.7841612989480824e-05, + "loss": 0.8156824707984924, + "step": 1434 + }, + { + "epoch": 0.505726872246696, + "grad_norm": 1.4408580248696123, + "learning_rate": 1.7837996047636696e-05, + "loss": 0.7186283469200134, + "step": 1435 + }, + { + "epoch": 0.506079295154185, + "grad_norm": 1.3439268630529597, + "learning_rate": 1.7834376445036834e-05, + "loss": 0.6130756139755249, + "step": 1436 + }, + { + "epoch": 0.506431718061674, + "grad_norm": 
1.5285659623162418, + "learning_rate": 1.7830754182909985e-05, + "loss": 0.6948508024215698, + "step": 1437 + }, + { + "epoch": 0.506784140969163, + "grad_norm": 1.3759453423428971, + "learning_rate": 1.7827129262485793e-05, + "loss": 0.7049688100814819, + "step": 1438 + }, + { + "epoch": 0.507136563876652, + "grad_norm": 1.457151343686531, + "learning_rate": 1.7823501684994805e-05, + "loss": 0.7491527795791626, + "step": 1439 + }, + { + "epoch": 0.507488986784141, + "grad_norm": 1.6101324796455516, + "learning_rate": 1.781987145166847e-05, + "loss": 0.8718780279159546, + "step": 1440 + }, + { + "epoch": 0.5078414096916299, + "grad_norm": 1.2572878912363772, + "learning_rate": 1.7816238563739144e-05, + "loss": 0.5675592422485352, + "step": 1441 + }, + { + "epoch": 0.508193832599119, + "grad_norm": 1.4044509323540495, + "learning_rate": 1.7812603022440076e-05, + "loss": 0.7472085952758789, + "step": 1442 + }, + { + "epoch": 0.5085462555066079, + "grad_norm": 1.3371129648202849, + "learning_rate": 1.7808964829005416e-05, + "loss": 0.7673810720443726, + "step": 1443 + }, + { + "epoch": 0.5088986784140969, + "grad_norm": 1.5506550607349072, + "learning_rate": 1.7805323984670224e-05, + "loss": 0.8245630264282227, + "step": 1444 + }, + { + "epoch": 0.5092511013215859, + "grad_norm": 1.488734758513416, + "learning_rate": 1.780168049067045e-05, + "loss": 0.8578429222106934, + "step": 1445 + }, + { + "epoch": 0.5096035242290748, + "grad_norm": 1.3892444083620181, + "learning_rate": 1.7798034348242944e-05, + "loss": 0.6631708145141602, + "step": 1446 + }, + { + "epoch": 0.5099559471365639, + "grad_norm": 1.3121030116229568, + "learning_rate": 1.779438555862546e-05, + "loss": 0.8106615543365479, + "step": 1447 + }, + { + "epoch": 0.5103083700440528, + "grad_norm": 1.1486822439059632, + "learning_rate": 1.7790734123056654e-05, + "loss": 0.7033256888389587, + "step": 1448 + }, + { + "epoch": 0.5106607929515419, + "grad_norm": 1.2259259255559172, + "learning_rate": 
1.7787080042776065e-05, + "loss": 0.7124278545379639, + "step": 1449 + }, + { + "epoch": 0.5110132158590308, + "grad_norm": 1.4546377837760451, + "learning_rate": 1.7783423319024144e-05, + "loss": 0.7834827899932861, + "step": 1450 + }, + { + "epoch": 0.5113656387665199, + "grad_norm": 1.4580618513432573, + "learning_rate": 1.777976395304224e-05, + "loss": 0.6762892603874207, + "step": 1451 + }, + { + "epoch": 0.5117180616740088, + "grad_norm": 1.4220157860300873, + "learning_rate": 1.7776101946072586e-05, + "loss": 0.7317261695861816, + "step": 1452 + }, + { + "epoch": 0.5120704845814978, + "grad_norm": 1.3265767127223091, + "learning_rate": 1.7772437299358324e-05, + "loss": 0.6278417110443115, + "step": 1453 + }, + { + "epoch": 0.5124229074889868, + "grad_norm": 1.319373459720871, + "learning_rate": 1.7768770014143485e-05, + "loss": 0.6638025045394897, + "step": 1454 + }, + { + "epoch": 0.5127753303964758, + "grad_norm": 1.400198364176684, + "learning_rate": 1.7765100091673e-05, + "loss": 0.786564826965332, + "step": 1455 + }, + { + "epoch": 0.5131277533039648, + "grad_norm": 1.4579007616104753, + "learning_rate": 1.776142753319269e-05, + "loss": 0.7483570575714111, + "step": 1456 + }, + { + "epoch": 0.5134801762114537, + "grad_norm": 1.5755517235246568, + "learning_rate": 1.7757752339949284e-05, + "loss": 0.7036221027374268, + "step": 1457 + }, + { + "epoch": 0.5138325991189427, + "grad_norm": 1.4840022330643747, + "learning_rate": 1.7754074513190384e-05, + "loss": 0.6903718709945679, + "step": 1458 + }, + { + "epoch": 0.5141850220264317, + "grad_norm": 1.2882226376562813, + "learning_rate": 1.77503940541645e-05, + "loss": 0.7728221416473389, + "step": 1459 + }, + { + "epoch": 0.5145374449339207, + "grad_norm": 1.327669814898394, + "learning_rate": 1.774671096412104e-05, + "loss": 0.7127183675765991, + "step": 1460 + }, + { + "epoch": 0.5148898678414097, + "grad_norm": 1.6330052955229915, + "learning_rate": 1.7743025244310293e-05, + "loss": 0.7801295518875122, + 
"step": 1461 + }, + { + "epoch": 0.5152422907488987, + "grad_norm": 1.1623220195345323, + "learning_rate": 1.773933689598345e-05, + "loss": 0.632892906665802, + "step": 1462 + }, + { + "epoch": 0.5155947136563876, + "grad_norm": 1.2497961025206838, + "learning_rate": 1.7735645920392587e-05, + "loss": 0.7347458600997925, + "step": 1463 + }, + { + "epoch": 0.5159471365638767, + "grad_norm": 1.5115996209276181, + "learning_rate": 1.7731952318790673e-05, + "loss": 0.6705365777015686, + "step": 1464 + }, + { + "epoch": 0.5162995594713656, + "grad_norm": 1.4475904564128834, + "learning_rate": 1.7728256092431577e-05, + "loss": 0.696006715297699, + "step": 1465 + }, + { + "epoch": 0.5166519823788547, + "grad_norm": 1.3978951424570836, + "learning_rate": 1.7724557242570045e-05, + "loss": 0.5922254323959351, + "step": 1466 + }, + { + "epoch": 0.5170044052863436, + "grad_norm": 1.2709448074189098, + "learning_rate": 1.7720855770461733e-05, + "loss": 0.6162985563278198, + "step": 1467 + }, + { + "epoch": 0.5173568281938326, + "grad_norm": 1.432801158502027, + "learning_rate": 1.7717151677363164e-05, + "loss": 0.7319275140762329, + "step": 1468 + }, + { + "epoch": 0.5177092511013216, + "grad_norm": 1.3993642551309304, + "learning_rate": 1.771344496453177e-05, + "loss": 0.7349969148635864, + "step": 1469 + }, + { + "epoch": 0.5180616740088105, + "grad_norm": 1.287285806622758, + "learning_rate": 1.7709735633225863e-05, + "loss": 0.8153162598609924, + "step": 1470 + }, + { + "epoch": 0.5184140969162996, + "grad_norm": 1.3919169087311665, + "learning_rate": 1.7706023684704642e-05, + "loss": 0.6582974195480347, + "step": 1471 + }, + { + "epoch": 0.5187665198237885, + "grad_norm": 1.704010679983685, + "learning_rate": 1.77023091202282e-05, + "loss": 0.696917712688446, + "step": 1472 + }, + { + "epoch": 0.5191189427312776, + "grad_norm": 1.4310702415713368, + "learning_rate": 1.769859194105752e-05, + "loss": 0.6281285285949707, + "step": 1473 + }, + { + "epoch": 0.5194713656387665, + 
"grad_norm": 1.6327610148964462, + "learning_rate": 1.7694872148454463e-05, + "loss": 0.7687089443206787, + "step": 1474 + }, + { + "epoch": 0.5198237885462555, + "grad_norm": 1.386403275153257, + "learning_rate": 1.7691149743681783e-05, + "loss": 0.6928491592407227, + "step": 1475 + }, + { + "epoch": 0.5201762114537445, + "grad_norm": 1.3972840676283895, + "learning_rate": 1.7687424728003126e-05, + "loss": 0.63843834400177, + "step": 1476 + }, + { + "epoch": 0.5205286343612335, + "grad_norm": 1.7893361899671325, + "learning_rate": 1.7683697102683012e-05, + "loss": 0.8987904787063599, + "step": 1477 + }, + { + "epoch": 0.5208810572687225, + "grad_norm": 1.3513150269139367, + "learning_rate": 1.767996686898686e-05, + "loss": 0.7027539014816284, + "step": 1478 + }, + { + "epoch": 0.5212334801762114, + "grad_norm": 1.5924485741299983, + "learning_rate": 1.7676234028180964e-05, + "loss": 0.8490183353424072, + "step": 1479 + }, + { + "epoch": 0.5215859030837005, + "grad_norm": 1.251712414046886, + "learning_rate": 1.7672498581532508e-05, + "loss": 0.5885729789733887, + "step": 1480 + }, + { + "epoch": 0.5219383259911894, + "grad_norm": 1.289415742432068, + "learning_rate": 1.766876053030956e-05, + "loss": 0.627627968788147, + "step": 1481 + }, + { + "epoch": 0.5222907488986784, + "grad_norm": 1.2948972408498374, + "learning_rate": 1.766501987578108e-05, + "loss": 0.6441413164138794, + "step": 1482 + }, + { + "epoch": 0.5226431718061674, + "grad_norm": 1.3508329997529829, + "learning_rate": 1.7661276619216888e-05, + "loss": 0.6199722290039062, + "step": 1483 + }, + { + "epoch": 0.5229955947136564, + "grad_norm": 1.2931208995237342, + "learning_rate": 1.7657530761887715e-05, + "loss": 0.6364887952804565, + "step": 1484 + }, + { + "epoch": 0.5233480176211454, + "grad_norm": 1.281527242811407, + "learning_rate": 1.7653782305065158e-05, + "loss": 0.7279890775680542, + "step": 1485 + }, + { + "epoch": 0.5237004405286344, + "grad_norm": 1.5228486275670003, + "learning_rate": 
1.7650031250021704e-05, + "loss": 0.6552719473838806, + "step": 1486 + }, + { + "epoch": 0.5240528634361233, + "grad_norm": 1.4461703633182712, + "learning_rate": 1.7646277598030717e-05, + "loss": 0.6778907775878906, + "step": 1487 + }, + { + "epoch": 0.5244052863436124, + "grad_norm": 1.3941119820637071, + "learning_rate": 1.7642521350366447e-05, + "loss": 0.6581870317459106, + "step": 1488 + }, + { + "epoch": 0.5247577092511013, + "grad_norm": 1.6198722329881745, + "learning_rate": 1.7638762508304025e-05, + "loss": 0.8529388904571533, + "step": 1489 + }, + { + "epoch": 0.5251101321585903, + "grad_norm": 1.659639253256808, + "learning_rate": 1.7635001073119458e-05, + "loss": 0.6601512432098389, + "step": 1490 + }, + { + "epoch": 0.5254625550660793, + "grad_norm": 1.5073764890239725, + "learning_rate": 1.7631237046089637e-05, + "loss": 0.6347510814666748, + "step": 1491 + }, + { + "epoch": 0.5258149779735682, + "grad_norm": 1.3256786239827452, + "learning_rate": 1.762747042849233e-05, + "loss": 0.700560986995697, + "step": 1492 + }, + { + "epoch": 0.5261674008810573, + "grad_norm": 1.4060287838972836, + "learning_rate": 1.762370122160619e-05, + "loss": 0.5641219019889832, + "step": 1493 + }, + { + "epoch": 0.5265198237885462, + "grad_norm": 1.3124518756577959, + "learning_rate": 1.761992942671074e-05, + "loss": 0.8017370700836182, + "step": 1494 + }, + { + "epoch": 0.5268722466960353, + "grad_norm": 1.334442798992846, + "learning_rate": 1.7616155045086394e-05, + "loss": 0.6345353126525879, + "step": 1495 + }, + { + "epoch": 0.5272246696035242, + "grad_norm": 1.6841165394853315, + "learning_rate": 1.7612378078014432e-05, + "loss": 0.7118426561355591, + "step": 1496 + }, + { + "epoch": 0.5275770925110133, + "grad_norm": 1.6748084277774182, + "learning_rate": 1.7608598526777017e-05, + "loss": 0.6186550855636597, + "step": 1497 + }, + { + "epoch": 0.5279295154185022, + "grad_norm": 1.4676540893387768, + "learning_rate": 1.7604816392657195e-05, + "loss": 
0.8351110219955444, + "step": 1498 + }, + { + "epoch": 0.5282819383259912, + "grad_norm": 1.3183866002309903, + "learning_rate": 1.7601031676938877e-05, + "loss": 0.638684868812561, + "step": 1499 + }, + { + "epoch": 0.5286343612334802, + "grad_norm": 1.291067085285626, + "learning_rate": 1.7597244380906856e-05, + "loss": 0.5118356943130493, + "step": 1500 + }, + { + "epoch": 0.5289867841409691, + "grad_norm": 1.2880504132355877, + "learning_rate": 1.7593454505846807e-05, + "loss": 0.637636125087738, + "step": 1501 + }, + { + "epoch": 0.5293392070484582, + "grad_norm": 1.3905967147162603, + "learning_rate": 1.7589662053045264e-05, + "loss": 0.8412563800811768, + "step": 1502 + }, + { + "epoch": 0.5296916299559471, + "grad_norm": 1.6432072453017084, + "learning_rate": 1.758586702378966e-05, + "loss": 0.7940464019775391, + "step": 1503 + }, + { + "epoch": 0.5300440528634361, + "grad_norm": 1.4898667206132308, + "learning_rate": 1.7582069419368276e-05, + "loss": 0.7136783599853516, + "step": 1504 + }, + { + "epoch": 0.5303964757709251, + "grad_norm": 1.5677232979916986, + "learning_rate": 1.757826924107029e-05, + "loss": 0.7212727069854736, + "step": 1505 + }, + { + "epoch": 0.5307488986784141, + "grad_norm": 2.968905268892082, + "learning_rate": 1.757446649018574e-05, + "loss": 0.7026032209396362, + "step": 1506 + }, + { + "epoch": 0.5311013215859031, + "grad_norm": 1.3050484980835664, + "learning_rate": 1.7570661168005544e-05, + "loss": 0.541954755783081, + "step": 1507 + }, + { + "epoch": 0.5314537444933921, + "grad_norm": 1.3422366313712581, + "learning_rate": 1.7566853275821488e-05, + "loss": 0.6927075386047363, + "step": 1508 + }, + { + "epoch": 0.531806167400881, + "grad_norm": 1.4069640098530838, + "learning_rate": 1.7563042814926237e-05, + "loss": 0.6556441783905029, + "step": 1509 + }, + { + "epoch": 0.5321585903083701, + "grad_norm": 1.710258111864569, + "learning_rate": 1.7559229786613317e-05, + "loss": 0.6895149946212769, + "step": 1510 + }, + { + 
"epoch": 0.532511013215859, + "grad_norm": 1.3737730722509855, + "learning_rate": 1.755541419217714e-05, + "loss": 0.6178139448165894, + "step": 1511 + }, + { + "epoch": 0.532863436123348, + "grad_norm": 1.5162737493672722, + "learning_rate": 1.7551596032912975e-05, + "loss": 0.7645368576049805, + "step": 1512 + }, + { + "epoch": 0.533215859030837, + "grad_norm": 1.3652252290806937, + "learning_rate": 1.7547775310116973e-05, + "loss": 0.8247367143630981, + "step": 1513 + }, + { + "epoch": 0.533568281938326, + "grad_norm": 1.2941657542151124, + "learning_rate": 1.7543952025086147e-05, + "loss": 0.535837709903717, + "step": 1514 + }, + { + "epoch": 0.533920704845815, + "grad_norm": 1.3232982615818571, + "learning_rate": 1.7540126179118387e-05, + "loss": 0.51450514793396, + "step": 1515 + }, + { + "epoch": 0.5342731277533039, + "grad_norm": 1.3863880461090508, + "learning_rate": 1.7536297773512444e-05, + "loss": 0.7962276935577393, + "step": 1516 + }, + { + "epoch": 0.534625550660793, + "grad_norm": 1.4799750544282257, + "learning_rate": 1.753246680956795e-05, + "loss": 0.7586444616317749, + "step": 1517 + }, + { + "epoch": 0.5349779735682819, + "grad_norm": 1.4967875396536634, + "learning_rate": 1.752863328858539e-05, + "loss": 0.7074990272521973, + "step": 1518 + }, + { + "epoch": 0.535330396475771, + "grad_norm": 1.3158842776684478, + "learning_rate": 1.7524797211866126e-05, + "loss": 0.7409921884536743, + "step": 1519 + }, + { + "epoch": 0.5356828193832599, + "grad_norm": 1.3752676962962187, + "learning_rate": 1.7520958580712394e-05, + "loss": 0.7889251708984375, + "step": 1520 + }, + { + "epoch": 0.536035242290749, + "grad_norm": 2.0871001645404776, + "learning_rate": 1.751711739642728e-05, + "loss": 0.8244975209236145, + "step": 1521 + }, + { + "epoch": 0.5363876651982379, + "grad_norm": 1.441127888748836, + "learning_rate": 1.7513273660314753e-05, + "loss": 0.6573888659477234, + "step": 1522 + }, + { + "epoch": 0.5367400881057268, + "grad_norm": 
1.3793459185222714, + "learning_rate": 1.7509427373679643e-05, + "loss": 0.71816086769104, + "step": 1523 + }, + { + "epoch": 0.5370925110132159, + "grad_norm": 1.5200593368820163, + "learning_rate": 1.750557853782764e-05, + "loss": 0.7681000232696533, + "step": 1524 + }, + { + "epoch": 0.5374449339207048, + "grad_norm": 1.443442982592023, + "learning_rate": 1.7501727154065304e-05, + "loss": 0.6777461767196655, + "step": 1525 + }, + { + "epoch": 0.5377973568281939, + "grad_norm": 1.5538840121485165, + "learning_rate": 1.7497873223700063e-05, + "loss": 0.714499831199646, + "step": 1526 + }, + { + "epoch": 0.5381497797356828, + "grad_norm": 1.6085901588908644, + "learning_rate": 1.7494016748040206e-05, + "loss": 0.6587036848068237, + "step": 1527 + }, + { + "epoch": 0.5385022026431718, + "grad_norm": 1.5748960305246453, + "learning_rate": 1.7490157728394887e-05, + "loss": 0.7256105542182922, + "step": 1528 + }, + { + "epoch": 0.5388546255506608, + "grad_norm": 1.7818844853131433, + "learning_rate": 1.7486296166074116e-05, + "loss": 0.6551185846328735, + "step": 1529 + }, + { + "epoch": 0.5392070484581498, + "grad_norm": 1.5961201900224617, + "learning_rate": 1.7482432062388782e-05, + "loss": 0.710479736328125, + "step": 1530 + }, + { + "epoch": 0.5395594713656388, + "grad_norm": 1.3063302832130508, + "learning_rate": 1.7478565418650625e-05, + "loss": 0.7882527709007263, + "step": 1531 + }, + { + "epoch": 0.5399118942731278, + "grad_norm": 1.4227949027781848, + "learning_rate": 1.7474696236172247e-05, + "loss": 0.6163671612739563, + "step": 1532 + }, + { + "epoch": 0.5402643171806167, + "grad_norm": 1.3516530648193832, + "learning_rate": 1.7470824516267125e-05, + "loss": 0.6923140287399292, + "step": 1533 + }, + { + "epoch": 0.5406167400881057, + "grad_norm": 1.259724627030556, + "learning_rate": 1.7466950260249573e-05, + "loss": 0.6473938226699829, + "step": 1534 + }, + { + "epoch": 0.5409691629955947, + "grad_norm": 1.7187178399062975, + "learning_rate": 
1.7463073469434792e-05, + "loss": 0.631247878074646, + "step": 1535 + }, + { + "epoch": 0.5413215859030837, + "grad_norm": 1.8932273669088504, + "learning_rate": 1.745919414513883e-05, + "loss": 0.8113377690315247, + "step": 1536 + }, + { + "epoch": 0.5416740088105727, + "grad_norm": 1.4958838672098407, + "learning_rate": 1.7455312288678588e-05, + "loss": 0.7950010299682617, + "step": 1537 + }, + { + "epoch": 0.5420264317180616, + "grad_norm": 1.5066443226404773, + "learning_rate": 1.7451427901371843e-05, + "loss": 0.7279125452041626, + "step": 1538 + }, + { + "epoch": 0.5423788546255507, + "grad_norm": 1.304013044677209, + "learning_rate": 1.7447540984537225e-05, + "loss": 0.6746084690093994, + "step": 1539 + }, + { + "epoch": 0.5427312775330396, + "grad_norm": 1.2714028280363416, + "learning_rate": 1.744365153949422e-05, + "loss": 0.5818569660186768, + "step": 1540 + }, + { + "epoch": 0.5430837004405287, + "grad_norm": 1.3192138998270364, + "learning_rate": 1.743975956756317e-05, + "loss": 0.7408111095428467, + "step": 1541 + }, + { + "epoch": 0.5434361233480176, + "grad_norm": 1.4913068245941434, + "learning_rate": 1.7435865070065282e-05, + "loss": 0.6842402815818787, + "step": 1542 + }, + { + "epoch": 0.5437885462555067, + "grad_norm": 1.3316441616353596, + "learning_rate": 1.7431968048322617e-05, + "loss": 0.6179615259170532, + "step": 1543 + }, + { + "epoch": 0.5441409691629956, + "grad_norm": 1.3347896582759051, + "learning_rate": 1.7428068503658085e-05, + "loss": 0.5943140983581543, + "step": 1544 + }, + { + "epoch": 0.5444933920704845, + "grad_norm": 1.797026236227536, + "learning_rate": 1.742416643739547e-05, + "loss": 0.7901419401168823, + "step": 1545 + }, + { + "epoch": 0.5448458149779736, + "grad_norm": 1.4636864955433957, + "learning_rate": 1.74202618508594e-05, + "loss": 0.7630521655082703, + "step": 1546 + }, + { + "epoch": 0.5451982378854625, + "grad_norm": 1.5322711938826543, + "learning_rate": 1.7416354745375355e-05, + "loss": 
0.7662566900253296, + "step": 1547 + }, + { + "epoch": 0.5455506607929516, + "grad_norm": 1.52556111103991, + "learning_rate": 1.7412445122269683e-05, + "loss": 0.5758601427078247, + "step": 1548 + }, + { + "epoch": 0.5459030837004405, + "grad_norm": 1.3681670353760105, + "learning_rate": 1.7408532982869573e-05, + "loss": 0.753425121307373, + "step": 1549 + }, + { + "epoch": 0.5462555066079295, + "grad_norm": 1.7147625296386437, + "learning_rate": 1.7404618328503082e-05, + "loss": 0.6954981088638306, + "step": 1550 + }, + { + "epoch": 0.5466079295154185, + "grad_norm": 1.7209819459128521, + "learning_rate": 1.7400701160499107e-05, + "loss": 0.7608321905136108, + "step": 1551 + }, + { + "epoch": 0.5469603524229075, + "grad_norm": 1.3289181862839086, + "learning_rate": 1.7396781480187403e-05, + "loss": 0.679731011390686, + "step": 1552 + }, + { + "epoch": 0.5473127753303965, + "grad_norm": 1.547015119464835, + "learning_rate": 1.7392859288898586e-05, + "loss": 0.7101309299468994, + "step": 1553 + }, + { + "epoch": 0.5476651982378855, + "grad_norm": 1.3483315531721025, + "learning_rate": 1.7388934587964114e-05, + "loss": 0.7975757122039795, + "step": 1554 + }, + { + "epoch": 0.5480176211453744, + "grad_norm": 1.612241763199232, + "learning_rate": 1.73850073787163e-05, + "loss": 0.9132372140884399, + "step": 1555 + }, + { + "epoch": 0.5483700440528634, + "grad_norm": 1.2910456692590726, + "learning_rate": 1.7381077662488313e-05, + "loss": 0.7375202178955078, + "step": 1556 + }, + { + "epoch": 0.5487224669603524, + "grad_norm": 1.387618503105513, + "learning_rate": 1.7377145440614165e-05, + "loss": 0.7066434025764465, + "step": 1557 + }, + { + "epoch": 0.5490748898678414, + "grad_norm": 1.3715154650071018, + "learning_rate": 1.737321071442873e-05, + "loss": 0.8217945098876953, + "step": 1558 + }, + { + "epoch": 0.5494273127753304, + "grad_norm": 1.629646959297134, + "learning_rate": 1.7369273485267716e-05, + "loss": 0.6946529150009155, + "step": 1559 + }, + { + "epoch": 
0.5497797356828193, + "grad_norm": 1.525535566210846, + "learning_rate": 1.7365333754467694e-05, + "loss": 0.7671442627906799, + "step": 1560 + }, + { + "epoch": 0.5501321585903084, + "grad_norm": 1.3988607690634036, + "learning_rate": 1.736139152336608e-05, + "loss": 0.7044692039489746, + "step": 1561 + }, + { + "epoch": 0.5504845814977973, + "grad_norm": 1.368099127753493, + "learning_rate": 1.735744679330114e-05, + "loss": 0.6654937267303467, + "step": 1562 + }, + { + "epoch": 0.5508370044052864, + "grad_norm": 1.2951614076743367, + "learning_rate": 1.7353499565611986e-05, + "loss": 0.6683400869369507, + "step": 1563 + }, + { + "epoch": 0.5511894273127753, + "grad_norm": 1.236687311626723, + "learning_rate": 1.734954984163858e-05, + "loss": 0.6360758543014526, + "step": 1564 + }, + { + "epoch": 0.5515418502202644, + "grad_norm": 1.3363599776517268, + "learning_rate": 1.7345597622721727e-05, + "loss": 0.6982365846633911, + "step": 1565 + }, + { + "epoch": 0.5518942731277533, + "grad_norm": 1.559537817461735, + "learning_rate": 1.7341642910203087e-05, + "loss": 0.8011882305145264, + "step": 1566 + }, + { + "epoch": 0.5522466960352423, + "grad_norm": 2.285492930360211, + "learning_rate": 1.7337685705425156e-05, + "loss": 0.8203347325325012, + "step": 1567 + }, + { + "epoch": 0.5525991189427313, + "grad_norm": 1.2535380811715755, + "learning_rate": 1.7333726009731288e-05, + "loss": 0.653145432472229, + "step": 1568 + }, + { + "epoch": 0.5529515418502202, + "grad_norm": 1.4482870728586805, + "learning_rate": 1.7329763824465676e-05, + "loss": 0.6527417302131653, + "step": 1569 + }, + { + "epoch": 0.5533039647577093, + "grad_norm": 1.44377376134513, + "learning_rate": 1.7325799150973353e-05, + "loss": 0.6965492963790894, + "step": 1570 + }, + { + "epoch": 0.5536563876651982, + "grad_norm": 1.64534067222521, + "learning_rate": 1.7321831990600206e-05, + "loss": 0.6779811382293701, + "step": 1571 + }, + { + "epoch": 0.5540088105726872, + "grad_norm": 1.4062562776851213, + 
"learning_rate": 1.731786234469296e-05, + "loss": 0.733130931854248, + "step": 1572 + }, + { + "epoch": 0.5543612334801762, + "grad_norm": 1.3711228848627866, + "learning_rate": 1.7313890214599195e-05, + "loss": 0.6482118964195251, + "step": 1573 + }, + { + "epoch": 0.5547136563876652, + "grad_norm": 1.300564860417972, + "learning_rate": 1.7309915601667312e-05, + "loss": 0.5167185068130493, + "step": 1574 + }, + { + "epoch": 0.5550660792951542, + "grad_norm": 1.5636119075248611, + "learning_rate": 1.730593850724658e-05, + "loss": 0.7172712087631226, + "step": 1575 + }, + { + "epoch": 0.5554185022026432, + "grad_norm": 1.3031139662778384, + "learning_rate": 1.730195893268709e-05, + "loss": 0.6786075830459595, + "step": 1576 + }, + { + "epoch": 0.5557709251101322, + "grad_norm": 1.272621756820605, + "learning_rate": 1.7297976879339787e-05, + "loss": 0.6823022365570068, + "step": 1577 + }, + { + "epoch": 0.5561233480176212, + "grad_norm": 1.5517603954080275, + "learning_rate": 1.7293992348556462e-05, + "loss": 0.6882521510124207, + "step": 1578 + }, + { + "epoch": 0.5564757709251101, + "grad_norm": 1.3633741314626187, + "learning_rate": 1.7290005341689726e-05, + "loss": 0.6028990745544434, + "step": 1579 + }, + { + "epoch": 0.5568281938325991, + "grad_norm": 1.6493192591020644, + "learning_rate": 1.728601586009305e-05, + "loss": 0.7759981155395508, + "step": 1580 + }, + { + "epoch": 0.5571806167400881, + "grad_norm": 1.4356044858306343, + "learning_rate": 1.7282023905120743e-05, + "loss": 0.7067322134971619, + "step": 1581 + }, + { + "epoch": 0.5575330396475771, + "grad_norm": 1.6158791701222606, + "learning_rate": 1.727802947812794e-05, + "loss": 0.7972309589385986, + "step": 1582 + }, + { + "epoch": 0.5578854625550661, + "grad_norm": 1.7662811513100274, + "learning_rate": 1.7274032580470634e-05, + "loss": 0.780463457107544, + "step": 1583 + }, + { + "epoch": 0.558237885462555, + "grad_norm": 1.4053617141185595, + "learning_rate": 1.7270033213505638e-05, + "loss": 
0.647217869758606, + "step": 1584 + }, + { + "epoch": 0.5585903083700441, + "grad_norm": 1.3125952525291176, + "learning_rate": 1.7266031378590624e-05, + "loss": 0.6253752112388611, + "step": 1585 + }, + { + "epoch": 0.558942731277533, + "grad_norm": 11.7060219187992, + "learning_rate": 1.7262027077084083e-05, + "loss": 0.8427211046218872, + "step": 1586 + }, + { + "epoch": 0.5592951541850221, + "grad_norm": 1.344046568539196, + "learning_rate": 1.7258020310345348e-05, + "loss": 0.6763455867767334, + "step": 1587 + }, + { + "epoch": 0.559647577092511, + "grad_norm": 1.593422172771999, + "learning_rate": 1.72540110797346e-05, + "loss": 0.7333850264549255, + "step": 1588 + }, + { + "epoch": 0.56, + "grad_norm": 1.6040079500892586, + "learning_rate": 1.7249999386612844e-05, + "loss": 0.8572328090667725, + "step": 1589 + }, + { + "epoch": 0.560352422907489, + "grad_norm": 1.5035390542036942, + "learning_rate": 1.7245985232341923e-05, + "loss": 0.7960183620452881, + "step": 1590 + }, + { + "epoch": 0.5607048458149779, + "grad_norm": 1.484772075429922, + "learning_rate": 1.7241968618284518e-05, + "loss": 0.6750795841217041, + "step": 1591 + }, + { + "epoch": 0.561057268722467, + "grad_norm": 1.9239116239416003, + "learning_rate": 1.7237949545804145e-05, + "loss": 0.7828525304794312, + "step": 1592 + }, + { + "epoch": 0.5614096916299559, + "grad_norm": 1.4415449299886975, + "learning_rate": 1.7233928016265158e-05, + "loss": 0.7414604425430298, + "step": 1593 + }, + { + "epoch": 0.561762114537445, + "grad_norm": 1.4483242479736562, + "learning_rate": 1.7229904031032736e-05, + "loss": 0.6853663921356201, + "step": 1594 + }, + { + "epoch": 0.5621145374449339, + "grad_norm": 1.9067676423331832, + "learning_rate": 1.72258775914729e-05, + "loss": 0.7923493385314941, + "step": 1595 + }, + { + "epoch": 0.5624669603524229, + "grad_norm": 1.6239202976244251, + "learning_rate": 1.7221848698952496e-05, + "loss": 0.6776527166366577, + "step": 1596 + }, + { + "epoch": 
0.5628193832599119, + "grad_norm": 1.4721879083766742, + "learning_rate": 1.721781735483921e-05, + "loss": 0.6036615371704102, + "step": 1597 + }, + { + "epoch": 0.5631718061674009, + "grad_norm": 1.271294238053108, + "learning_rate": 1.7213783560501564e-05, + "loss": 0.7175784111022949, + "step": 1598 + }, + { + "epoch": 0.5635242290748899, + "grad_norm": 1.609537856897954, + "learning_rate": 1.7209747317308897e-05, + "loss": 0.790808379650116, + "step": 1599 + }, + { + "epoch": 0.5638766519823789, + "grad_norm": 1.211639696248482, + "learning_rate": 1.7205708626631392e-05, + "loss": 0.6230301856994629, + "step": 1600 + }, + { + "epoch": 0.5642290748898678, + "grad_norm": 1.120326299832536, + "learning_rate": 1.720166748984006e-05, + "loss": 0.712124228477478, + "step": 1601 + }, + { + "epoch": 0.5645814977973568, + "grad_norm": 1.1185092917911836, + "learning_rate": 1.719762390830674e-05, + "loss": 0.543883740901947, + "step": 1602 + }, + { + "epoch": 0.5649339207048458, + "grad_norm": 1.3866183721479424, + "learning_rate": 1.71935778834041e-05, + "loss": 0.7619644999504089, + "step": 1603 + }, + { + "epoch": 0.5652863436123348, + "grad_norm": 1.3869247346305908, + "learning_rate": 1.718952941650564e-05, + "loss": 0.6447019577026367, + "step": 1604 + }, + { + "epoch": 0.5656387665198238, + "grad_norm": 1.4175373147115695, + "learning_rate": 1.718547850898569e-05, + "loss": 0.7254266738891602, + "step": 1605 + }, + { + "epoch": 0.5659911894273127, + "grad_norm": 1.3621762521360266, + "learning_rate": 1.7181425162219406e-05, + "loss": 0.632878839969635, + "step": 1606 + }, + { + "epoch": 0.5663436123348018, + "grad_norm": 1.3921274088807207, + "learning_rate": 1.7177369377582776e-05, + "loss": 0.7711806893348694, + "step": 1607 + }, + { + "epoch": 0.5666960352422907, + "grad_norm": 1.1613347832568823, + "learning_rate": 1.7173311156452607e-05, + "loss": 0.6639282703399658, + "step": 1608 + }, + { + "epoch": 0.5670484581497798, + "grad_norm": 1.4423463303361395, + 
"learning_rate": 1.7169250500206544e-05, + "loss": 0.6918407082557678, + "step": 1609 + }, + { + "epoch": 0.5674008810572687, + "grad_norm": 2.283192950596924, + "learning_rate": 1.716518741022305e-05, + "loss": 0.6602861881256104, + "step": 1610 + }, + { + "epoch": 0.5677533039647578, + "grad_norm": 1.401616641880741, + "learning_rate": 1.7161121887881424e-05, + "loss": 0.5853942632675171, + "step": 1611 + }, + { + "epoch": 0.5681057268722467, + "grad_norm": 1.4206445071697613, + "learning_rate": 1.7157053934561775e-05, + "loss": 0.6793895959854126, + "step": 1612 + }, + { + "epoch": 0.5684581497797356, + "grad_norm": 1.43055320760408, + "learning_rate": 1.7152983551645054e-05, + "loss": 0.7882634401321411, + "step": 1613 + }, + { + "epoch": 0.5688105726872247, + "grad_norm": 1.4602086959676452, + "learning_rate": 1.7148910740513023e-05, + "loss": 0.6530553698539734, + "step": 1614 + }, + { + "epoch": 0.5691629955947136, + "grad_norm": 1.2905537135464573, + "learning_rate": 1.714483550254828e-05, + "loss": 0.6405597925186157, + "step": 1615 + }, + { + "epoch": 0.5695154185022027, + "grad_norm": 1.4236330365126968, + "learning_rate": 1.714075783913424e-05, + "loss": 0.7356796860694885, + "step": 1616 + }, + { + "epoch": 0.5698678414096916, + "grad_norm": 1.3877607090316109, + "learning_rate": 1.7136677751655142e-05, + "loss": 0.7393465042114258, + "step": 1617 + }, + { + "epoch": 0.5702202643171806, + "grad_norm": 1.6092126006316967, + "learning_rate": 1.7132595241496045e-05, + "loss": 0.7205296158790588, + "step": 1618 + }, + { + "epoch": 0.5705726872246696, + "grad_norm": 1.291376266983401, + "learning_rate": 1.7128510310042842e-05, + "loss": 0.7359808683395386, + "step": 1619 + }, + { + "epoch": 0.5709251101321586, + "grad_norm": 1.3759135749970453, + "learning_rate": 1.712442295868224e-05, + "loss": 0.7097065448760986, + "step": 1620 + }, + { + "epoch": 0.5712775330396476, + "grad_norm": 1.3905917375530226, + "learning_rate": 1.7120333188801756e-05, + "loss": 
0.66839599609375, + "step": 1621 + }, + { + "epoch": 0.5716299559471366, + "grad_norm": 1.7035593754714837, + "learning_rate": 1.7116241001789753e-05, + "loss": 0.8373857736587524, + "step": 1622 + }, + { + "epoch": 0.5719823788546256, + "grad_norm": 1.4514044348034505, + "learning_rate": 1.7112146399035393e-05, + "loss": 0.6405144333839417, + "step": 1623 + }, + { + "epoch": 0.5723348017621145, + "grad_norm": 1.3537498495813336, + "learning_rate": 1.710804938192867e-05, + "loss": 0.622218906879425, + "step": 1624 + }, + { + "epoch": 0.5726872246696035, + "grad_norm": 1.3235233015291856, + "learning_rate": 1.710394995186039e-05, + "loss": 0.6728596687316895, + "step": 1625 + }, + { + "epoch": 0.5730396475770925, + "grad_norm": 1.457353775792826, + "learning_rate": 1.7099848110222188e-05, + "loss": 0.7749369144439697, + "step": 1626 + }, + { + "epoch": 0.5733920704845815, + "grad_norm": 1.5414707611626788, + "learning_rate": 1.7095743858406506e-05, + "loss": 0.7230759859085083, + "step": 1627 + }, + { + "epoch": 0.5737444933920705, + "grad_norm": 1.540981219180448, + "learning_rate": 1.7091637197806614e-05, + "loss": 0.8243547677993774, + "step": 1628 + }, + { + "epoch": 0.5740969162995595, + "grad_norm": 1.38043003521811, + "learning_rate": 1.708752812981659e-05, + "loss": 0.5860315561294556, + "step": 1629 + }, + { + "epoch": 0.5744493392070484, + "grad_norm": 1.6273061636094053, + "learning_rate": 1.708341665583134e-05, + "loss": 0.6623368859291077, + "step": 1630 + }, + { + "epoch": 0.5748017621145375, + "grad_norm": 1.8119651381751527, + "learning_rate": 1.7079302777246577e-05, + "loss": 0.6467370986938477, + "step": 1631 + }, + { + "epoch": 0.5751541850220264, + "grad_norm": 1.5119118761679917, + "learning_rate": 1.707518649545884e-05, + "loss": 0.6443271636962891, + "step": 1632 + }, + { + "epoch": 0.5755066079295155, + "grad_norm": 1.3128080413830525, + "learning_rate": 1.7071067811865477e-05, + "loss": 0.6995208263397217, + "step": 1633 + }, + { + "epoch": 
0.5758590308370044, + "grad_norm": 1.4660315838841709, + "learning_rate": 1.706694672786465e-05, + "loss": 0.698627233505249, + "step": 1634 + }, + { + "epoch": 0.5762114537444933, + "grad_norm": 1.3788458614759633, + "learning_rate": 1.706282324485534e-05, + "loss": 0.713565468788147, + "step": 1635 + }, + { + "epoch": 0.5765638766519824, + "grad_norm": 1.4050651409728825, + "learning_rate": 1.7058697364237342e-05, + "loss": 0.7978894710540771, + "step": 1636 + }, + { + "epoch": 0.5769162995594713, + "grad_norm": 1.374012134646938, + "learning_rate": 1.7054569087411262e-05, + "loss": 0.7361177206039429, + "step": 1637 + }, + { + "epoch": 0.5772687224669604, + "grad_norm": 1.3640656150089683, + "learning_rate": 1.705043841577853e-05, + "loss": 0.5904364585876465, + "step": 1638 + }, + { + "epoch": 0.5776211453744493, + "grad_norm": 1.4706525609098695, + "learning_rate": 1.7046305350741365e-05, + "loss": 0.7122133374214172, + "step": 1639 + }, + { + "epoch": 0.5779735682819384, + "grad_norm": 1.5208627357939872, + "learning_rate": 1.7042169893702826e-05, + "loss": 0.6350806951522827, + "step": 1640 + }, + { + "epoch": 0.5783259911894273, + "grad_norm": 1.4511692718944456, + "learning_rate": 1.7038032046066767e-05, + "loss": 0.6332669258117676, + "step": 1641 + }, + { + "epoch": 0.5786784140969163, + "grad_norm": 1.415207402865657, + "learning_rate": 1.7033891809237865e-05, + "loss": 0.6645903587341309, + "step": 1642 + }, + { + "epoch": 0.5790308370044053, + "grad_norm": 1.6697269215763402, + "learning_rate": 1.7029749184621593e-05, + "loss": 0.8156411051750183, + "step": 1643 + }, + { + "epoch": 0.5793832599118943, + "grad_norm": 1.3789808786486863, + "learning_rate": 1.7025604173624247e-05, + "loss": 0.6778720617294312, + "step": 1644 + }, + { + "epoch": 0.5797356828193833, + "grad_norm": 1.5882994058774447, + "learning_rate": 1.702145677765293e-05, + "loss": 0.6774875521659851, + "step": 1645 + }, + { + "epoch": 0.5800881057268722, + "grad_norm": 
1.7790432286964633, + "learning_rate": 1.701730699811555e-05, + "loss": 0.9239652156829834, + "step": 1646 + }, + { + "epoch": 0.5804405286343612, + "grad_norm": 1.3647594896468807, + "learning_rate": 1.701315483642083e-05, + "loss": 0.6841437816619873, + "step": 1647 + }, + { + "epoch": 0.5807929515418502, + "grad_norm": 1.7199469103031315, + "learning_rate": 1.7009000293978308e-05, + "loss": 0.7540775537490845, + "step": 1648 + }, + { + "epoch": 0.5811453744493392, + "grad_norm": 1.0742597088843755, + "learning_rate": 1.7004843372198306e-05, + "loss": 0.5534735321998596, + "step": 1649 + }, + { + "epoch": 0.5814977973568282, + "grad_norm": 1.326312979627632, + "learning_rate": 1.7000684072491984e-05, + "loss": 0.5398745536804199, + "step": 1650 + }, + { + "epoch": 0.5818502202643172, + "grad_norm": 1.583833147288038, + "learning_rate": 1.6996522396271285e-05, + "loss": 0.7249305248260498, + "step": 1651 + }, + { + "epoch": 0.5822026431718061, + "grad_norm": 2.3893378173132973, + "learning_rate": 1.6992358344948976e-05, + "loss": 0.819263219833374, + "step": 1652 + }, + { + "epoch": 0.5825550660792952, + "grad_norm": 1.4489156713328724, + "learning_rate": 1.6988191919938618e-05, + "loss": 0.7421448826789856, + "step": 1653 + }, + { + "epoch": 0.5829074889867841, + "grad_norm": 1.832209725536692, + "learning_rate": 1.6984023122654584e-05, + "loss": 0.7665672302246094, + "step": 1654 + }, + { + "epoch": 0.5832599118942732, + "grad_norm": 1.390589552129084, + "learning_rate": 1.697985195451205e-05, + "loss": 0.7226558327674866, + "step": 1655 + }, + { + "epoch": 0.5836123348017621, + "grad_norm": 1.5091001050977364, + "learning_rate": 1.6975678416926995e-05, + "loss": 0.6702080965042114, + "step": 1656 + }, + { + "epoch": 0.583964757709251, + "grad_norm": 1.460442381139403, + "learning_rate": 1.697150251131621e-05, + "loss": 0.5843878984451294, + "step": 1657 + }, + { + "epoch": 0.5843171806167401, + "grad_norm": 1.37517469234843, + "learning_rate": 
1.6967324239097287e-05, + "loss": 0.707448422908783, + "step": 1658 + }, + { + "epoch": 0.584669603524229, + "grad_norm": 1.8436282149841139, + "learning_rate": 1.6963143601688615e-05, + "loss": 0.7619093060493469, + "step": 1659 + }, + { + "epoch": 0.5850220264317181, + "grad_norm": 1.5399166464925174, + "learning_rate": 1.695896060050939e-05, + "loss": 0.6550310850143433, + "step": 1660 + }, + { + "epoch": 0.585374449339207, + "grad_norm": 1.6689625417691945, + "learning_rate": 1.6954775236979616e-05, + "loss": 0.7202504277229309, + "step": 1661 + }, + { + "epoch": 0.5857268722466961, + "grad_norm": 1.4936106294591966, + "learning_rate": 1.6950587512520085e-05, + "loss": 0.7941907644271851, + "step": 1662 + }, + { + "epoch": 0.586079295154185, + "grad_norm": 1.3939181305394832, + "learning_rate": 1.6946397428552406e-05, + "loss": 0.6349755525588989, + "step": 1663 + }, + { + "epoch": 0.586431718061674, + "grad_norm": 1.4663377684980818, + "learning_rate": 1.6942204986498978e-05, + "loss": 0.6220123171806335, + "step": 1664 + }, + { + "epoch": 0.586784140969163, + "grad_norm": 1.3729457618271874, + "learning_rate": 1.693801018778301e-05, + "loss": 0.6617282629013062, + "step": 1665 + }, + { + "epoch": 0.587136563876652, + "grad_norm": 1.6745607368825612, + "learning_rate": 1.6933813033828496e-05, + "loss": 0.7424415349960327, + "step": 1666 + }, + { + "epoch": 0.587488986784141, + "grad_norm": 1.4332695932293307, + "learning_rate": 1.6929613526060254e-05, + "loss": 0.7245291471481323, + "step": 1667 + }, + { + "epoch": 0.5878414096916299, + "grad_norm": 1.7631957554533126, + "learning_rate": 1.692541166590387e-05, + "loss": 0.7037352323532104, + "step": 1668 + }, + { + "epoch": 0.588193832599119, + "grad_norm": 1.563153866597813, + "learning_rate": 1.6921207454785754e-05, + "loss": 0.7452583312988281, + "step": 1669 + }, + { + "epoch": 0.5885462555066079, + "grad_norm": 1.8223456889525438, + "learning_rate": 1.6917000894133106e-05, + "loss": 0.7773720026016235, + 
"step": 1670 + }, + { + "epoch": 0.5888986784140969, + "grad_norm": 1.6663522681826546, + "learning_rate": 1.6912791985373916e-05, + "loss": 0.5820617079734802, + "step": 1671 + }, + { + "epoch": 0.5892511013215859, + "grad_norm": 1.4638050818442514, + "learning_rate": 1.6908580729936983e-05, + "loss": 0.7513154745101929, + "step": 1672 + }, + { + "epoch": 0.5896035242290749, + "grad_norm": 1.4916906386520274, + "learning_rate": 1.6904367129251898e-05, + "loss": 0.6741763949394226, + "step": 1673 + }, + { + "epoch": 0.5899559471365639, + "grad_norm": 1.4430048165358413, + "learning_rate": 1.690015118474904e-05, + "loss": 0.7290149331092834, + "step": 1674 + }, + { + "epoch": 0.5903083700440529, + "grad_norm": 1.431209358109114, + "learning_rate": 1.6895932897859596e-05, + "loss": 0.651113748550415, + "step": 1675 + }, + { + "epoch": 0.5906607929515418, + "grad_norm": 1.5475090754915908, + "learning_rate": 1.6891712270015546e-05, + "loss": 0.8062121272087097, + "step": 1676 + }, + { + "epoch": 0.5910132158590309, + "grad_norm": 1.6532405105419041, + "learning_rate": 1.6887489302649657e-05, + "loss": 0.7168683409690857, + "step": 1677 + }, + { + "epoch": 0.5913656387665198, + "grad_norm": 1.5137609810465338, + "learning_rate": 1.6883263997195497e-05, + "loss": 0.6751970052719116, + "step": 1678 + }, + { + "epoch": 0.5917180616740088, + "grad_norm": 1.3244566227591112, + "learning_rate": 1.687903635508742e-05, + "loss": 0.5176222324371338, + "step": 1679 + }, + { + "epoch": 0.5920704845814978, + "grad_norm": 1.532290203616517, + "learning_rate": 1.6874806377760587e-05, + "loss": 0.605686366558075, + "step": 1680 + }, + { + "epoch": 0.5924229074889867, + "grad_norm": 1.55000273332987, + "learning_rate": 1.6870574066650945e-05, + "loss": 0.6927961111068726, + "step": 1681 + }, + { + "epoch": 0.5927753303964758, + "grad_norm": 1.7129667821490024, + "learning_rate": 1.6866339423195223e-05, + "loss": 0.7434122562408447, + "step": 1682 + }, + { + "epoch": 
0.5931277533039647, + "grad_norm": 1.6508583062240207, + "learning_rate": 1.6862102448830956e-05, + "loss": 0.5646539926528931, + "step": 1683 + }, + { + "epoch": 0.5934801762114538, + "grad_norm": 1.6845514517525704, + "learning_rate": 1.6857863144996464e-05, + "loss": 0.6666921377182007, + "step": 1684 + }, + { + "epoch": 0.5938325991189427, + "grad_norm": 1.7487162446625693, + "learning_rate": 1.6853621513130857e-05, + "loss": 0.6630325317382812, + "step": 1685 + }, + { + "epoch": 0.5941850220264318, + "grad_norm": 1.6744610818707069, + "learning_rate": 1.6849377554674042e-05, + "loss": 0.6519981622695923, + "step": 1686 + }, + { + "epoch": 0.5945374449339207, + "grad_norm": 1.523793082989738, + "learning_rate": 1.6845131271066705e-05, + "loss": 0.7958102822303772, + "step": 1687 + }, + { + "epoch": 0.5948898678414097, + "grad_norm": 1.6258620864429363, + "learning_rate": 1.6840882663750333e-05, + "loss": 0.6136632561683655, + "step": 1688 + }, + { + "epoch": 0.5952422907488987, + "grad_norm": 1.5696515726783535, + "learning_rate": 1.683663173416719e-05, + "loss": 0.6177657842636108, + "step": 1689 + }, + { + "epoch": 0.5955947136563877, + "grad_norm": 1.3990187120155009, + "learning_rate": 1.683237848376034e-05, + "loss": 0.7489751577377319, + "step": 1690 + }, + { + "epoch": 0.5959471365638767, + "grad_norm": 1.7037734397554838, + "learning_rate": 1.6828122913973625e-05, + "loss": 0.6749632954597473, + "step": 1691 + }, + { + "epoch": 0.5962995594713656, + "grad_norm": 1.7564038851615957, + "learning_rate": 1.682386502625168e-05, + "loss": 0.6340545415878296, + "step": 1692 + }, + { + "epoch": 0.5966519823788546, + "grad_norm": 1.2684465272191359, + "learning_rate": 1.6819604822039924e-05, + "loss": 0.6141117811203003, + "step": 1693 + }, + { + "epoch": 0.5970044052863436, + "grad_norm": 1.6665435860950566, + "learning_rate": 1.681534230278457e-05, + "loss": 0.7937319874763489, + "step": 1694 + }, + { + "epoch": 0.5973568281938326, + "grad_norm": 
1.376760638279742, + "learning_rate": 1.68110774699326e-05, + "loss": 0.6196104288101196, + "step": 1695 + }, + { + "epoch": 0.5977092511013216, + "grad_norm": 1.755256295612453, + "learning_rate": 1.68068103249318e-05, + "loss": 0.6856463551521301, + "step": 1696 + }, + { + "epoch": 0.5980616740088106, + "grad_norm": 1.423055172614558, + "learning_rate": 1.680254086923073e-05, + "loss": 0.754359245300293, + "step": 1697 + }, + { + "epoch": 0.5984140969162995, + "grad_norm": 1.5540819723583295, + "learning_rate": 1.6798269104278738e-05, + "loss": 0.6663862466812134, + "step": 1698 + }, + { + "epoch": 0.5987665198237886, + "grad_norm": 1.8192134096199304, + "learning_rate": 1.6793995031525955e-05, + "loss": 0.7072615027427673, + "step": 1699 + }, + { + "epoch": 0.5991189427312775, + "grad_norm": 1.3664015344189913, + "learning_rate": 1.678971865242329e-05, + "loss": 0.5722007751464844, + "step": 1700 + }, + { + "epoch": 0.5994713656387666, + "grad_norm": 1.5146739460913152, + "learning_rate": 1.6785439968422456e-05, + "loss": 0.8254455327987671, + "step": 1701 + }, + { + "epoch": 0.5998237885462555, + "grad_norm": 1.6221807995806083, + "learning_rate": 1.678115898097592e-05, + "loss": 0.5726041793823242, + "step": 1702 + }, + { + "epoch": 0.6001762114537444, + "grad_norm": 1.7173506198717712, + "learning_rate": 1.6776875691536946e-05, + "loss": 0.6480926275253296, + "step": 1703 + }, + { + "epoch": 0.6005286343612335, + "grad_norm": 1.801703791100917, + "learning_rate": 1.677259010155958e-05, + "loss": 0.6469742059707642, + "step": 1704 + }, + { + "epoch": 0.6008810572687224, + "grad_norm": 1.6534691770392222, + "learning_rate": 1.6768302212498647e-05, + "loss": 0.814565896987915, + "step": 1705 + }, + { + "epoch": 0.6012334801762115, + "grad_norm": 1.6212192399903926, + "learning_rate": 1.6764012025809745e-05, + "loss": 0.7063060402870178, + "step": 1706 + }, + { + "epoch": 0.6015859030837004, + "grad_norm": 1.2993416112883407, + "learning_rate": 
1.6759719542949268e-05, + "loss": 0.6523685455322266, + "step": 1707 + }, + { + "epoch": 0.6019383259911895, + "grad_norm": 1.7291371377992661, + "learning_rate": 1.6755424765374378e-05, + "loss": 0.7361165285110474, + "step": 1708 + }, + { + "epoch": 0.6022907488986784, + "grad_norm": 1.1307969866596985, + "learning_rate": 1.6751127694543012e-05, + "loss": 0.45241934061050415, + "step": 1709 + }, + { + "epoch": 0.6026431718061674, + "grad_norm": 1.3734078208692269, + "learning_rate": 1.6746828331913903e-05, + "loss": 0.6610431671142578, + "step": 1710 + }, + { + "epoch": 0.6029955947136564, + "grad_norm": 1.6659887779271019, + "learning_rate": 1.674252667894654e-05, + "loss": 0.7572601437568665, + "step": 1711 + }, + { + "epoch": 0.6033480176211454, + "grad_norm": 1.3828996049540105, + "learning_rate": 1.6738222737101205e-05, + "loss": 0.7021572589874268, + "step": 1712 + }, + { + "epoch": 0.6037004405286344, + "grad_norm": 1.5581462402658262, + "learning_rate": 1.6733916507838952e-05, + "loss": 0.7742347121238708, + "step": 1713 + }, + { + "epoch": 0.6040528634361233, + "grad_norm": 1.5666267075277038, + "learning_rate": 1.6729607992621613e-05, + "loss": 0.6453407406806946, + "step": 1714 + }, + { + "epoch": 0.6044052863436123, + "grad_norm": 1.279025328652212, + "learning_rate": 1.6725297192911793e-05, + "loss": 0.7004555463790894, + "step": 1715 + }, + { + "epoch": 0.6047577092511013, + "grad_norm": 1.3482721305547676, + "learning_rate": 1.6720984110172875e-05, + "loss": 0.6979051232337952, + "step": 1716 + }, + { + "epoch": 0.6051101321585903, + "grad_norm": 1.5059245296578512, + "learning_rate": 1.671666874586902e-05, + "loss": 0.6387851238250732, + "step": 1717 + }, + { + "epoch": 0.6054625550660793, + "grad_norm": 1.5397561778856637, + "learning_rate": 1.671235110146515e-05, + "loss": 0.9083811044692993, + "step": 1718 + }, + { + "epoch": 0.6058149779735683, + "grad_norm": 1.637790853716126, + "learning_rate": 1.6708031178426984e-05, + "loss": 
0.747002363204956, + "step": 1719 + }, + { + "epoch": 0.6061674008810573, + "grad_norm": 1.6617583077406621, + "learning_rate": 1.6703708978220986e-05, + "loss": 0.7553372383117676, + "step": 1720 + }, + { + "epoch": 0.6065198237885463, + "grad_norm": 1.72002611544435, + "learning_rate": 1.669938450231442e-05, + "loss": 0.762795090675354, + "step": 1721 + }, + { + "epoch": 0.6068722466960352, + "grad_norm": 1.3894206198813077, + "learning_rate": 1.669505775217531e-05, + "loss": 0.739936113357544, + "step": 1722 + }, + { + "epoch": 0.6072246696035243, + "grad_norm": 1.625344781935558, + "learning_rate": 1.6690728729272456e-05, + "loss": 0.8439112305641174, + "step": 1723 + }, + { + "epoch": 0.6075770925110132, + "grad_norm": 1.5345011506472854, + "learning_rate": 1.6686397435075416e-05, + "loss": 0.6144756078720093, + "step": 1724 + }, + { + "epoch": 0.6079295154185022, + "grad_norm": 1.3674442510472364, + "learning_rate": 1.6682063871054534e-05, + "loss": 0.569161057472229, + "step": 1725 + }, + { + "epoch": 0.6082819383259912, + "grad_norm": 1.6372827589624075, + "learning_rate": 1.6677728038680926e-05, + "loss": 0.7523979544639587, + "step": 1726 + }, + { + "epoch": 0.6086343612334801, + "grad_norm": 1.453986649514636, + "learning_rate": 1.6673389939426463e-05, + "loss": 0.6394520401954651, + "step": 1727 + }, + { + "epoch": 0.6089867841409692, + "grad_norm": 1.358198647287584, + "learning_rate": 1.66690495747638e-05, + "loss": 0.5975633859634399, + "step": 1728 + }, + { + "epoch": 0.6093392070484581, + "grad_norm": 1.6192297143942058, + "learning_rate": 1.666470694616636e-05, + "loss": 0.736790657043457, + "step": 1729 + }, + { + "epoch": 0.6096916299559472, + "grad_norm": 1.4234241508654442, + "learning_rate": 1.6660362055108316e-05, + "loss": 0.7693831920623779, + "step": 1730 + }, + { + "epoch": 0.6100440528634361, + "grad_norm": 1.8032471376275176, + "learning_rate": 1.665601490306464e-05, + "loss": 0.7322608232498169, + "step": 1731 + }, + { + "epoch": 
0.6103964757709252, + "grad_norm": 1.3709677099617412, + "learning_rate": 1.6651665491511043e-05, + "loss": 0.6478679180145264, + "step": 1732 + }, + { + "epoch": 0.6107488986784141, + "grad_norm": 1.8838571148858527, + "learning_rate": 1.6647313821924022e-05, + "loss": 0.7125877141952515, + "step": 1733 + }, + { + "epoch": 0.6111013215859031, + "grad_norm": 1.5594770538222507, + "learning_rate": 1.664295989578083e-05, + "loss": 0.8999321460723877, + "step": 1734 + }, + { + "epoch": 0.6114537444933921, + "grad_norm": 1.618421596120734, + "learning_rate": 1.663860371455949e-05, + "loss": 0.6908334493637085, + "step": 1735 + }, + { + "epoch": 0.611806167400881, + "grad_norm": 1.5552403174407248, + "learning_rate": 1.663424527973879e-05, + "loss": 0.6708767414093018, + "step": 1736 + }, + { + "epoch": 0.61215859030837, + "grad_norm": 1.4907630752773764, + "learning_rate": 1.6629884592798283e-05, + "loss": 0.6991565823554993, + "step": 1737 + }, + { + "epoch": 0.612511013215859, + "grad_norm": 1.430459100414143, + "learning_rate": 1.6625521655218287e-05, + "loss": 0.6224193572998047, + "step": 1738 + }, + { + "epoch": 0.612863436123348, + "grad_norm": 1.6355889531807317, + "learning_rate": 1.662115646847988e-05, + "loss": 0.701459527015686, + "step": 1739 + }, + { + "epoch": 0.613215859030837, + "grad_norm": 1.508424771304017, + "learning_rate": 1.6616789034064914e-05, + "loss": 0.784063458442688, + "step": 1740 + }, + { + "epoch": 0.613568281938326, + "grad_norm": 1.4868333492675876, + "learning_rate": 1.661241935345599e-05, + "loss": 0.7604146003723145, + "step": 1741 + }, + { + "epoch": 0.613920704845815, + "grad_norm": 1.7090188741959023, + "learning_rate": 1.6608047428136482e-05, + "loss": 0.6347941160202026, + "step": 1742 + }, + { + "epoch": 0.614273127753304, + "grad_norm": 1.6487656059998825, + "learning_rate": 1.6603673259590524e-05, + "loss": 0.7559434175491333, + "step": 1743 + }, + { + "epoch": 0.6146255506607929, + "grad_norm": 1.5969979245345363, + 
"learning_rate": 1.6599296849303007e-05, + "loss": 0.742524266242981, + "step": 1744 + }, + { + "epoch": 0.614977973568282, + "grad_norm": 1.2238633556789393, + "learning_rate": 1.6594918198759586e-05, + "loss": 0.697594165802002, + "step": 1745 + }, + { + "epoch": 0.6153303964757709, + "grad_norm": 1.4536023257551807, + "learning_rate": 1.659053730944668e-05, + "loss": 0.7876765131950378, + "step": 1746 + }, + { + "epoch": 0.6156828193832599, + "grad_norm": 1.489887595585156, + "learning_rate": 1.658615418285146e-05, + "loss": 0.7514386177062988, + "step": 1747 + }, + { + "epoch": 0.6160352422907489, + "grad_norm": 1.6935500501856253, + "learning_rate": 1.658176882046187e-05, + "loss": 0.6220899820327759, + "step": 1748 + }, + { + "epoch": 0.6163876651982378, + "grad_norm": 1.9395284146525182, + "learning_rate": 1.6577381223766592e-05, + "loss": 0.7376539707183838, + "step": 1749 + }, + { + "epoch": 0.6167400881057269, + "grad_norm": 1.6373866531670291, + "learning_rate": 1.6572991394255084e-05, + "loss": 0.8296281099319458, + "step": 1750 + }, + { + "epoch": 0.6170925110132158, + "grad_norm": 1.545978766740828, + "learning_rate": 1.656859933341756e-05, + "loss": 0.7316757440567017, + "step": 1751 + }, + { + "epoch": 0.6174449339207049, + "grad_norm": 1.5280854263636194, + "learning_rate": 1.6564205042744986e-05, + "loss": 0.6933871507644653, + "step": 1752 + }, + { + "epoch": 0.6177973568281938, + "grad_norm": 1.890269396017501, + "learning_rate": 1.655980852372908e-05, + "loss": 0.6835601329803467, + "step": 1753 + }, + { + "epoch": 0.6181497797356829, + "grad_norm": 1.3967466693425752, + "learning_rate": 1.655540977786233e-05, + "loss": 0.6752027869224548, + "step": 1754 + }, + { + "epoch": 0.6185022026431718, + "grad_norm": 1.4944496246124994, + "learning_rate": 1.6551008806637976e-05, + "loss": 0.6092851758003235, + "step": 1755 + }, + { + "epoch": 0.6188546255506608, + "grad_norm": 1.3266652259646856, + "learning_rate": 1.6546605611550008e-05, + "loss": 
0.682563066482544, + "step": 1756 + }, + { + "epoch": 0.6192070484581498, + "grad_norm": 1.5302981352911342, + "learning_rate": 1.654220019409317e-05, + "loss": 0.8674311637878418, + "step": 1757 + }, + { + "epoch": 0.6195594713656387, + "grad_norm": 1.4437314589210788, + "learning_rate": 1.6537792555762966e-05, + "loss": 0.7209165096282959, + "step": 1758 + }, + { + "epoch": 0.6199118942731278, + "grad_norm": 1.5958855115050472, + "learning_rate": 1.6533382698055655e-05, + "loss": 0.7795991897583008, + "step": 1759 + }, + { + "epoch": 0.6202643171806167, + "grad_norm": 1.6392261912532398, + "learning_rate": 1.6528970622468245e-05, + "loss": 0.6749448776245117, + "step": 1760 + }, + { + "epoch": 0.6206167400881057, + "grad_norm": 1.5291165267411688, + "learning_rate": 1.6524556330498494e-05, + "loss": 0.9127920866012573, + "step": 1761 + }, + { + "epoch": 0.6209691629955947, + "grad_norm": 1.5402491362904795, + "learning_rate": 1.6520139823644922e-05, + "loss": 0.6224071979522705, + "step": 1762 + }, + { + "epoch": 0.6213215859030837, + "grad_norm": 1.426673111398807, + "learning_rate": 1.6515721103406798e-05, + "loss": 0.6955251693725586, + "step": 1763 + }, + { + "epoch": 0.6216740088105727, + "grad_norm": 1.7187740007003602, + "learning_rate": 1.6511300171284132e-05, + "loss": 0.676613986492157, + "step": 1764 + }, + { + "epoch": 0.6220264317180617, + "grad_norm": 1.4024924612217573, + "learning_rate": 1.65068770287777e-05, + "loss": 0.7482033967971802, + "step": 1765 + }, + { + "epoch": 0.6223788546255506, + "grad_norm": 1.4659804586317469, + "learning_rate": 1.6502451677389015e-05, + "loss": 0.6019684076309204, + "step": 1766 + }, + { + "epoch": 0.6227312775330397, + "grad_norm": 1.419796458872072, + "learning_rate": 1.649802411862035e-05, + "loss": 0.6796068549156189, + "step": 1767 + }, + { + "epoch": 0.6230837004405286, + "grad_norm": 2.234008541241949, + "learning_rate": 1.6493594353974724e-05, + "loss": 0.6351302862167358, + "step": 1768 + }, + { + 
"epoch": 0.6234361233480176, + "grad_norm": 1.4257561009443, + "learning_rate": 1.6489162384955906e-05, + "loss": 0.6093732714653015, + "step": 1769 + }, + { + "epoch": 0.6237885462555066, + "grad_norm": 1.842168854503522, + "learning_rate": 1.6484728213068405e-05, + "loss": 0.8181271553039551, + "step": 1770 + }, + { + "epoch": 0.6241409691629956, + "grad_norm": 1.821206401126196, + "learning_rate": 1.6480291839817488e-05, + "loss": 0.7093993425369263, + "step": 1771 + }, + { + "epoch": 0.6244933920704846, + "grad_norm": 1.416340976430299, + "learning_rate": 1.6475853266709165e-05, + "loss": 0.6895081996917725, + "step": 1772 + }, + { + "epoch": 0.6248458149779735, + "grad_norm": 1.5970315552720198, + "learning_rate": 1.6471412495250195e-05, + "loss": 0.6706013679504395, + "step": 1773 + }, + { + "epoch": 0.6251982378854626, + "grad_norm": 1.5170788749866242, + "learning_rate": 1.6466969526948082e-05, + "loss": 0.6700015664100647, + "step": 1774 + }, + { + "epoch": 0.6255506607929515, + "grad_norm": 1.5173815641058028, + "learning_rate": 1.6462524363311072e-05, + "loss": 0.6591087579727173, + "step": 1775 + }, + { + "epoch": 0.6259030837004406, + "grad_norm": 1.6219345446237772, + "learning_rate": 1.6458077005848164e-05, + "loss": 0.7775006294250488, + "step": 1776 + }, + { + "epoch": 0.6262555066079295, + "grad_norm": 1.6260525304572828, + "learning_rate": 1.6453627456069093e-05, + "loss": 0.8459682464599609, + "step": 1777 + }, + { + "epoch": 0.6266079295154185, + "grad_norm": 1.4031571304990242, + "learning_rate": 1.6449175715484346e-05, + "loss": 0.6536898612976074, + "step": 1778 + }, + { + "epoch": 0.6269603524229075, + "grad_norm": 1.5129603585000657, + "learning_rate": 1.6444721785605148e-05, + "loss": 0.7543610334396362, + "step": 1779 + }, + { + "epoch": 0.6273127753303964, + "grad_norm": 1.6228520645077271, + "learning_rate": 1.6440265667943474e-05, + "loss": 0.7416362762451172, + "step": 1780 + }, + { + "epoch": 0.6276651982378855, + "grad_norm": 
1.4583654660578542, + "learning_rate": 1.6435807364012035e-05, + "loss": 0.5505499839782715, + "step": 1781 + }, + { + "epoch": 0.6280176211453744, + "grad_norm": 1.5252426453600672, + "learning_rate": 1.6431346875324284e-05, + "loss": 0.792723536491394, + "step": 1782 + }, + { + "epoch": 0.6283700440528635, + "grad_norm": 1.3655475423968058, + "learning_rate": 1.6426884203394416e-05, + "loss": 0.6313158273696899, + "step": 1783 + }, + { + "epoch": 0.6287224669603524, + "grad_norm": 1.6057168635576118, + "learning_rate": 1.642241934973738e-05, + "loss": 0.6168874502182007, + "step": 1784 + }, + { + "epoch": 0.6290748898678414, + "grad_norm": 1.78997265433784, + "learning_rate": 1.6417952315868845e-05, + "loss": 0.6995766162872314, + "step": 1785 + }, + { + "epoch": 0.6294273127753304, + "grad_norm": 1.4835625331683349, + "learning_rate": 1.641348310330523e-05, + "loss": 0.8046826124191284, + "step": 1786 + }, + { + "epoch": 0.6297797356828194, + "grad_norm": 1.4892920408023869, + "learning_rate": 1.6409011713563697e-05, + "loss": 0.7227291464805603, + "step": 1787 + }, + { + "epoch": 0.6301321585903084, + "grad_norm": 1.4682105257113767, + "learning_rate": 1.6404538148162145e-05, + "loss": 0.6463631391525269, + "step": 1788 + }, + { + "epoch": 0.6304845814977974, + "grad_norm": 2.4977643907634, + "learning_rate": 1.640006240861921e-05, + "loss": 0.7473348379135132, + "step": 1789 + }, + { + "epoch": 0.6308370044052863, + "grad_norm": 1.4291329366827183, + "learning_rate": 1.6395584496454263e-05, + "loss": 0.7311505079269409, + "step": 1790 + }, + { + "epoch": 0.6311894273127753, + "grad_norm": 1.5618530036111458, + "learning_rate": 1.639110441318742e-05, + "loss": 0.7259535789489746, + "step": 1791 + }, + { + "epoch": 0.6315418502202643, + "grad_norm": 1.515515721890048, + "learning_rate": 1.6386622160339522e-05, + "loss": 0.5777252912521362, + "step": 1792 + }, + { + "epoch": 0.6318942731277533, + "grad_norm": 1.3190322559386176, + "learning_rate": 
1.638213773943216e-05, + "loss": 0.5510598421096802, + "step": 1793 + }, + { + "epoch": 0.6322466960352423, + "grad_norm": 1.5085803548323364, + "learning_rate": 1.637765115198766e-05, + "loss": 0.6448229551315308, + "step": 1794 + }, + { + "epoch": 0.6325991189427312, + "grad_norm": 1.5827276696724286, + "learning_rate": 1.6373162399529067e-05, + "loss": 0.7359289526939392, + "step": 1795 + }, + { + "epoch": 0.6329515418502203, + "grad_norm": 1.5346140091491929, + "learning_rate": 1.6368671483580185e-05, + "loss": 0.616656482219696, + "step": 1796 + }, + { + "epoch": 0.6333039647577092, + "grad_norm": 1.4291822350961465, + "learning_rate": 1.6364178405665534e-05, + "loss": 0.5966289043426514, + "step": 1797 + }, + { + "epoch": 0.6336563876651983, + "grad_norm": 1.8727626569458464, + "learning_rate": 1.6359683167310375e-05, + "loss": 0.7475985288619995, + "step": 1798 + }, + { + "epoch": 0.6340088105726872, + "grad_norm": 1.4494645750595028, + "learning_rate": 1.63551857700407e-05, + "loss": 0.6030765771865845, + "step": 1799 + }, + { + "epoch": 0.6343612334801763, + "grad_norm": 1.6637248682130477, + "learning_rate": 1.6350686215383237e-05, + "loss": 0.6193016171455383, + "step": 1800 + }, + { + "epoch": 0.6347136563876652, + "grad_norm": 1.3604775956740969, + "learning_rate": 1.6346184504865442e-05, + "loss": 0.6404513120651245, + "step": 1801 + }, + { + "epoch": 0.6350660792951542, + "grad_norm": 1.5539318450371893, + "learning_rate": 1.6341680640015515e-05, + "loss": 0.8453506231307983, + "step": 1802 + }, + { + "epoch": 0.6354185022026432, + "grad_norm": 1.3642622033336096, + "learning_rate": 1.6337174622362366e-05, + "loss": 0.6094445586204529, + "step": 1803 + }, + { + "epoch": 0.6357709251101321, + "grad_norm": 1.5112522647253264, + "learning_rate": 1.6332666453435653e-05, + "loss": 0.7352159023284912, + "step": 1804 + }, + { + "epoch": 0.6361233480176212, + "grad_norm": 1.4529963307650198, + "learning_rate": 1.632815613476576e-05, + "loss": 
0.7395339608192444, + "step": 1805 + }, + { + "epoch": 0.6364757709251101, + "grad_norm": 1.4350925789909401, + "learning_rate": 1.63236436678838e-05, + "loss": 0.7246927618980408, + "step": 1806 + }, + { + "epoch": 0.6368281938325991, + "grad_norm": 1.3653208723694477, + "learning_rate": 1.6319129054321616e-05, + "loss": 0.6913329362869263, + "step": 1807 + }, + { + "epoch": 0.6371806167400881, + "grad_norm": 1.9893098285493216, + "learning_rate": 1.6314612295611772e-05, + "loss": 0.6410515308380127, + "step": 1808 + }, + { + "epoch": 0.6375330396475771, + "grad_norm": 2.6583918764324665, + "learning_rate": 1.6310093393287574e-05, + "loss": 0.690910816192627, + "step": 1809 + }, + { + "epoch": 0.6378854625550661, + "grad_norm": 1.4623649413484192, + "learning_rate": 1.6305572348883044e-05, + "loss": 0.6520562171936035, + "step": 1810 + }, + { + "epoch": 0.6382378854625551, + "grad_norm": 1.6850706181935027, + "learning_rate": 1.630104916393294e-05, + "loss": 0.6966608166694641, + "step": 1811 + }, + { + "epoch": 0.638590308370044, + "grad_norm": 1.7161033790648312, + "learning_rate": 1.6296523839972743e-05, + "loss": 0.826806902885437, + "step": 1812 + }, + { + "epoch": 0.6389427312775331, + "grad_norm": 1.431569634617566, + "learning_rate": 1.6291996378538653e-05, + "loss": 0.6695773601531982, + "step": 1813 + }, + { + "epoch": 0.639295154185022, + "grad_norm": 1.4264708644101765, + "learning_rate": 1.6287466781167607e-05, + "loss": 0.5725491046905518, + "step": 1814 + }, + { + "epoch": 0.639647577092511, + "grad_norm": 1.2779233324378096, + "learning_rate": 1.628293504939727e-05, + "loss": 0.5543544292449951, + "step": 1815 + }, + { + "epoch": 0.64, + "grad_norm": 3.2997728941963564, + "learning_rate": 1.6278401184766007e-05, + "loss": 0.6964641809463501, + "step": 1816 + }, + { + "epoch": 0.640352422907489, + "grad_norm": 1.3065245679172277, + "learning_rate": 1.6273865188812935e-05, + "loss": 0.675407886505127, + "step": 1817 + }, + { + "epoch": 
0.640704845814978, + "grad_norm": 1.4883059032141013, + "learning_rate": 1.626932706307788e-05, + "loss": 0.6304433345794678, + "step": 1818 + }, + { + "epoch": 0.6410572687224669, + "grad_norm": 1.5529882690454875, + "learning_rate": 1.62647868091014e-05, + "loss": 0.7432112693786621, + "step": 1819 + }, + { + "epoch": 0.641409691629956, + "grad_norm": 1.5761551228008874, + "learning_rate": 1.6260244428424763e-05, + "loss": 0.730377197265625, + "step": 1820 + }, + { + "epoch": 0.6417621145374449, + "grad_norm": 1.7239403694554825, + "learning_rate": 1.6255699922589968e-05, + "loss": 0.694229006767273, + "step": 1821 + }, + { + "epoch": 0.642114537444934, + "grad_norm": 1.5664915948077012, + "learning_rate": 1.6251153293139735e-05, + "loss": 0.7284739017486572, + "step": 1822 + }, + { + "epoch": 0.6424669603524229, + "grad_norm": 1.4047714992661522, + "learning_rate": 1.6246604541617507e-05, + "loss": 0.6028950214385986, + "step": 1823 + }, + { + "epoch": 0.642819383259912, + "grad_norm": 1.65079248713073, + "learning_rate": 1.6242053669567432e-05, + "loss": 0.6776808500289917, + "step": 1824 + }, + { + "epoch": 0.6431718061674009, + "grad_norm": 1.7695857292474644, + "learning_rate": 1.6237500678534396e-05, + "loss": 0.7743366956710815, + "step": 1825 + }, + { + "epoch": 0.6435242290748898, + "grad_norm": 1.594351471613888, + "learning_rate": 1.6232945570064e-05, + "loss": 0.6356723308563232, + "step": 1826 + }, + { + "epoch": 0.6438766519823789, + "grad_norm": 1.4846113103688028, + "learning_rate": 1.622838834570256e-05, + "loss": 0.7356402277946472, + "step": 1827 + }, + { + "epoch": 0.6442290748898678, + "grad_norm": 1.455165750941624, + "learning_rate": 1.622382900699711e-05, + "loss": 0.7639342546463013, + "step": 1828 + }, + { + "epoch": 0.6445814977973569, + "grad_norm": 2.0823946019481987, + "learning_rate": 1.6219267555495407e-05, + "loss": 0.6969513297080994, + "step": 1829 + }, + { + "epoch": 0.6449339207048458, + "grad_norm": 1.418146430885783, + 
"learning_rate": 1.621470399274592e-05, + "loss": 0.7532765865325928, + "step": 1830 + }, + { + "epoch": 0.6452863436123348, + "grad_norm": 1.3893974330709622, + "learning_rate": 1.6210138320297835e-05, + "loss": 0.5801100730895996, + "step": 1831 + }, + { + "epoch": 0.6456387665198238, + "grad_norm": 1.5780391931120195, + "learning_rate": 1.6205570539701056e-05, + "loss": 0.8006102442741394, + "step": 1832 + }, + { + "epoch": 0.6459911894273128, + "grad_norm": 1.4094927188728377, + "learning_rate": 1.6201000652506203e-05, + "loss": 0.6507089138031006, + "step": 1833 + }, + { + "epoch": 0.6463436123348018, + "grad_norm": 1.9684758989320281, + "learning_rate": 1.619642866026461e-05, + "loss": 0.7407999634742737, + "step": 1834 + }, + { + "epoch": 0.6466960352422908, + "grad_norm": 1.4160609898798358, + "learning_rate": 1.619185456452833e-05, + "loss": 0.6964670419692993, + "step": 1835 + }, + { + "epoch": 0.6470484581497797, + "grad_norm": 1.6614634508995256, + "learning_rate": 1.6187278366850122e-05, + "loss": 0.7095489501953125, + "step": 1836 + }, + { + "epoch": 0.6474008810572687, + "grad_norm": 2.0391949894277017, + "learning_rate": 1.6182700068783463e-05, + "loss": 0.6968166828155518, + "step": 1837 + }, + { + "epoch": 0.6477533039647577, + "grad_norm": 1.3206477384834772, + "learning_rate": 1.617811967188254e-05, + "loss": 0.7745821475982666, + "step": 1838 + }, + { + "epoch": 0.6481057268722467, + "grad_norm": 1.4803456865319338, + "learning_rate": 1.6173537177702266e-05, + "loss": 0.7071934938430786, + "step": 1839 + }, + { + "epoch": 0.6484581497797357, + "grad_norm": 1.7225763324537737, + "learning_rate": 1.6168952587798242e-05, + "loss": 0.6481701135635376, + "step": 1840 + }, + { + "epoch": 0.6488105726872246, + "grad_norm": 1.4447543914645467, + "learning_rate": 1.6164365903726805e-05, + "loss": 0.6349890232086182, + "step": 1841 + }, + { + "epoch": 0.6491629955947137, + "grad_norm": 1.3913908457554178, + "learning_rate": 1.6159777127044982e-05, + 
"loss": 0.6067368388175964, + "step": 1842 + }, + { + "epoch": 0.6495154185022026, + "grad_norm": 1.3943413375617566, + "learning_rate": 1.6155186259310523e-05, + "loss": 0.7170778512954712, + "step": 1843 + }, + { + "epoch": 0.6498678414096917, + "grad_norm": 1.4309397568408155, + "learning_rate": 1.6150593302081888e-05, + "loss": 0.5623376965522766, + "step": 1844 + }, + { + "epoch": 0.6502202643171806, + "grad_norm": 1.442096873601557, + "learning_rate": 1.6145998256918238e-05, + "loss": 0.7295233607292175, + "step": 1845 + }, + { + "epoch": 0.6505726872246697, + "grad_norm": 1.513681766461532, + "learning_rate": 1.6141401125379454e-05, + "loss": 0.6991151571273804, + "step": 1846 + }, + { + "epoch": 0.6509251101321586, + "grad_norm": 1.568060173563952, + "learning_rate": 1.6136801909026113e-05, + "loss": 0.7553545236587524, + "step": 1847 + }, + { + "epoch": 0.6512775330396475, + "grad_norm": 1.560177534517688, + "learning_rate": 1.613220060941951e-05, + "loss": 0.8280071020126343, + "step": 1848 + }, + { + "epoch": 0.6516299559471366, + "grad_norm": 1.3846780543862842, + "learning_rate": 1.6127597228121636e-05, + "loss": 0.662299633026123, + "step": 1849 + }, + { + "epoch": 0.6519823788546255, + "grad_norm": 1.519733781984336, + "learning_rate": 1.6122991766695206e-05, + "loss": 0.6493197679519653, + "step": 1850 + }, + { + "epoch": 0.6523348017621146, + "grad_norm": 1.5074834442694671, + "learning_rate": 1.6118384226703623e-05, + "loss": 0.5910629034042358, + "step": 1851 + }, + { + "epoch": 0.6526872246696035, + "grad_norm": 1.5082942143966174, + "learning_rate": 1.611377460971101e-05, + "loss": 0.7124426364898682, + "step": 1852 + }, + { + "epoch": 0.6530396475770925, + "grad_norm": 1.6734021483912949, + "learning_rate": 1.610916291728218e-05, + "loss": 0.6081063747406006, + "step": 1853 + }, + { + "epoch": 0.6533920704845815, + "grad_norm": 1.5485445677219123, + "learning_rate": 1.6104549150982666e-05, + "loss": 0.7536673545837402, + "step": 1854 + }, + { 
+ "epoch": 0.6537444933920705, + "grad_norm": 1.5239612944966212, + "learning_rate": 1.6099933312378695e-05, + "loss": 0.6514976024627686, + "step": 1855 + }, + { + "epoch": 0.6540969162995595, + "grad_norm": 1.3951117738157057, + "learning_rate": 1.6095315403037205e-05, + "loss": 0.6595193147659302, + "step": 1856 + }, + { + "epoch": 0.6544493392070485, + "grad_norm": 1.5562205804379312, + "learning_rate": 1.6090695424525826e-05, + "loss": 0.666920006275177, + "step": 1857 + }, + { + "epoch": 0.6548017621145374, + "grad_norm": 1.5350434119319913, + "learning_rate": 1.6086073378412902e-05, + "loss": 0.5984979271888733, + "step": 1858 + }, + { + "epoch": 0.6551541850220264, + "grad_norm": 1.8541188470544154, + "learning_rate": 1.608144926626747e-05, + "loss": 0.8021191358566284, + "step": 1859 + }, + { + "epoch": 0.6555066079295154, + "grad_norm": 1.5029675710659876, + "learning_rate": 1.6076823089659272e-05, + "loss": 0.7368075847625732, + "step": 1860 + }, + { + "epoch": 0.6558590308370044, + "grad_norm": 1.596711606351331, + "learning_rate": 1.6072194850158755e-05, + "loss": 0.7923766374588013, + "step": 1861 + }, + { + "epoch": 0.6562114537444934, + "grad_norm": 1.6332800469997777, + "learning_rate": 1.606756454933706e-05, + "loss": 0.6907824873924255, + "step": 1862 + }, + { + "epoch": 0.6565638766519823, + "grad_norm": 1.5674543537069574, + "learning_rate": 1.606293218876603e-05, + "loss": 0.7366634607315063, + "step": 1863 + }, + { + "epoch": 0.6569162995594714, + "grad_norm": 1.7550517656533429, + "learning_rate": 1.6058297770018208e-05, + "loss": 0.7166022658348083, + "step": 1864 + }, + { + "epoch": 0.6572687224669603, + "grad_norm": 1.5153527205809505, + "learning_rate": 1.6053661294666833e-05, + "loss": 0.6969404220581055, + "step": 1865 + }, + { + "epoch": 0.6576211453744494, + "grad_norm": 1.5681332930444218, + "learning_rate": 1.6049022764285846e-05, + "loss": 0.7182974815368652, + "step": 1866 + }, + { + "epoch": 0.6579735682819383, + "grad_norm": 
2.620263422686914, + "learning_rate": 1.6044382180449886e-05, + "loss": 0.7469301819801331, + "step": 1867 + }, + { + "epoch": 0.6583259911894274, + "grad_norm": 1.458082221775431, + "learning_rate": 1.603973954473428e-05, + "loss": 0.7097122073173523, + "step": 1868 + }, + { + "epoch": 0.6586784140969163, + "grad_norm": 1.3404337000381439, + "learning_rate": 1.6035094858715065e-05, + "loss": 0.6907291412353516, + "step": 1869 + }, + { + "epoch": 0.6590308370044052, + "grad_norm": 1.5576579616406543, + "learning_rate": 1.6030448123968963e-05, + "loss": 0.6259130239486694, + "step": 1870 + }, + { + "epoch": 0.6593832599118943, + "grad_norm": 1.6431810286043311, + "learning_rate": 1.6025799342073397e-05, + "loss": 0.6948051452636719, + "step": 1871 + }, + { + "epoch": 0.6597356828193832, + "grad_norm": 1.3540961323396474, + "learning_rate": 1.602114851460648e-05, + "loss": 0.7037572264671326, + "step": 1872 + }, + { + "epoch": 0.6600881057268723, + "grad_norm": 1.565352238933419, + "learning_rate": 1.6016495643147036e-05, + "loss": 0.7728864550590515, + "step": 1873 + }, + { + "epoch": 0.6604405286343612, + "grad_norm": 1.4345290675539004, + "learning_rate": 1.601184072927456e-05, + "loss": 0.7782067060470581, + "step": 1874 + }, + { + "epoch": 0.6607929515418502, + "grad_norm": 1.4505913839056241, + "learning_rate": 1.6007183774569246e-05, + "loss": 0.6168591976165771, + "step": 1875 + }, + { + "epoch": 0.6611453744493392, + "grad_norm": 1.6465062301007323, + "learning_rate": 1.6002524780611995e-05, + "loss": 0.702346920967102, + "step": 1876 + }, + { + "epoch": 0.6614977973568282, + "grad_norm": 1.6478258582343996, + "learning_rate": 1.5997863748984384e-05, + "loss": 0.6084239482879639, + "step": 1877 + }, + { + "epoch": 0.6618502202643172, + "grad_norm": 1.5841429013244157, + "learning_rate": 1.5993200681268696e-05, + "loss": 0.8307315707206726, + "step": 1878 + }, + { + "epoch": 0.6622026431718062, + "grad_norm": 1.8073980879357947, + "learning_rate": 
1.5988535579047888e-05, + "loss": 0.6465811729431152, + "step": 1879 + }, + { + "epoch": 0.6625550660792952, + "grad_norm": 1.5593829827457022, + "learning_rate": 1.598386844390562e-05, + "loss": 0.71415114402771, + "step": 1880 + }, + { + "epoch": 0.6629074889867841, + "grad_norm": 6.602062472303997, + "learning_rate": 1.5979199277426243e-05, + "loss": 0.7135012149810791, + "step": 1881 + }, + { + "epoch": 0.6632599118942731, + "grad_norm": 1.584805815321856, + "learning_rate": 1.597452808119479e-05, + "loss": 0.840306282043457, + "step": 1882 + }, + { + "epoch": 0.6636123348017621, + "grad_norm": 1.454651140369818, + "learning_rate": 1.596985485679699e-05, + "loss": 0.622429609298706, + "step": 1883 + }, + { + "epoch": 0.6639647577092511, + "grad_norm": 1.5798478269154124, + "learning_rate": 1.5965179605819248e-05, + "loss": 0.6505612134933472, + "step": 1884 + }, + { + "epoch": 0.66431718061674, + "grad_norm": 1.4292089389404006, + "learning_rate": 1.5960502329848683e-05, + "loss": 0.7665247917175293, + "step": 1885 + }, + { + "epoch": 0.6646696035242291, + "grad_norm": 1.614107737492675, + "learning_rate": 1.5955823030473068e-05, + "loss": 0.7780051231384277, + "step": 1886 + }, + { + "epoch": 0.665022026431718, + "grad_norm": 1.4074097920809756, + "learning_rate": 1.5951141709280886e-05, + "loss": 0.6311650276184082, + "step": 1887 + }, + { + "epoch": 0.6653744493392071, + "grad_norm": 1.287734360896639, + "learning_rate": 1.5946458367861302e-05, + "loss": 0.7126712799072266, + "step": 1888 + }, + { + "epoch": 0.665726872246696, + "grad_norm": 1.3823278268773909, + "learning_rate": 1.5941773007804165e-05, + "loss": 0.6979397535324097, + "step": 1889 + }, + { + "epoch": 0.6660792951541851, + "grad_norm": 1.5067230035216896, + "learning_rate": 1.5937085630700003e-05, + "loss": 0.7065495252609253, + "step": 1890 + }, + { + "epoch": 0.666431718061674, + "grad_norm": 1.373677820269664, + "learning_rate": 1.593239623814004e-05, + "loss": 0.6157221794128418, + 
"step": 1891 + }, + { + "epoch": 0.6667841409691629, + "grad_norm": 1.6157271272896285, + "learning_rate": 1.5927704831716177e-05, + "loss": 0.6835625171661377, + "step": 1892 + }, + { + "epoch": 0.667136563876652, + "grad_norm": 1.5002309814069255, + "learning_rate": 1.5923011413021e-05, + "loss": 0.6416822671890259, + "step": 1893 + }, + { + "epoch": 0.6674889867841409, + "grad_norm": 1.4507514621746327, + "learning_rate": 1.5918315983647782e-05, + "loss": 0.7307168245315552, + "step": 1894 + }, + { + "epoch": 0.66784140969163, + "grad_norm": 1.3321086634513644, + "learning_rate": 1.5913618545190468e-05, + "loss": 0.5464824438095093, + "step": 1895 + }, + { + "epoch": 0.6681938325991189, + "grad_norm": 1.544912001907108, + "learning_rate": 1.5908919099243698e-05, + "loss": 0.6634502410888672, + "step": 1896 + }, + { + "epoch": 0.668546255506608, + "grad_norm": 1.2985703589965545, + "learning_rate": 1.5904217647402788e-05, + "loss": 0.719158411026001, + "step": 1897 + }, + { + "epoch": 0.6688986784140969, + "grad_norm": 1.5083721998375157, + "learning_rate": 1.5899514191263733e-05, + "loss": 0.7547527551651001, + "step": 1898 + }, + { + "epoch": 0.6692511013215859, + "grad_norm": 1.6226125781851348, + "learning_rate": 1.5894808732423207e-05, + "loss": 0.7549886703491211, + "step": 1899 + }, + { + "epoch": 0.6696035242290749, + "grad_norm": 1.5327056521201368, + "learning_rate": 1.589010127247857e-05, + "loss": 0.7107831239700317, + "step": 1900 + }, + { + "epoch": 0.6699559471365639, + "grad_norm": 1.5679371113552734, + "learning_rate": 1.588539181302786e-05, + "loss": 0.855078935623169, + "step": 1901 + }, + { + "epoch": 0.6703083700440529, + "grad_norm": 1.4970896726818788, + "learning_rate": 1.5880680355669792e-05, + "loss": 0.8235266208648682, + "step": 1902 + }, + { + "epoch": 0.6706607929515418, + "grad_norm": 1.339674008175079, + "learning_rate": 1.587596690200375e-05, + "loss": 0.6060166358947754, + "step": 1903 + }, + { + "epoch": 0.6710132158590308, + 
"grad_norm": 1.4603163291197105, + "learning_rate": 1.5871251453629817e-05, + "loss": 0.7325272560119629, + "step": 1904 + }, + { + "epoch": 0.6713656387665198, + "grad_norm": 1.5470128203990354, + "learning_rate": 1.586653401214873e-05, + "loss": 0.674901008605957, + "step": 1905 + }, + { + "epoch": 0.6717180616740088, + "grad_norm": 1.3515017914848853, + "learning_rate": 1.5861814579161928e-05, + "loss": 0.767164945602417, + "step": 1906 + }, + { + "epoch": 0.6720704845814978, + "grad_norm": 1.3633425183694836, + "learning_rate": 1.5857093156271496e-05, + "loss": 0.5691556930541992, + "step": 1907 + }, + { + "epoch": 0.6724229074889868, + "grad_norm": 1.3106038540183678, + "learning_rate": 1.585236974508022e-05, + "loss": 0.6885931491851807, + "step": 1908 + }, + { + "epoch": 0.6727753303964757, + "grad_norm": 1.143239709830434, + "learning_rate": 1.5847644347191545e-05, + "loss": 0.6227391958236694, + "step": 1909 + }, + { + "epoch": 0.6731277533039648, + "grad_norm": 1.4883434470080177, + "learning_rate": 1.5842916964209602e-05, + "loss": 0.6084527969360352, + "step": 1910 + }, + { + "epoch": 0.6734801762114537, + "grad_norm": 1.7178691294348742, + "learning_rate": 1.583818759773919e-05, + "loss": 0.7001935243606567, + "step": 1911 + }, + { + "epoch": 0.6738325991189428, + "grad_norm": 1.684468384573203, + "learning_rate": 1.5833456249385774e-05, + "loss": 0.8263465166091919, + "step": 1912 + }, + { + "epoch": 0.6741850220264317, + "grad_norm": 1.6085564780466834, + "learning_rate": 1.582872292075551e-05, + "loss": 0.662792444229126, + "step": 1913 + }, + { + "epoch": 0.6745374449339208, + "grad_norm": 1.7464203558320361, + "learning_rate": 1.582398761345521e-05, + "loss": 0.7093051075935364, + "step": 1914 + }, + { + "epoch": 0.6748898678414097, + "grad_norm": 1.4885122105608484, + "learning_rate": 1.5819250329092364e-05, + "loss": 0.7264106273651123, + "step": 1915 + }, + { + "epoch": 0.6752422907488986, + "grad_norm": 1.5383309179609377, + "learning_rate": 
1.581451106927513e-05, + "loss": 0.6561543345451355, + "step": 1916 + }, + { + "epoch": 0.6755947136563877, + "grad_norm": 1.634971670239321, + "learning_rate": 1.580976983561235e-05, + "loss": 0.6563262939453125, + "step": 1917 + }, + { + "epoch": 0.6759471365638766, + "grad_norm": 1.2931579342976025, + "learning_rate": 1.5805026629713512e-05, + "loss": 0.5224509239196777, + "step": 1918 + }, + { + "epoch": 0.6762995594713657, + "grad_norm": 1.4840746720603137, + "learning_rate": 1.5800281453188793e-05, + "loss": 0.6565898656845093, + "step": 1919 + }, + { + "epoch": 0.6766519823788546, + "grad_norm": 1.4375600407888718, + "learning_rate": 1.5795534307649032e-05, + "loss": 0.7954028844833374, + "step": 1920 + }, + { + "epoch": 0.6770044052863436, + "grad_norm": 1.3454762773409146, + "learning_rate": 1.579078519470574e-05, + "loss": 0.6624404788017273, + "step": 1921 + }, + { + "epoch": 0.6773568281938326, + "grad_norm": 1.5514355338443828, + "learning_rate": 1.5786034115971083e-05, + "loss": 0.840311586856842, + "step": 1922 + }, + { + "epoch": 0.6777092511013216, + "grad_norm": 1.5163172495660509, + "learning_rate": 1.578128107305792e-05, + "loss": 0.6967859864234924, + "step": 1923 + }, + { + "epoch": 0.6780616740088106, + "grad_norm": 1.0735596232953704, + "learning_rate": 1.5776526067579746e-05, + "loss": 0.5295379161834717, + "step": 1924 + }, + { + "epoch": 0.6784140969162996, + "grad_norm": 1.8118747234451476, + "learning_rate": 1.5771769101150752e-05, + "loss": 0.6758475303649902, + "step": 1925 + }, + { + "epoch": 0.6787665198237885, + "grad_norm": 1.3510918406813899, + "learning_rate": 1.576701017538577e-05, + "loss": 0.6891785860061646, + "step": 1926 + }, + { + "epoch": 0.6791189427312775, + "grad_norm": 1.4115910497948105, + "learning_rate": 1.5762249291900304e-05, + "loss": 0.6507086157798767, + "step": 1927 + }, + { + "epoch": 0.6794713656387665, + "grad_norm": 1.4168935733459347, + "learning_rate": 1.5757486452310537e-05, + "loss": 
0.6220029592514038, + "step": 1928 + }, + { + "epoch": 0.6798237885462555, + "grad_norm": 1.5134078284046213, + "learning_rate": 1.5752721658233294e-05, + "loss": 0.7742874622344971, + "step": 1929 + }, + { + "epoch": 0.6801762114537445, + "grad_norm": 1.513809055671425, + "learning_rate": 1.5747954911286085e-05, + "loss": 0.6895851492881775, + "step": 1930 + }, + { + "epoch": 0.6805286343612335, + "grad_norm": 1.6367265924041048, + "learning_rate": 1.5743186213087062e-05, + "loss": 0.71466064453125, + "step": 1931 + }, + { + "epoch": 0.6808810572687225, + "grad_norm": 1.506916023064254, + "learning_rate": 1.5738415565255056e-05, + "loss": 0.6465627551078796, + "step": 1932 + }, + { + "epoch": 0.6812334801762114, + "grad_norm": 1.3796886447957644, + "learning_rate": 1.5733642969409553e-05, + "loss": 0.7592962980270386, + "step": 1933 + }, + { + "epoch": 0.6815859030837005, + "grad_norm": 1.662185742102518, + "learning_rate": 1.57288684271707e-05, + "loss": 0.7641816735267639, + "step": 1934 + }, + { + "epoch": 0.6819383259911894, + "grad_norm": 1.5600426648231815, + "learning_rate": 1.5724091940159306e-05, + "loss": 0.7015130519866943, + "step": 1935 + }, + { + "epoch": 0.6822907488986785, + "grad_norm": 1.5031237824980206, + "learning_rate": 1.5719313509996833e-05, + "loss": 0.7851461172103882, + "step": 1936 + }, + { + "epoch": 0.6826431718061674, + "grad_norm": 1.5670991097913773, + "learning_rate": 1.571453313830542e-05, + "loss": 0.7924813628196716, + "step": 1937 + }, + { + "epoch": 0.6829955947136563, + "grad_norm": 1.3030215719290177, + "learning_rate": 1.570975082670785e-05, + "loss": 0.6082741022109985, + "step": 1938 + }, + { + "epoch": 0.6833480176211454, + "grad_norm": 1.5878638287998994, + "learning_rate": 1.5704966576827563e-05, + "loss": 0.7307756543159485, + "step": 1939 + }, + { + "epoch": 0.6837004405286343, + "grad_norm": 1.421111197077357, + "learning_rate": 1.570018039028867e-05, + "loss": 0.6877273917198181, + "step": 1940 + }, + { + "epoch": 
0.6840528634361234, + "grad_norm": 3.222041323215856, + "learning_rate": 1.5695392268715934e-05, + "loss": 0.7702943086624146, + "step": 1941 + }, + { + "epoch": 0.6844052863436123, + "grad_norm": 1.3182333231384877, + "learning_rate": 1.569060221373477e-05, + "loss": 0.6576820611953735, + "step": 1942 + }, + { + "epoch": 0.6847577092511014, + "grad_norm": 1.6178003008675335, + "learning_rate": 1.568581022697125e-05, + "loss": 0.6605322360992432, + "step": 1943 + }, + { + "epoch": 0.6851101321585903, + "grad_norm": 1.5479637201173908, + "learning_rate": 1.568101631005211e-05, + "loss": 0.8065364360809326, + "step": 1944 + }, + { + "epoch": 0.6854625550660793, + "grad_norm": 1.5909483515555374, + "learning_rate": 1.5676220464604726e-05, + "loss": 0.8018748164176941, + "step": 1945 + }, + { + "epoch": 0.6858149779735683, + "grad_norm": 1.4496461628107289, + "learning_rate": 1.567142269225715e-05, + "loss": 0.6114683151245117, + "step": 1946 + }, + { + "epoch": 0.6861674008810573, + "grad_norm": 1.4567709922330223, + "learning_rate": 1.566662299463807e-05, + "loss": 0.8470789194107056, + "step": 1947 + }, + { + "epoch": 0.6865198237885463, + "grad_norm": 1.4716494157627575, + "learning_rate": 1.5661821373376837e-05, + "loss": 0.7133561372756958, + "step": 1948 + }, + { + "epoch": 0.6868722466960352, + "grad_norm": 1.6398709503866558, + "learning_rate": 1.5657017830103448e-05, + "loss": 0.9101625084877014, + "step": 1949 + }, + { + "epoch": 0.6872246696035242, + "grad_norm": 1.8312595153810016, + "learning_rate": 1.565221236644856e-05, + "loss": 0.7395101189613342, + "step": 1950 + }, + { + "epoch": 0.6875770925110132, + "grad_norm": 1.4532682115054107, + "learning_rate": 1.5647404984043474e-05, + "loss": 0.7421061992645264, + "step": 1951 + }, + { + "epoch": 0.6879295154185022, + "grad_norm": 1.4495130982943423, + "learning_rate": 1.5642595684520154e-05, + "loss": 0.8744432330131531, + "step": 1952 + }, + { + "epoch": 0.6882819383259912, + "grad_norm": 
1.6475850419823541, + "learning_rate": 1.56377844695112e-05, + "loss": 0.8043868541717529, + "step": 1953 + }, + { + "epoch": 0.6886343612334802, + "grad_norm": 1.444538108927131, + "learning_rate": 1.5632971340649873e-05, + "loss": 0.6231396198272705, + "step": 1954 + }, + { + "epoch": 0.6889867841409691, + "grad_norm": 1.3765988847280666, + "learning_rate": 1.562815629957008e-05, + "loss": 0.7791434526443481, + "step": 1955 + }, + { + "epoch": 0.6893392070484582, + "grad_norm": 1.2135950275511538, + "learning_rate": 1.5623339347906383e-05, + "loss": 0.5652475357055664, + "step": 1956 + }, + { + "epoch": 0.6896916299559471, + "grad_norm": 1.4607959644694648, + "learning_rate": 1.561852048729398e-05, + "loss": 0.611067533493042, + "step": 1957 + }, + { + "epoch": 0.6900440528634362, + "grad_norm": 1.2569255893474116, + "learning_rate": 1.5613699719368724e-05, + "loss": 0.7580389976501465, + "step": 1958 + }, + { + "epoch": 0.6903964757709251, + "grad_norm": 1.516048041026883, + "learning_rate": 1.560887704576712e-05, + "loss": 0.6841205954551697, + "step": 1959 + }, + { + "epoch": 0.690748898678414, + "grad_norm": 1.7678860610521125, + "learning_rate": 1.5604052468126315e-05, + "loss": 0.7600575089454651, + "step": 1960 + }, + { + "epoch": 0.6911013215859031, + "grad_norm": 1.458096987341084, + "learning_rate": 1.55992259880841e-05, + "loss": 0.7547114491462708, + "step": 1961 + }, + { + "epoch": 0.691453744493392, + "grad_norm": 1.3490975617996133, + "learning_rate": 1.5594397607278912e-05, + "loss": 0.6917474865913391, + "step": 1962 + }, + { + "epoch": 0.6918061674008811, + "grad_norm": 1.378212312699651, + "learning_rate": 1.5589567327349845e-05, + "loss": 0.6820487976074219, + "step": 1963 + }, + { + "epoch": 0.69215859030837, + "grad_norm": 1.4687305992297937, + "learning_rate": 1.5584735149936628e-05, + "loss": 0.6513597965240479, + "step": 1964 + }, + { + "epoch": 0.6925110132158591, + "grad_norm": 1.4807223837447299, + "learning_rate": 
1.5579901076679625e-05, + "loss": 0.668257474899292, + "step": 1965 + }, + { + "epoch": 0.692863436123348, + "grad_norm": 1.5130451892313703, + "learning_rate": 1.5575065109219864e-05, + "loss": 0.7600705623626709, + "step": 1966 + }, + { + "epoch": 0.693215859030837, + "grad_norm": 1.5218611988458295, + "learning_rate": 1.5570227249198993e-05, + "loss": 0.8140011429786682, + "step": 1967 + }, + { + "epoch": 0.693568281938326, + "grad_norm": 1.1438716908088957, + "learning_rate": 1.556538749825933e-05, + "loss": 0.610436201095581, + "step": 1968 + }, + { + "epoch": 0.693920704845815, + "grad_norm": 1.7706616264872619, + "learning_rate": 1.556054585804381e-05, + "loss": 0.7745693922042847, + "step": 1969 + }, + { + "epoch": 0.694273127753304, + "grad_norm": 1.4076568647110412, + "learning_rate": 1.5555702330196024e-05, + "loss": 0.5809592008590698, + "step": 1970 + }, + { + "epoch": 0.6946255506607929, + "grad_norm": 1.220751429593537, + "learning_rate": 1.5550856916360195e-05, + "loss": 0.6354515552520752, + "step": 1971 + }, + { + "epoch": 0.694977973568282, + "grad_norm": 1.4513364815061058, + "learning_rate": 1.5546009618181194e-05, + "loss": 0.8076149225234985, + "step": 1972 + }, + { + "epoch": 0.6953303964757709, + "grad_norm": 1.6702158357132753, + "learning_rate": 1.5541160437304524e-05, + "loss": 0.7553249597549438, + "step": 1973 + }, + { + "epoch": 0.6956828193832599, + "grad_norm": 1.4495619596653457, + "learning_rate": 1.5536309375376332e-05, + "loss": 0.6109169125556946, + "step": 1974 + }, + { + "epoch": 0.6960352422907489, + "grad_norm": 1.4052818449921982, + "learning_rate": 1.5531456434043404e-05, + "loss": 0.8184436559677124, + "step": 1975 + }, + { + "epoch": 0.6963876651982379, + "grad_norm": 1.3611746850672197, + "learning_rate": 1.5526601614953164e-05, + "loss": 0.6823909878730774, + "step": 1976 + }, + { + "epoch": 0.6967400881057269, + "grad_norm": 1.3254402340100906, + "learning_rate": 1.5521744919753668e-05, + "loss": 0.6669045090675354, 
+ "step": 1977 + }, + { + "epoch": 0.6970925110132159, + "grad_norm": 1.7752129025350782, + "learning_rate": 1.5516886350093617e-05, + "loss": 0.8054187297821045, + "step": 1978 + }, + { + "epoch": 0.6974449339207048, + "grad_norm": 1.6379915816078137, + "learning_rate": 1.551202590762234e-05, + "loss": 0.7089184522628784, + "step": 1979 + }, + { + "epoch": 0.6977973568281939, + "grad_norm": 1.5207382048575195, + "learning_rate": 1.5507163593989804e-05, + "loss": 0.7908214330673218, + "step": 1980 + }, + { + "epoch": 0.6981497797356828, + "grad_norm": 1.454323961299799, + "learning_rate": 1.5502299410846626e-05, + "loss": 0.8859039545059204, + "step": 1981 + }, + { + "epoch": 0.6985022026431718, + "grad_norm": 1.5085321450966587, + "learning_rate": 1.549743335984403e-05, + "loss": 0.7156866788864136, + "step": 1982 + }, + { + "epoch": 0.6988546255506608, + "grad_norm": 1.4496904801370623, + "learning_rate": 1.5492565442633894e-05, + "loss": 0.6158934831619263, + "step": 1983 + }, + { + "epoch": 0.6992070484581497, + "grad_norm": 1.5453977055484032, + "learning_rate": 1.548769566086873e-05, + "loss": 0.6689192056655884, + "step": 1984 + }, + { + "epoch": 0.6995594713656388, + "grad_norm": 1.4591630403591411, + "learning_rate": 1.548282401620167e-05, + "loss": 0.6695841550827026, + "step": 1985 + }, + { + "epoch": 0.6999118942731277, + "grad_norm": 1.6161480882103554, + "learning_rate": 1.5477950510286488e-05, + "loss": 0.7196098566055298, + "step": 1986 + }, + { + "epoch": 0.7002643171806168, + "grad_norm": 1.5261033448052712, + "learning_rate": 1.5473075144777586e-05, + "loss": 0.7811123132705688, + "step": 1987 + }, + { + "epoch": 0.7006167400881057, + "grad_norm": 1.3902237132074229, + "learning_rate": 1.5468197921330006e-05, + "loss": 0.6341326236724854, + "step": 1988 + }, + { + "epoch": 0.7009691629955948, + "grad_norm": 1.4052051959904983, + "learning_rate": 1.5463318841599408e-05, + "loss": 0.6344352960586548, + "step": 1989 + }, + { + "epoch": 
0.7013215859030837, + "grad_norm": 1.5015659436227353, + "learning_rate": 1.5458437907242084e-05, + "loss": 0.6708072423934937, + "step": 1990 + }, + { + "epoch": 0.7016740088105727, + "grad_norm": 1.4551372124338164, + "learning_rate": 1.5453555119914963e-05, + "loss": 0.7018578052520752, + "step": 1991 + }, + { + "epoch": 0.7020264317180617, + "grad_norm": 1.4651591378979865, + "learning_rate": 1.5448670481275604e-05, + "loss": 0.6966190338134766, + "step": 1992 + }, + { + "epoch": 0.7023788546255506, + "grad_norm": 1.2815956936347872, + "learning_rate": 1.5443783992982182e-05, + "loss": 0.6280171871185303, + "step": 1993 + }, + { + "epoch": 0.7027312775330397, + "grad_norm": 1.451492070117077, + "learning_rate": 1.5438895656693512e-05, + "loss": 0.6644559502601624, + "step": 1994 + }, + { + "epoch": 0.7030837004405286, + "grad_norm": 1.5030450433681415, + "learning_rate": 1.543400547406903e-05, + "loss": 0.776411771774292, + "step": 1995 + }, + { + "epoch": 0.7034361233480176, + "grad_norm": 1.428531901666428, + "learning_rate": 1.5429113446768805e-05, + "loss": 0.6353679895401001, + "step": 1996 + }, + { + "epoch": 0.7037885462555066, + "grad_norm": 1.468487936335314, + "learning_rate": 1.5424219576453526e-05, + "loss": 0.686774492263794, + "step": 1997 + }, + { + "epoch": 0.7041409691629956, + "grad_norm": 1.2525683766202464, + "learning_rate": 1.5419323864784508e-05, + "loss": 0.5296701192855835, + "step": 1998 + }, + { + "epoch": 0.7044933920704846, + "grad_norm": 1.264413948230812, + "learning_rate": 1.5414426313423692e-05, + "loss": 0.6246802806854248, + "step": 1999 + }, + { + "epoch": 0.7048458149779736, + "grad_norm": 1.44172793688486, + "learning_rate": 1.5409526924033646e-05, + "loss": 0.6633912920951843, + "step": 2000 + }, + { + "epoch": 0.7051982378854625, + "grad_norm": 1.720413855985522, + "learning_rate": 1.540462569827756e-05, + "loss": 0.7324577569961548, + "step": 2001 + }, + { + "epoch": 0.7055506607929516, + "grad_norm": 1.6372387419200998, 
+ "learning_rate": 1.539972263781925e-05, + "loss": 0.7988085746765137, + "step": 2002 + }, + { + "epoch": 0.7059030837004405, + "grad_norm": 1.4528481393218415, + "learning_rate": 1.539481774432315e-05, + "loss": 0.6761256456375122, + "step": 2003 + }, + { + "epoch": 0.7062555066079295, + "grad_norm": 1.6101005409981786, + "learning_rate": 1.538991101945431e-05, + "loss": 0.6647740006446838, + "step": 2004 + }, + { + "epoch": 0.7066079295154185, + "grad_norm": 1.5047715708456952, + "learning_rate": 1.538500246487843e-05, + "loss": 0.7111536860466003, + "step": 2005 + }, + { + "epoch": 0.7069603524229074, + "grad_norm": 1.8533704165409681, + "learning_rate": 1.5380092082261797e-05, + "loss": 0.7395933270454407, + "step": 2006 + }, + { + "epoch": 0.7073127753303965, + "grad_norm": 1.4630720873509298, + "learning_rate": 1.5375179873271335e-05, + "loss": 0.6158996820449829, + "step": 2007 + }, + { + "epoch": 0.7076651982378854, + "grad_norm": 1.4746770670226905, + "learning_rate": 1.537026583957459e-05, + "loss": 0.7259848117828369, + "step": 2008 + }, + { + "epoch": 0.7080176211453745, + "grad_norm": 1.6674311554666914, + "learning_rate": 1.5365349982839723e-05, + "loss": 0.8370928764343262, + "step": 2009 + }, + { + "epoch": 0.7083700440528634, + "grad_norm": 1.3618230849109776, + "learning_rate": 1.536043230473551e-05, + "loss": 0.6041784882545471, + "step": 2010 + }, + { + "epoch": 0.7087224669603525, + "grad_norm": 1.4112680073946362, + "learning_rate": 1.535551280693135e-05, + "loss": 0.688548743724823, + "step": 2011 + }, + { + "epoch": 0.7090748898678414, + "grad_norm": 1.6056330275270763, + "learning_rate": 1.5350591491097265e-05, + "loss": 0.573681652545929, + "step": 2012 + }, + { + "epoch": 0.7094273127753304, + "grad_norm": 2.0956667904129636, + "learning_rate": 1.5345668358903886e-05, + "loss": 0.6919670104980469, + "step": 2013 + }, + { + "epoch": 0.7097797356828194, + "grad_norm": 1.6440284625605202, + "learning_rate": 1.534074341202246e-05, + "loss": 
0.6693999767303467, + "step": 2014 + }, + { + "epoch": 0.7101321585903083, + "grad_norm": 1.5023686452775393, + "learning_rate": 1.533581665212486e-05, + "loss": 0.7204093337059021, + "step": 2015 + }, + { + "epoch": 0.7104845814977974, + "grad_norm": 1.7353596990699613, + "learning_rate": 1.5330888080883555e-05, + "loss": 0.6196314096450806, + "step": 2016 + }, + { + "epoch": 0.7108370044052863, + "grad_norm": 1.4190743094269347, + "learning_rate": 1.5325957699971657e-05, + "loss": 0.7292872071266174, + "step": 2017 + }, + { + "epoch": 0.7111894273127753, + "grad_norm": 1.7578012075664924, + "learning_rate": 1.532102551106287e-05, + "loss": 0.7514410018920898, + "step": 2018 + }, + { + "epoch": 0.7115418502202643, + "grad_norm": 1.329552917806312, + "learning_rate": 1.531609151583152e-05, + "loss": 0.7683345079421997, + "step": 2019 + }, + { + "epoch": 0.7118942731277533, + "grad_norm": 1.8323846391695044, + "learning_rate": 1.5311155715952536e-05, + "loss": 0.6994156837463379, + "step": 2020 + }, + { + "epoch": 0.7122466960352423, + "grad_norm": 1.3407977210543047, + "learning_rate": 1.5306218113101482e-05, + "loss": 0.5530328750610352, + "step": 2021 + }, + { + "epoch": 0.7125991189427313, + "grad_norm": 1.6814720781682417, + "learning_rate": 1.530127870895451e-05, + "loss": 0.6126301884651184, + "step": 2022 + }, + { + "epoch": 0.7129515418502202, + "grad_norm": 1.9618212705640916, + "learning_rate": 1.5296337505188403e-05, + "loss": 0.7514982223510742, + "step": 2023 + }, + { + "epoch": 0.7133039647577093, + "grad_norm": 1.742411408925072, + "learning_rate": 1.529139450348054e-05, + "loss": 0.7087191939353943, + "step": 2024 + }, + { + "epoch": 0.7136563876651982, + "grad_norm": 1.3195305972662899, + "learning_rate": 1.5286449705508914e-05, + "loss": 0.5713562965393066, + "step": 2025 + }, + { + "epoch": 0.7140088105726872, + "grad_norm": 1.3621779724967453, + "learning_rate": 1.5281503112952136e-05, + "loss": 0.6796679496765137, + "step": 2026 + }, + { + 
"epoch": 0.7143612334801762, + "grad_norm": 1.8247081007192694, + "learning_rate": 1.5276554727489415e-05, + "loss": 0.7902421355247498, + "step": 2027 + }, + { + "epoch": 0.7147136563876652, + "grad_norm": 1.3608050254188053, + "learning_rate": 1.527160455080058e-05, + "loss": 0.6645491123199463, + "step": 2028 + }, + { + "epoch": 0.7150660792951542, + "grad_norm": 1.489658346292968, + "learning_rate": 1.5266652584566056e-05, + "loss": 0.6077255606651306, + "step": 2029 + }, + { + "epoch": 0.7154185022026431, + "grad_norm": 1.412193602346091, + "learning_rate": 1.5261698830466888e-05, + "loss": 0.6219078302383423, + "step": 2030 + }, + { + "epoch": 0.7157709251101322, + "grad_norm": 1.280704281307457, + "learning_rate": 1.5256743290184713e-05, + "loss": 0.5895035266876221, + "step": 2031 + }, + { + "epoch": 0.7161233480176211, + "grad_norm": 1.497416305314063, + "learning_rate": 1.5251785965401786e-05, + "loss": 0.6735520958900452, + "step": 2032 + }, + { + "epoch": 0.7164757709251102, + "grad_norm": 1.353147232010895, + "learning_rate": 1.524682685780097e-05, + "loss": 0.6212488412857056, + "step": 2033 + }, + { + "epoch": 0.7168281938325991, + "grad_norm": 1.5786628078958613, + "learning_rate": 1.524186596906572e-05, + "loss": 0.7181172966957092, + "step": 2034 + }, + { + "epoch": 0.7171806167400882, + "grad_norm": 3.1301800941750906, + "learning_rate": 1.5236903300880107e-05, + "loss": 0.7156587839126587, + "step": 2035 + }, + { + "epoch": 0.7175330396475771, + "grad_norm": 1.513371130481219, + "learning_rate": 1.52319388549288e-05, + "loss": 0.6989034414291382, + "step": 2036 + }, + { + "epoch": 0.7178854625550661, + "grad_norm": 1.5183441818080943, + "learning_rate": 1.5226972632897079e-05, + "loss": 0.7224982976913452, + "step": 2037 + }, + { + "epoch": 0.7182378854625551, + "grad_norm": 1.5033480023563544, + "learning_rate": 1.522200463647082e-05, + "loss": 0.6871547698974609, + "step": 2038 + }, + { + "epoch": 0.718590308370044, + "grad_norm": 
1.5898527901911406, + "learning_rate": 1.5217034867336498e-05, + "loss": 0.725049614906311, + "step": 2039 + }, + { + "epoch": 0.718942731277533, + "grad_norm": 2.079980258079047, + "learning_rate": 1.5212063327181197e-05, + "loss": 0.7105863094329834, + "step": 2040 + }, + { + "epoch": 0.719295154185022, + "grad_norm": 1.4720898042575539, + "learning_rate": 1.5207090017692605e-05, + "loss": 0.5823827981948853, + "step": 2041 + }, + { + "epoch": 0.719647577092511, + "grad_norm": 1.9166232714289464, + "learning_rate": 1.5202114940559005e-05, + "loss": 0.7087944746017456, + "step": 2042 + }, + { + "epoch": 0.72, + "grad_norm": 1.40676061171607, + "learning_rate": 1.5197138097469275e-05, + "loss": 0.6678824424743652, + "step": 2043 + }, + { + "epoch": 0.720352422907489, + "grad_norm": 1.8181396920642288, + "learning_rate": 1.5192159490112904e-05, + "loss": 0.7318846583366394, + "step": 2044 + }, + { + "epoch": 0.720704845814978, + "grad_norm": 1.4972370605408583, + "learning_rate": 1.5187179120179969e-05, + "loss": 0.7245825529098511, + "step": 2045 + }, + { + "epoch": 0.721057268722467, + "grad_norm": 1.8554569851295908, + "learning_rate": 1.5182196989361155e-05, + "loss": 0.7691583633422852, + "step": 2046 + }, + { + "epoch": 0.7214096916299559, + "grad_norm": 1.8926959198228865, + "learning_rate": 1.517721309934774e-05, + "loss": 0.7961187362670898, + "step": 2047 + }, + { + "epoch": 0.721762114537445, + "grad_norm": 1.4465824812635413, + "learning_rate": 1.51722274518316e-05, + "loss": 0.7163759469985962, + "step": 2048 + }, + { + "epoch": 0.7221145374449339, + "grad_norm": 1.5931659235074929, + "learning_rate": 1.51672400485052e-05, + "loss": 0.6807754039764404, + "step": 2049 + }, + { + "epoch": 0.7224669603524229, + "grad_norm": 1.6629043788678177, + "learning_rate": 1.516225089106162e-05, + "loss": 0.7026433348655701, + "step": 2050 + }, + { + "epoch": 0.7228193832599119, + "grad_norm": 1.5979782761024863, + "learning_rate": 1.5157259981194514e-05, + "loss": 
0.8230476379394531, + "step": 2051 + }, + { + "epoch": 0.7231718061674008, + "grad_norm": 1.7451468269512191, + "learning_rate": 1.5152267320598149e-05, + "loss": 0.6466805934906006, + "step": 2052 + }, + { + "epoch": 0.7235242290748899, + "grad_norm": 1.441654513994546, + "learning_rate": 1.5147272910967368e-05, + "loss": 0.7203368544578552, + "step": 2053 + }, + { + "epoch": 0.7238766519823788, + "grad_norm": 1.3552926542352444, + "learning_rate": 1.5142276753997627e-05, + "loss": 0.6455702781677246, + "step": 2054 + }, + { + "epoch": 0.7242290748898679, + "grad_norm": 1.4569594560235375, + "learning_rate": 1.5137278851384958e-05, + "loss": 0.609260082244873, + "step": 2055 + }, + { + "epoch": 0.7245814977973568, + "grad_norm": 1.8083723333355965, + "learning_rate": 1.5132279204826e-05, + "loss": 0.8320673704147339, + "step": 2056 + }, + { + "epoch": 0.7249339207048459, + "grad_norm": 1.5846751172626037, + "learning_rate": 1.512727781601797e-05, + "loss": 0.8497718572616577, + "step": 2057 + }, + { + "epoch": 0.7252863436123348, + "grad_norm": 1.3523103900088498, + "learning_rate": 1.5122274686658695e-05, + "loss": 0.6398370265960693, + "step": 2058 + }, + { + "epoch": 0.7256387665198238, + "grad_norm": 1.4475161405549521, + "learning_rate": 1.511726981844657e-05, + "loss": 0.7562476396560669, + "step": 2059 + }, + { + "epoch": 0.7259911894273128, + "grad_norm": 1.8369611551341436, + "learning_rate": 1.51122632130806e-05, + "loss": 0.7948570251464844, + "step": 2060 + }, + { + "epoch": 0.7263436123348017, + "grad_norm": 1.9057892039367437, + "learning_rate": 1.5107254872260366e-05, + "loss": 0.7062652111053467, + "step": 2061 + }, + { + "epoch": 0.7266960352422908, + "grad_norm": 1.666793884988277, + "learning_rate": 1.5102244797686049e-05, + "loss": 0.6290205717086792, + "step": 2062 + }, + { + "epoch": 0.7270484581497797, + "grad_norm": 1.7111515682842917, + "learning_rate": 1.5097232991058409e-05, + "loss": 0.727097749710083, + "step": 2063 + }, + { + "epoch": 
0.7274008810572687, + "grad_norm": 1.6005396217530683, + "learning_rate": 1.5092219454078803e-05, + "loss": 0.783380389213562, + "step": 2064 + }, + { + "epoch": 0.7277533039647577, + "grad_norm": 1.4872748126751951, + "learning_rate": 1.5087204188449165e-05, + "loss": 0.6190629601478577, + "step": 2065 + }, + { + "epoch": 0.7281057268722467, + "grad_norm": 1.5426042958975894, + "learning_rate": 1.5082187195872026e-05, + "loss": 0.6749798059463501, + "step": 2066 + }, + { + "epoch": 0.7284581497797357, + "grad_norm": 1.524694880675492, + "learning_rate": 1.5077168478050494e-05, + "loss": 0.6581153273582458, + "step": 2067 + }, + { + "epoch": 0.7288105726872247, + "grad_norm": 1.433767292714838, + "learning_rate": 1.5072148036688279e-05, + "loss": 0.6886252760887146, + "step": 2068 + }, + { + "epoch": 0.7291629955947136, + "grad_norm": 1.651630016781231, + "learning_rate": 1.506712587348965e-05, + "loss": 0.6893814206123352, + "step": 2069 + }, + { + "epoch": 0.7295154185022027, + "grad_norm": 1.7840073958291343, + "learning_rate": 1.5062101990159486e-05, + "loss": 0.8242654800415039, + "step": 2070 + }, + { + "epoch": 0.7298678414096916, + "grad_norm": 1.4785860236042563, + "learning_rate": 1.5057076388403229e-05, + "loss": 0.6331228017807007, + "step": 2071 + }, + { + "epoch": 0.7302202643171806, + "grad_norm": 1.999658994203056, + "learning_rate": 1.5052049069926927e-05, + "loss": 0.6440649032592773, + "step": 2072 + }, + { + "epoch": 0.7305726872246696, + "grad_norm": 1.4709264297577982, + "learning_rate": 1.5047020036437187e-05, + "loss": 0.7575498819351196, + "step": 2073 + }, + { + "epoch": 0.7309251101321586, + "grad_norm": 1.8032604054381702, + "learning_rate": 1.5041989289641215e-05, + "loss": 0.7530438899993896, + "step": 2074 + }, + { + "epoch": 0.7312775330396476, + "grad_norm": 1.5344556457224068, + "learning_rate": 1.5036956831246792e-05, + "loss": 0.6035616397857666, + "step": 2075 + }, + { + "epoch": 0.7316299559471365, + "grad_norm": 
1.5603807233808964, + "learning_rate": 1.5031922662962279e-05, + "loss": 0.8199492692947388, + "step": 2076 + }, + { + "epoch": 0.7319823788546256, + "grad_norm": 1.4221584765379676, + "learning_rate": 1.5026886786496624e-05, + "loss": 0.7700716257095337, + "step": 2077 + }, + { + "epoch": 0.7323348017621145, + "grad_norm": 1.363028657258907, + "learning_rate": 1.5021849203559347e-05, + "loss": 0.6147816777229309, + "step": 2078 + }, + { + "epoch": 0.7326872246696036, + "grad_norm": 1.5628142146943151, + "learning_rate": 1.5016809915860549e-05, + "loss": 0.6841654777526855, + "step": 2079 + }, + { + "epoch": 0.7330396475770925, + "grad_norm": 1.7910877668379601, + "learning_rate": 1.5011768925110915e-05, + "loss": 0.7212510108947754, + "step": 2080 + }, + { + "epoch": 0.7333920704845815, + "grad_norm": 1.5222211216380177, + "learning_rate": 1.5006726233021702e-05, + "loss": 0.6695969104766846, + "step": 2081 + }, + { + "epoch": 0.7337444933920705, + "grad_norm": 1.391558192885713, + "learning_rate": 1.500168184130475e-05, + "loss": 0.5991939306259155, + "step": 2082 + }, + { + "epoch": 0.7340969162995594, + "grad_norm": 1.4191544168706896, + "learning_rate": 1.4996635751672467e-05, + "loss": 0.7127671241760254, + "step": 2083 + }, + { + "epoch": 0.7344493392070485, + "grad_norm": 1.6905086418980109, + "learning_rate": 1.4991587965837853e-05, + "loss": 0.6874737739562988, + "step": 2084 + }, + { + "epoch": 0.7348017621145374, + "grad_norm": 1.3584519480933235, + "learning_rate": 1.4986538485514466e-05, + "loss": 0.6695086359977722, + "step": 2085 + }, + { + "epoch": 0.7351541850220265, + "grad_norm": 1.694264564137899, + "learning_rate": 1.4981487312416452e-05, + "loss": 0.8366880416870117, + "step": 2086 + }, + { + "epoch": 0.7355066079295154, + "grad_norm": 1.4589826786561007, + "learning_rate": 1.4976434448258519e-05, + "loss": 0.6448042988777161, + "step": 2087 + }, + { + "epoch": 0.7358590308370044, + "grad_norm": 1.8583566766216881, + "learning_rate": 
1.4971379894755969e-05, + "loss": 0.7015181183815002, + "step": 2088 + }, + { + "epoch": 0.7362114537444934, + "grad_norm": 1.702091122213854, + "learning_rate": 1.4966323653624657e-05, + "loss": 0.6842815279960632, + "step": 2089 + }, + { + "epoch": 0.7365638766519824, + "grad_norm": 1.7134163669939546, + "learning_rate": 1.4961265726581025e-05, + "loss": 0.6866877675056458, + "step": 2090 + }, + { + "epoch": 0.7369162995594714, + "grad_norm": 1.537334961209543, + "learning_rate": 1.4956206115342076e-05, + "loss": 0.5486865043640137, + "step": 2091 + }, + { + "epoch": 0.7372687224669604, + "grad_norm": 1.7196744065626985, + "learning_rate": 1.4951144821625396e-05, + "loss": 0.7241986989974976, + "step": 2092 + }, + { + "epoch": 0.7376211453744493, + "grad_norm": 1.647893211532232, + "learning_rate": 1.4946081847149134e-05, + "loss": 0.8400537967681885, + "step": 2093 + }, + { + "epoch": 0.7379735682819383, + "grad_norm": 2.2262132208657146, + "learning_rate": 1.4941017193632013e-05, + "loss": 0.6050147414207458, + "step": 2094 + }, + { + "epoch": 0.7383259911894273, + "grad_norm": 1.337421477916073, + "learning_rate": 1.4935950862793322e-05, + "loss": 0.6744229197502136, + "step": 2095 + }, + { + "epoch": 0.7386784140969163, + "grad_norm": 1.4345512538147223, + "learning_rate": 1.493088285635293e-05, + "loss": 0.6902294158935547, + "step": 2096 + }, + { + "epoch": 0.7390308370044053, + "grad_norm": 1.8712136012401615, + "learning_rate": 1.492581317603126e-05, + "loss": 0.6328809261322021, + "step": 2097 + }, + { + "epoch": 0.7393832599118942, + "grad_norm": 1.4287618993627116, + "learning_rate": 1.4920741823549316e-05, + "loss": 0.5740914344787598, + "step": 2098 + }, + { + "epoch": 0.7397356828193833, + "grad_norm": 2.181624869430245, + "learning_rate": 1.491566880062866e-05, + "loss": 0.676064133644104, + "step": 2099 + }, + { + "epoch": 0.7400881057268722, + "grad_norm": 1.5152586818427025, + "learning_rate": 1.4910594108991427e-05, + "loss": 0.655153751373291, 
+ "step": 2100 + }, + { + "epoch": 0.7404405286343613, + "grad_norm": 1.7534591753196083, + "learning_rate": 1.4905517750360321e-05, + "loss": 0.7406177520751953, + "step": 2101 + }, + { + "epoch": 0.7407929515418502, + "grad_norm": 1.777307095945404, + "learning_rate": 1.4900439726458602e-05, + "loss": 0.6568606495857239, + "step": 2102 + }, + { + "epoch": 0.7411453744493393, + "grad_norm": 1.661203262476052, + "learning_rate": 1.4895360039010101e-05, + "loss": 0.8073545098304749, + "step": 2103 + }, + { + "epoch": 0.7414977973568282, + "grad_norm": 1.6727123321226325, + "learning_rate": 1.4890278689739219e-05, + "loss": 0.6350502967834473, + "step": 2104 + }, + { + "epoch": 0.7418502202643171, + "grad_norm": 1.475293376760879, + "learning_rate": 1.4885195680370915e-05, + "loss": 0.6419750452041626, + "step": 2105 + }, + { + "epoch": 0.7422026431718062, + "grad_norm": 1.5480091112446772, + "learning_rate": 1.4880111012630706e-05, + "loss": 0.72661292552948, + "step": 2106 + }, + { + "epoch": 0.7425550660792951, + "grad_norm": 1.5125479406066336, + "learning_rate": 1.4875024688244683e-05, + "loss": 0.6996778845787048, + "step": 2107 + }, + { + "epoch": 0.7429074889867842, + "grad_norm": 1.7343888178448454, + "learning_rate": 1.4869936708939497e-05, + "loss": 0.8383389711380005, + "step": 2108 + }, + { + "epoch": 0.7432599118942731, + "grad_norm": 1.6950461405964057, + "learning_rate": 1.4864847076442358e-05, + "loss": 0.6863676905632019, + "step": 2109 + }, + { + "epoch": 0.7436123348017621, + "grad_norm": 1.781136801701718, + "learning_rate": 1.4859755792481032e-05, + "loss": 0.8493780493736267, + "step": 2110 + }, + { + "epoch": 0.7439647577092511, + "grad_norm": 1.3754571175527768, + "learning_rate": 1.4854662858783857e-05, + "loss": 0.6172446012496948, + "step": 2111 + }, + { + "epoch": 0.7443171806167401, + "grad_norm": 6.860121931549926, + "learning_rate": 1.4849568277079724e-05, + "loss": 0.8390353918075562, + "step": 2112 + }, + { + "epoch": 
0.7446696035242291, + "grad_norm": 1.8563178731324264, + "learning_rate": 1.4844472049098087e-05, + "loss": 0.7108968496322632, + "step": 2113 + }, + { + "epoch": 0.7450220264317181, + "grad_norm": 1.5680406370173388, + "learning_rate": 1.4839374176568956e-05, + "loss": 0.7322912812232971, + "step": 2114 + }, + { + "epoch": 0.745374449339207, + "grad_norm": 1.5999840343791083, + "learning_rate": 1.4834274661222896e-05, + "loss": 0.6371238231658936, + "step": 2115 + }, + { + "epoch": 0.745726872246696, + "grad_norm": 1.6793360349519253, + "learning_rate": 1.4829173504791035e-05, + "loss": 0.8346511125564575, + "step": 2116 + }, + { + "epoch": 0.746079295154185, + "grad_norm": 1.5530745059154032, + "learning_rate": 1.4824070709005063e-05, + "loss": 0.5893645286560059, + "step": 2117 + }, + { + "epoch": 0.746431718061674, + "grad_norm": 1.298803943907695, + "learning_rate": 1.4818966275597213e-05, + "loss": 0.60541832447052, + "step": 2118 + }, + { + "epoch": 0.746784140969163, + "grad_norm": 2.0046684565684108, + "learning_rate": 1.4813860206300286e-05, + "loss": 0.5823955535888672, + "step": 2119 + }, + { + "epoch": 0.747136563876652, + "grad_norm": 1.8094924676670123, + "learning_rate": 1.480875250284763e-05, + "loss": 0.6751007437705994, + "step": 2120 + }, + { + "epoch": 0.747488986784141, + "grad_norm": 1.5760168475146599, + "learning_rate": 1.4803643166973155e-05, + "loss": 0.6878843307495117, + "step": 2121 + }, + { + "epoch": 0.7478414096916299, + "grad_norm": 1.4061876649605263, + "learning_rate": 1.4798532200411319e-05, + "loss": 0.6732173562049866, + "step": 2122 + }, + { + "epoch": 0.748193832599119, + "grad_norm": 1.558565097379613, + "learning_rate": 1.479341960489714e-05, + "loss": 0.6383658647537231, + "step": 2123 + }, + { + "epoch": 0.7485462555066079, + "grad_norm": 1.8120908321553708, + "learning_rate": 1.4788305382166174e-05, + "loss": 0.7444638013839722, + "step": 2124 + }, + { + "epoch": 0.748898678414097, + "grad_norm": 1.7437949253948153, + 
"learning_rate": 1.4783189533954555e-05, + "loss": 0.5492427349090576, + "step": 2125 + }, + { + "epoch": 0.7492511013215859, + "grad_norm": 1.60343309806789, + "learning_rate": 1.4778072061998944e-05, + "loss": 0.6193333864212036, + "step": 2126 + }, + { + "epoch": 0.7496035242290748, + "grad_norm": 2.019729643045431, + "learning_rate": 1.4772952968036572e-05, + "loss": 0.853213906288147, + "step": 2127 + }, + { + "epoch": 0.7499559471365639, + "grad_norm": 1.4306248677016198, + "learning_rate": 1.4767832253805203e-05, + "loss": 0.6128672361373901, + "step": 2128 + }, + { + "epoch": 0.7503083700440528, + "grad_norm": 1.7550432779472305, + "learning_rate": 1.4762709921043166e-05, + "loss": 0.7298723459243774, + "step": 2129 + }, + { + "epoch": 0.7506607929515419, + "grad_norm": 1.3773404123246435, + "learning_rate": 1.475758597148933e-05, + "loss": 0.6578782796859741, + "step": 2130 + }, + { + "epoch": 0.7510132158590308, + "grad_norm": 1.6603784675007325, + "learning_rate": 1.4752460406883122e-05, + "loss": 0.6490681171417236, + "step": 2131 + }, + { + "epoch": 0.7513656387665198, + "grad_norm": 1.530112138397779, + "learning_rate": 1.4747333228964502e-05, + "loss": 0.657980740070343, + "step": 2132 + }, + { + "epoch": 0.7517180616740088, + "grad_norm": 1.9937499661396574, + "learning_rate": 1.4742204439473999e-05, + "loss": 0.8431578874588013, + "step": 2133 + }, + { + "epoch": 0.7520704845814978, + "grad_norm": 1.7351787739786175, + "learning_rate": 1.4737074040152667e-05, + "loss": 0.7217377424240112, + "step": 2134 + }, + { + "epoch": 0.7524229074889868, + "grad_norm": 2.232953474209366, + "learning_rate": 1.4731942032742127e-05, + "loss": 0.6299912333488464, + "step": 2135 + }, + { + "epoch": 0.7527753303964758, + "grad_norm": 1.6053563211063129, + "learning_rate": 1.4726808418984527e-05, + "loss": 0.6325603723526001, + "step": 2136 + }, + { + "epoch": 0.7531277533039648, + "grad_norm": 1.7427287871247603, + "learning_rate": 1.4721673200622572e-05, + "loss": 
0.6785098314285278, + "step": 2137 + }, + { + "epoch": 0.7534801762114537, + "grad_norm": 2.5780020778792068, + "learning_rate": 1.471653637939951e-05, + "loss": 0.7311918139457703, + "step": 2138 + }, + { + "epoch": 0.7538325991189427, + "grad_norm": 1.498799685922224, + "learning_rate": 1.4711397957059132e-05, + "loss": 0.7117096781730652, + "step": 2139 + }, + { + "epoch": 0.7541850220264317, + "grad_norm": 1.4519847744536865, + "learning_rate": 1.4706257935345772e-05, + "loss": 0.6709408760070801, + "step": 2140 + }, + { + "epoch": 0.7545374449339207, + "grad_norm": 1.9629689982019365, + "learning_rate": 1.4701116316004307e-05, + "loss": 0.6478008031845093, + "step": 2141 + }, + { + "epoch": 0.7548898678414097, + "grad_norm": 1.5362345610055923, + "learning_rate": 1.4695973100780154e-05, + "loss": 0.6414140462875366, + "step": 2142 + }, + { + "epoch": 0.7552422907488987, + "grad_norm": 1.7088547501964069, + "learning_rate": 1.4690828291419283e-05, + "loss": 0.6947815418243408, + "step": 2143 + }, + { + "epoch": 0.7555947136563876, + "grad_norm": 1.6244554419934112, + "learning_rate": 1.4685681889668187e-05, + "loss": 0.6614837646484375, + "step": 2144 + }, + { + "epoch": 0.7559471365638767, + "grad_norm": 1.87010430937903, + "learning_rate": 1.4680533897273913e-05, + "loss": 0.7803678512573242, + "step": 2145 + }, + { + "epoch": 0.7562995594713656, + "grad_norm": 1.975192105020327, + "learning_rate": 1.4675384315984045e-05, + "loss": 0.8411567211151123, + "step": 2146 + }, + { + "epoch": 0.7566519823788547, + "grad_norm": 2.4329758477488177, + "learning_rate": 1.4670233147546708e-05, + "loss": 0.8379243016242981, + "step": 2147 + }, + { + "epoch": 0.7570044052863436, + "grad_norm": 1.6153137773652926, + "learning_rate": 1.4665080393710558e-05, + "loss": 0.6419194936752319, + "step": 2148 + }, + { + "epoch": 0.7573568281938327, + "grad_norm": 1.8383077301350303, + "learning_rate": 1.4659926056224798e-05, + "loss": 0.7791979908943176, + "step": 2149 + }, + { + 
"epoch": 0.7577092511013216, + "grad_norm": 1.72203201226436, + "learning_rate": 1.465477013683916e-05, + "loss": 0.7237389087677002, + "step": 2150 + }, + { + "epoch": 0.7580616740088105, + "grad_norm": 1.5129431088418641, + "learning_rate": 1.464961263730393e-05, + "loss": 0.6750755906105042, + "step": 2151 + }, + { + "epoch": 0.7584140969162996, + "grad_norm": 1.3799525283393634, + "learning_rate": 1.4644453559369904e-05, + "loss": 0.5412150621414185, + "step": 2152 + }, + { + "epoch": 0.7587665198237885, + "grad_norm": 1.7752121571388841, + "learning_rate": 1.463929290478844e-05, + "loss": 0.7009850740432739, + "step": 2153 + }, + { + "epoch": 0.7591189427312776, + "grad_norm": 1.5166585489574307, + "learning_rate": 1.4634130675311411e-05, + "loss": 0.8678998351097107, + "step": 2154 + }, + { + "epoch": 0.7594713656387665, + "grad_norm": 2.0127463717616347, + "learning_rate": 1.4628966872691241e-05, + "loss": 0.7395705580711365, + "step": 2155 + }, + { + "epoch": 0.7598237885462555, + "grad_norm": 1.5739842401493016, + "learning_rate": 1.4623801498680875e-05, + "loss": 0.5950812101364136, + "step": 2156 + }, + { + "epoch": 0.7601762114537445, + "grad_norm": 1.6474041176538503, + "learning_rate": 1.46186345550338e-05, + "loss": 0.7133630514144897, + "step": 2157 + }, + { + "epoch": 0.7605286343612335, + "grad_norm": 1.4644647660974064, + "learning_rate": 1.4613466043504026e-05, + "loss": 0.7551965117454529, + "step": 2158 + }, + { + "epoch": 0.7608810572687225, + "grad_norm": 1.4284086636489846, + "learning_rate": 1.4608295965846111e-05, + "loss": 0.6654022932052612, + "step": 2159 + }, + { + "epoch": 0.7612334801762115, + "grad_norm": 3.5518990487711126, + "learning_rate": 1.460312432381513e-05, + "loss": 0.8081967830657959, + "step": 2160 + }, + { + "epoch": 0.7615859030837004, + "grad_norm": 1.8113760087057564, + "learning_rate": 1.4597951119166696e-05, + "loss": 0.7478348016738892, + "step": 2161 + }, + { + "epoch": 0.7619383259911894, + "grad_norm": 
2.9384500423152833, + "learning_rate": 1.4592776353656948e-05, + "loss": 0.7866748571395874, + "step": 2162 + }, + { + "epoch": 0.7622907488986784, + "grad_norm": 1.4185631764668494, + "learning_rate": 1.4587600029042563e-05, + "loss": 0.6675869226455688, + "step": 2163 + }, + { + "epoch": 0.7626431718061674, + "grad_norm": 1.934904377243222, + "learning_rate": 1.4582422147080739e-05, + "loss": 0.6881103515625, + "step": 2164 + }, + { + "epoch": 0.7629955947136564, + "grad_norm": 1.6886719056667128, + "learning_rate": 1.457724270952921e-05, + "loss": 0.7298593521118164, + "step": 2165 + }, + { + "epoch": 0.7633480176211453, + "grad_norm": 1.5123877451607526, + "learning_rate": 1.4572061718146224e-05, + "loss": 0.7102776765823364, + "step": 2166 + }, + { + "epoch": 0.7637004405286344, + "grad_norm": 1.6706836844885837, + "learning_rate": 1.4566879174690576e-05, + "loss": 0.7767213582992554, + "step": 2167 + }, + { + "epoch": 0.7640528634361233, + "grad_norm": 1.4702267439170456, + "learning_rate": 1.4561695080921573e-05, + "loss": 0.7480257749557495, + "step": 2168 + }, + { + "epoch": 0.7644052863436124, + "grad_norm": 1.4326376726611632, + "learning_rate": 1.4556509438599057e-05, + "loss": 0.7419564723968506, + "step": 2169 + }, + { + "epoch": 0.7647577092511013, + "grad_norm": 1.4787079836022163, + "learning_rate": 1.4551322249483388e-05, + "loss": 0.6820264458656311, + "step": 2170 + }, + { + "epoch": 0.7651101321585904, + "grad_norm": 1.3819947250134947, + "learning_rate": 1.4546133515335462e-05, + "loss": 0.5947732329368591, + "step": 2171 + }, + { + "epoch": 0.7654625550660793, + "grad_norm": 1.6478975280830812, + "learning_rate": 1.4540943237916685e-05, + "loss": 0.6772021055221558, + "step": 2172 + }, + { + "epoch": 0.7658149779735682, + "grad_norm": 1.7643629263201115, + "learning_rate": 1.4535751418989e-05, + "loss": 0.7822210192680359, + "step": 2173 + }, + { + "epoch": 0.7661674008810573, + "grad_norm": 1.6079996302057808, + "learning_rate": 
1.4530558060314866e-05, + "loss": 0.6208021640777588, + "step": 2174 + }, + { + "epoch": 0.7665198237885462, + "grad_norm": 1.5681481752797541, + "learning_rate": 1.4525363163657264e-05, + "loss": 0.8017063140869141, + "step": 2175 + }, + { + "epoch": 0.7668722466960353, + "grad_norm": 1.4681783580715917, + "learning_rate": 1.4520166730779704e-05, + "loss": 0.738383948802948, + "step": 2176 + }, + { + "epoch": 0.7672246696035242, + "grad_norm": 1.742058488341915, + "learning_rate": 1.4514968763446213e-05, + "loss": 0.7698314785957336, + "step": 2177 + }, + { + "epoch": 0.7675770925110132, + "grad_norm": 1.7037031257568012, + "learning_rate": 1.4509769263421337e-05, + "loss": 0.789836049079895, + "step": 2178 + }, + { + "epoch": 0.7679295154185022, + "grad_norm": 1.8506345351591968, + "learning_rate": 1.4504568232470145e-05, + "loss": 0.6437339782714844, + "step": 2179 + }, + { + "epoch": 0.7682819383259912, + "grad_norm": 2.04999468198658, + "learning_rate": 1.4499365672358226e-05, + "loss": 0.6684735417366028, + "step": 2180 + }, + { + "epoch": 0.7686343612334802, + "grad_norm": 1.5077038126146909, + "learning_rate": 1.4494161584851687e-05, + "loss": 0.6577454805374146, + "step": 2181 + }, + { + "epoch": 0.7689867841409692, + "grad_norm": 1.3277471323795764, + "learning_rate": 1.4488955971717154e-05, + "loss": 0.5975776314735413, + "step": 2182 + }, + { + "epoch": 0.7693392070484582, + "grad_norm": 1.8819815707164231, + "learning_rate": 1.4483748834721767e-05, + "loss": 0.6385577917098999, + "step": 2183 + }, + { + "epoch": 0.7696916299559471, + "grad_norm": 1.4452778349053288, + "learning_rate": 1.4478540175633193e-05, + "loss": 0.6295928955078125, + "step": 2184 + }, + { + "epoch": 0.7700440528634361, + "grad_norm": 1.5790897154124113, + "learning_rate": 1.4473329996219605e-05, + "loss": 0.6848496198654175, + "step": 2185 + }, + { + "epoch": 0.7703964757709251, + "grad_norm": 1.410283277756768, + "learning_rate": 1.44681182982497e-05, + "loss": 
0.6476501226425171, + "step": 2186 + }, + { + "epoch": 0.7707488986784141, + "grad_norm": 1.5220085975801703, + "learning_rate": 1.4462905083492683e-05, + "loss": 0.750103235244751, + "step": 2187 + }, + { + "epoch": 0.771101321585903, + "grad_norm": 1.3838063845924222, + "learning_rate": 1.4457690353718285e-05, + "loss": 0.668454110622406, + "step": 2188 + }, + { + "epoch": 0.7714537444933921, + "grad_norm": 1.3695000422583874, + "learning_rate": 1.4452474110696738e-05, + "loss": 0.6671048402786255, + "step": 2189 + }, + { + "epoch": 0.771806167400881, + "grad_norm": 1.404147919130693, + "learning_rate": 1.4447256356198797e-05, + "loss": 0.6261379718780518, + "step": 2190 + }, + { + "epoch": 0.7721585903083701, + "grad_norm": 1.6192228095415668, + "learning_rate": 1.4442037091995726e-05, + "loss": 0.6128308176994324, + "step": 2191 + }, + { + "epoch": 0.772511013215859, + "grad_norm": 1.629684954387357, + "learning_rate": 1.4436816319859306e-05, + "loss": 0.7709108591079712, + "step": 2192 + }, + { + "epoch": 0.7728634361233481, + "grad_norm": 1.7604991326643686, + "learning_rate": 1.4431594041561822e-05, + "loss": 0.6242028474807739, + "step": 2193 + }, + { + "epoch": 0.773215859030837, + "grad_norm": 1.7562103574700596, + "learning_rate": 1.4426370258876079e-05, + "loss": 0.8030718564987183, + "step": 2194 + }, + { + "epoch": 0.7735682819383259, + "grad_norm": 1.5182882363444798, + "learning_rate": 1.4421144973575386e-05, + "loss": 0.7785710692405701, + "step": 2195 + }, + { + "epoch": 0.773920704845815, + "grad_norm": 1.5453752656669346, + "learning_rate": 1.4415918187433564e-05, + "loss": 0.6846014857292175, + "step": 2196 + }, + { + "epoch": 0.7742731277533039, + "grad_norm": 1.6007643935951585, + "learning_rate": 1.4410689902224947e-05, + "loss": 0.7883827686309814, + "step": 2197 + }, + { + "epoch": 0.774625550660793, + "grad_norm": 2.0453745328196065, + "learning_rate": 1.4405460119724377e-05, + "loss": 0.8285650610923767, + "step": 2198 + }, + { + 
"epoch": 0.7749779735682819, + "grad_norm": 1.5026043059194256, + "learning_rate": 1.4400228841707193e-05, + "loss": 0.6101093292236328, + "step": 2199 + }, + { + "epoch": 0.775330396475771, + "grad_norm": 1.4888885445589903, + "learning_rate": 1.4394996069949262e-05, + "loss": 0.6627891063690186, + "step": 2200 + }, + { + "epoch": 0.7756828193832599, + "grad_norm": 1.4487650646569075, + "learning_rate": 1.4389761806226943e-05, + "loss": 0.6755822896957397, + "step": 2201 + }, + { + "epoch": 0.7760352422907489, + "grad_norm": 1.438634659048083, + "learning_rate": 1.4384526052317106e-05, + "loss": 0.6718465089797974, + "step": 2202 + }, + { + "epoch": 0.7763876651982379, + "grad_norm": 1.4171659147035778, + "learning_rate": 1.4379288809997121e-05, + "loss": 0.5857758522033691, + "step": 2203 + }, + { + "epoch": 0.7767400881057269, + "grad_norm": 1.1200186604200135, + "learning_rate": 1.4374050081044876e-05, + "loss": 0.5861783027648926, + "step": 2204 + }, + { + "epoch": 0.7770925110132159, + "grad_norm": 1.442532656158601, + "learning_rate": 1.4368809867238754e-05, + "loss": 0.6862374544143677, + "step": 2205 + }, + { + "epoch": 0.7774449339207048, + "grad_norm": 1.6455201954220524, + "learning_rate": 1.4363568170357646e-05, + "loss": 0.6787701845169067, + "step": 2206 + }, + { + "epoch": 0.7777973568281938, + "grad_norm": 1.4101038203667695, + "learning_rate": 1.435832499218094e-05, + "loss": 0.5671687126159668, + "step": 2207 + }, + { + "epoch": 0.7781497797356828, + "grad_norm": 1.5479554264257531, + "learning_rate": 1.435308033448854e-05, + "loss": 0.8243429064750671, + "step": 2208 + }, + { + "epoch": 0.7785022026431718, + "grad_norm": 1.3676716888852272, + "learning_rate": 1.4347834199060835e-05, + "loss": 0.5880655646324158, + "step": 2209 + }, + { + "epoch": 0.7788546255506608, + "grad_norm": 2.451624357800272, + "learning_rate": 1.4342586587678734e-05, + "loss": 0.7085679769515991, + "step": 2210 + }, + { + "epoch": 0.7792070484581498, + "grad_norm": 
1.546990179750224, + "learning_rate": 1.4337337502123627e-05, + "loss": 0.7011853456497192, + "step": 2211 + }, + { + "epoch": 0.7795594713656387, + "grad_norm": 1.6003260447933962, + "learning_rate": 1.4332086944177426e-05, + "loss": 0.755327582359314, + "step": 2212 + }, + { + "epoch": 0.7799118942731278, + "grad_norm": 1.3917359947430683, + "learning_rate": 1.4326834915622522e-05, + "loss": 0.7152736186981201, + "step": 2213 + }, + { + "epoch": 0.7802643171806167, + "grad_norm": 1.3821995576878587, + "learning_rate": 1.4321581418241825e-05, + "loss": 0.6744083166122437, + "step": 2214 + }, + { + "epoch": 0.7806167400881058, + "grad_norm": 1.5294456027931242, + "learning_rate": 1.4316326453818728e-05, + "loss": 0.6112288236618042, + "step": 2215 + }, + { + "epoch": 0.7809691629955947, + "grad_norm": 1.2620758120071194, + "learning_rate": 1.4311070024137128e-05, + "loss": 0.5569246411323547, + "step": 2216 + }, + { + "epoch": 0.7813215859030836, + "grad_norm": 1.474883531826743, + "learning_rate": 1.4305812130981418e-05, + "loss": 0.6214494705200195, + "step": 2217 + }, + { + "epoch": 0.7816740088105727, + "grad_norm": 1.4094788075709526, + "learning_rate": 1.4300552776136497e-05, + "loss": 0.5401003956794739, + "step": 2218 + }, + { + "epoch": 0.7820264317180616, + "grad_norm": 1.433294268920241, + "learning_rate": 1.4295291961387742e-05, + "loss": 0.5128720998764038, + "step": 2219 + }, + { + "epoch": 0.7823788546255507, + "grad_norm": 1.352265751544302, + "learning_rate": 1.4290029688521043e-05, + "loss": 0.5495916604995728, + "step": 2220 + }, + { + "epoch": 0.7827312775330396, + "grad_norm": 1.6131865642068703, + "learning_rate": 1.4284765959322772e-05, + "loss": 0.628544807434082, + "step": 2221 + }, + { + "epoch": 0.7830837004405287, + "grad_norm": 1.443784571277232, + "learning_rate": 1.427950077557981e-05, + "loss": 0.7171294689178467, + "step": 2222 + }, + { + "epoch": 0.7834361233480176, + "grad_norm": 1.3723589201513293, + "learning_rate": 
1.4274234139079513e-05, + "loss": 0.7436389327049255, + "step": 2223 + }, + { + "epoch": 0.7837885462555066, + "grad_norm": 1.5295286402885273, + "learning_rate": 1.426896605160975e-05, + "loss": 0.7154244780540466, + "step": 2224 + }, + { + "epoch": 0.7841409691629956, + "grad_norm": 1.4385555847293963, + "learning_rate": 1.426369651495886e-05, + "loss": 0.6433268189430237, + "step": 2225 + }, + { + "epoch": 0.7844933920704846, + "grad_norm": 1.4177681718218336, + "learning_rate": 1.4258425530915703e-05, + "loss": 0.6612321734428406, + "step": 2226 + }, + { + "epoch": 0.7848458149779736, + "grad_norm": 1.962010974229914, + "learning_rate": 1.42531531012696e-05, + "loss": 0.6384811401367188, + "step": 2227 + }, + { + "epoch": 0.7851982378854625, + "grad_norm": 1.4927220821701634, + "learning_rate": 1.4247879227810384e-05, + "loss": 0.5592762231826782, + "step": 2228 + }, + { + "epoch": 0.7855506607929515, + "grad_norm": 1.6376570609433725, + "learning_rate": 1.4242603912328367e-05, + "loss": 0.6904512643814087, + "step": 2229 + }, + { + "epoch": 0.7859030837004405, + "grad_norm": 1.7784965930873091, + "learning_rate": 1.4237327156614358e-05, + "loss": 0.7165266871452332, + "step": 2230 + }, + { + "epoch": 0.7862555066079295, + "grad_norm": 1.6275397333714936, + "learning_rate": 1.423204896245965e-05, + "loss": 0.8567172288894653, + "step": 2231 + }, + { + "epoch": 0.7866079295154185, + "grad_norm": 1.6554990252792119, + "learning_rate": 1.4226769331656028e-05, + "loss": 0.6595934629440308, + "step": 2232 + }, + { + "epoch": 0.7869603524229075, + "grad_norm": 1.8034278962736743, + "learning_rate": 1.4221488265995755e-05, + "loss": 0.750861644744873, + "step": 2233 + }, + { + "epoch": 0.7873127753303965, + "grad_norm": 1.3674194021669617, + "learning_rate": 1.4216205767271597e-05, + "loss": 0.7146387696266174, + "step": 2234 + }, + { + "epoch": 0.7876651982378855, + "grad_norm": 1.9347692502503655, + "learning_rate": 1.4210921837276792e-05, + "loss": 
0.58647221326828, + "step": 2235 + }, + { + "epoch": 0.7880176211453744, + "grad_norm": 1.4888974250205094, + "learning_rate": 1.4205636477805072e-05, + "loss": 0.6893318891525269, + "step": 2236 + }, + { + "epoch": 0.7883700440528635, + "grad_norm": 1.1833417050311776, + "learning_rate": 1.4200349690650654e-05, + "loss": 0.5545464158058167, + "step": 2237 + }, + { + "epoch": 0.7887224669603524, + "grad_norm": 1.6014523598259138, + "learning_rate": 1.4195061477608234e-05, + "loss": 0.6088600158691406, + "step": 2238 + }, + { + "epoch": 0.7890748898678414, + "grad_norm": 1.3513904877886467, + "learning_rate": 1.4189771840472997e-05, + "loss": 0.6330769658088684, + "step": 2239 + }, + { + "epoch": 0.7894273127753304, + "grad_norm": 1.4283770062393895, + "learning_rate": 1.4184480781040613e-05, + "loss": 0.678654670715332, + "step": 2240 + }, + { + "epoch": 0.7897797356828193, + "grad_norm": 1.445633946040222, + "learning_rate": 1.417918830110723e-05, + "loss": 0.6259177923202515, + "step": 2241 + }, + { + "epoch": 0.7901321585903084, + "grad_norm": 1.408151849302333, + "learning_rate": 1.4173894402469477e-05, + "loss": 0.634982168674469, + "step": 2242 + }, + { + "epoch": 0.7904845814977973, + "grad_norm": 1.37778450193705, + "learning_rate": 1.4168599086924473e-05, + "loss": 0.6610612869262695, + "step": 2243 + }, + { + "epoch": 0.7908370044052864, + "grad_norm": 1.386127288755765, + "learning_rate": 1.416330235626981e-05, + "loss": 0.6952961683273315, + "step": 2244 + }, + { + "epoch": 0.7911894273127753, + "grad_norm": 1.6165363001234343, + "learning_rate": 1.4158004212303565e-05, + "loss": 0.5055881142616272, + "step": 2245 + }, + { + "epoch": 0.7915418502202644, + "grad_norm": 1.4841191669035856, + "learning_rate": 1.4152704656824288e-05, + "loss": 0.7284455299377441, + "step": 2246 + }, + { + "epoch": 0.7918942731277533, + "grad_norm": 1.3583334859782668, + "learning_rate": 1.414740369163102e-05, + "loss": 0.6985108852386475, + "step": 2247 + }, + { + "epoch": 
0.7922466960352423, + "grad_norm": 1.3664811170856164, + "learning_rate": 1.4142101318523271e-05, + "loss": 0.5967550277709961, + "step": 2248 + }, + { + "epoch": 0.7925991189427313, + "grad_norm": 1.5695298710984633, + "learning_rate": 1.4136797539301033e-05, + "loss": 0.7696695327758789, + "step": 2249 + }, + { + "epoch": 0.7929515418502202, + "grad_norm": 1.3234775564665824, + "learning_rate": 1.413149235576477e-05, + "loss": 0.8131378293037415, + "step": 2250 + }, + { + "epoch": 0.7933039647577093, + "grad_norm": 1.8429663529686, + "learning_rate": 1.4126185769715428e-05, + "loss": 0.8029932975769043, + "step": 2251 + }, + { + "epoch": 0.7936563876651982, + "grad_norm": 1.720051288151631, + "learning_rate": 1.412087778295443e-05, + "loss": 0.7408573031425476, + "step": 2252 + }, + { + "epoch": 0.7940088105726872, + "grad_norm": 1.8037723298533723, + "learning_rate": 1.411556839728367e-05, + "loss": 0.8624325394630432, + "step": 2253 + }, + { + "epoch": 0.7943612334801762, + "grad_norm": 1.5291561523904078, + "learning_rate": 1.411025761450552e-05, + "loss": 0.7635384798049927, + "step": 2254 + }, + { + "epoch": 0.7947136563876652, + "grad_norm": 1.5012301776005823, + "learning_rate": 1.4104945436422832e-05, + "loss": 0.5612920522689819, + "step": 2255 + }, + { + "epoch": 0.7950660792951542, + "grad_norm": 1.5891725973137842, + "learning_rate": 1.4099631864838912e-05, + "loss": 0.5792248845100403, + "step": 2256 + }, + { + "epoch": 0.7954185022026432, + "grad_norm": 1.427703140365858, + "learning_rate": 1.4094316901557563e-05, + "loss": 0.7405142188072205, + "step": 2257 + }, + { + "epoch": 0.7957709251101321, + "grad_norm": 1.5302016454534209, + "learning_rate": 1.4089000548383044e-05, + "loss": 0.630780816078186, + "step": 2258 + }, + { + "epoch": 0.7961233480176212, + "grad_norm": 1.5690685088460359, + "learning_rate": 1.4083682807120092e-05, + "loss": 0.6737201809883118, + "step": 2259 + }, + { + "epoch": 0.7964757709251101, + "grad_norm": 4.158789316506426, 
+ "learning_rate": 1.4078363679573918e-05, + "loss": 0.6469985842704773, + "step": 2260 + }, + { + "epoch": 0.7968281938325992, + "grad_norm": 1.4774582614404035, + "learning_rate": 1.4073043167550198e-05, + "loss": 0.6315224170684814, + "step": 2261 + }, + { + "epoch": 0.7971806167400881, + "grad_norm": 1.1766652256758812, + "learning_rate": 1.4067721272855079e-05, + "loss": 0.6785402297973633, + "step": 2262 + }, + { + "epoch": 0.797533039647577, + "grad_norm": 1.4677269844033833, + "learning_rate": 1.406239799729518e-05, + "loss": 0.7131394147872925, + "step": 2263 + }, + { + "epoch": 0.7978854625550661, + "grad_norm": 1.5575833651180606, + "learning_rate": 1.405707334267759e-05, + "loss": 0.6921142339706421, + "step": 2264 + }, + { + "epoch": 0.798237885462555, + "grad_norm": 1.375694666198905, + "learning_rate": 1.4051747310809863e-05, + "loss": 0.695213794708252, + "step": 2265 + }, + { + "epoch": 0.7985903083700441, + "grad_norm": 1.8529986724322307, + "learning_rate": 1.4046419903500013e-05, + "loss": 0.7081988453865051, + "step": 2266 + }, + { + "epoch": 0.798942731277533, + "grad_norm": 1.4461573292928833, + "learning_rate": 1.4041091122556539e-05, + "loss": 0.6404637098312378, + "step": 2267 + }, + { + "epoch": 0.7992951541850221, + "grad_norm": 1.3566691109367863, + "learning_rate": 1.403576096978839e-05, + "loss": 0.6404134631156921, + "step": 2268 + }, + { + "epoch": 0.799647577092511, + "grad_norm": 1.5118859398886633, + "learning_rate": 1.4030429447004992e-05, + "loss": 0.7963751554489136, + "step": 2269 + }, + { + "epoch": 0.8, + "grad_norm": 1.632997404115334, + "learning_rate": 1.4025096556016224e-05, + "loss": 0.6648174524307251, + "step": 2270 + }, + { + "epoch": 0.800352422907489, + "grad_norm": 1.4103532345019565, + "learning_rate": 1.4019762298632445e-05, + "loss": 0.6661815047264099, + "step": 2271 + }, + { + "epoch": 0.800704845814978, + "grad_norm": 1.7237738440956045, + "learning_rate": 1.4014426676664462e-05, + "loss": 
0.6194477081298828, + "step": 2272 + }, + { + "epoch": 0.801057268722467, + "grad_norm": 1.8457235726726873, + "learning_rate": 1.400908969192356e-05, + "loss": 0.6869276762008667, + "step": 2273 + }, + { + "epoch": 0.8014096916299559, + "grad_norm": 1.7545140114513338, + "learning_rate": 1.4003751346221472e-05, + "loss": 0.7352420091629028, + "step": 2274 + }, + { + "epoch": 0.801762114537445, + "grad_norm": 1.5994812918128933, + "learning_rate": 1.3998411641370405e-05, + "loss": 0.8212440609931946, + "step": 2275 + }, + { + "epoch": 0.8021145374449339, + "grad_norm": 1.5868623288152288, + "learning_rate": 1.3993070579183021e-05, + "loss": 0.6897045969963074, + "step": 2276 + }, + { + "epoch": 0.8024669603524229, + "grad_norm": 1.716974382638037, + "learning_rate": 1.3987728161472442e-05, + "loss": 0.8406906127929688, + "step": 2277 + }, + { + "epoch": 0.8028193832599119, + "grad_norm": 1.6664794009014727, + "learning_rate": 1.3982384390052257e-05, + "loss": 0.6236976385116577, + "step": 2278 + }, + { + "epoch": 0.8031718061674009, + "grad_norm": 1.7056031446043847, + "learning_rate": 1.3977039266736508e-05, + "loss": 0.8110965490341187, + "step": 2279 + }, + { + "epoch": 0.8035242290748899, + "grad_norm": 1.6273998334271178, + "learning_rate": 1.3971692793339697e-05, + "loss": 0.635534405708313, + "step": 2280 + }, + { + "epoch": 0.8038766519823789, + "grad_norm": 1.5382566365445476, + "learning_rate": 1.3966344971676789e-05, + "loss": 0.7806028127670288, + "step": 2281 + }, + { + "epoch": 0.8042290748898678, + "grad_norm": 1.7131487498074927, + "learning_rate": 1.3960995803563195e-05, + "loss": 0.6635935306549072, + "step": 2282 + }, + { + "epoch": 0.8045814977973569, + "grad_norm": 1.6068551029738092, + "learning_rate": 1.39556452908148e-05, + "loss": 0.6064634323120117, + "step": 2283 + }, + { + "epoch": 0.8049339207048458, + "grad_norm": 1.7686604234656398, + "learning_rate": 1.3950293435247933e-05, + "loss": 0.760187029838562, + "step": 2284 + }, + { + 
"epoch": 0.8052863436123348, + "grad_norm": 1.5333245954906318, + "learning_rate": 1.3944940238679384e-05, + "loss": 0.7004644274711609, + "step": 2285 + }, + { + "epoch": 0.8056387665198238, + "grad_norm": 1.9274194313344672, + "learning_rate": 1.393958570292639e-05, + "loss": 0.7662780284881592, + "step": 2286 + }, + { + "epoch": 0.8059911894273127, + "grad_norm": 1.3943181397787612, + "learning_rate": 1.393422982980666e-05, + "loss": 0.7939090132713318, + "step": 2287 + }, + { + "epoch": 0.8063436123348018, + "grad_norm": 1.377559765071464, + "learning_rate": 1.3928872621138337e-05, + "loss": 0.7461861371994019, + "step": 2288 + }, + { + "epoch": 0.8066960352422907, + "grad_norm": 1.4875661773009663, + "learning_rate": 1.3923514078740032e-05, + "loss": 0.5997019410133362, + "step": 2289 + }, + { + "epoch": 0.8070484581497798, + "grad_norm": 1.5379009713311227, + "learning_rate": 1.3918154204430801e-05, + "loss": 0.5437384843826294, + "step": 2290 + }, + { + "epoch": 0.8074008810572687, + "grad_norm": 1.8168415447512607, + "learning_rate": 1.3912793000030154e-05, + "loss": 0.7387127876281738, + "step": 2291 + }, + { + "epoch": 0.8077533039647578, + "grad_norm": 1.305308107523337, + "learning_rate": 1.3907430467358054e-05, + "loss": 0.483035147190094, + "step": 2292 + }, + { + "epoch": 0.8081057268722467, + "grad_norm": 1.3669144351401303, + "learning_rate": 1.3902066608234919e-05, + "loss": 0.6208503842353821, + "step": 2293 + }, + { + "epoch": 0.8084581497797357, + "grad_norm": 1.7196168695476914, + "learning_rate": 1.3896701424481603e-05, + "loss": 0.6691559553146362, + "step": 2294 + }, + { + "epoch": 0.8088105726872247, + "grad_norm": 1.6945751274550964, + "learning_rate": 1.3891334917919422e-05, + "loss": 0.8960802555084229, + "step": 2295 + }, + { + "epoch": 0.8091629955947136, + "grad_norm": 1.7625732291329363, + "learning_rate": 1.388596709037014e-05, + "loss": 0.669715404510498, + "step": 2296 + }, + { + "epoch": 0.8095154185022027, + "grad_norm": 
1.4235891674683654, + "learning_rate": 1.3880597943655972e-05, + "loss": 0.7356190085411072, + "step": 2297 + }, + { + "epoch": 0.8098678414096916, + "grad_norm": 1.6403595773987272, + "learning_rate": 1.3875227479599565e-05, + "loss": 0.9158750176429749, + "step": 2298 + }, + { + "epoch": 0.8102202643171806, + "grad_norm": 1.718215094287951, + "learning_rate": 1.3869855700024031e-05, + "loss": 0.7395786643028259, + "step": 2299 + }, + { + "epoch": 0.8105726872246696, + "grad_norm": 1.6360185397225708, + "learning_rate": 1.3864482606752922e-05, + "loss": 0.594106912612915, + "step": 2300 + }, + { + "epoch": 0.8109251101321586, + "grad_norm": 1.6395747499474045, + "learning_rate": 1.3859108201610236e-05, + "loss": 0.7853089570999146, + "step": 2301 + }, + { + "epoch": 0.8112775330396476, + "grad_norm": 1.6313227134249062, + "learning_rate": 1.3853732486420413e-05, + "loss": 0.8346991539001465, + "step": 2302 + }, + { + "epoch": 0.8116299559471366, + "grad_norm": 1.6254363131857819, + "learning_rate": 1.3848355463008344e-05, + "loss": 0.5493819117546082, + "step": 2303 + }, + { + "epoch": 0.8119823788546255, + "grad_norm": 1.566621350016491, + "learning_rate": 1.3842977133199363e-05, + "loss": 0.7474828958511353, + "step": 2304 + }, + { + "epoch": 0.8123348017621146, + "grad_norm": 1.6648296076023164, + "learning_rate": 1.3837597498819242e-05, + "loss": 0.6599621772766113, + "step": 2305 + }, + { + "epoch": 0.8126872246696035, + "grad_norm": 1.5217466732352583, + "learning_rate": 1.38322165616942e-05, + "loss": 0.6751214861869812, + "step": 2306 + }, + { + "epoch": 0.8130396475770925, + "grad_norm": 1.720054765999457, + "learning_rate": 1.3826834323650899e-05, + "loss": 0.7450453042984009, + "step": 2307 + }, + { + "epoch": 0.8133920704845815, + "grad_norm": 1.4739637914592345, + "learning_rate": 1.382145078651644e-05, + "loss": 0.7015345692634583, + "step": 2308 + }, + { + "epoch": 0.8137444933920704, + "grad_norm": 1.4921910425897076, + "learning_rate": 
1.3816065952118368e-05, + "loss": 0.7161329984664917, + "step": 2309 + }, + { + "epoch": 0.8140969162995595, + "grad_norm": 1.576440929020717, + "learning_rate": 1.3810679822284665e-05, + "loss": 0.771783709526062, + "step": 2310 + }, + { + "epoch": 0.8144493392070484, + "grad_norm": 1.461165164266228, + "learning_rate": 1.3805292398843755e-05, + "loss": 0.6710794568061829, + "step": 2311 + }, + { + "epoch": 0.8148017621145375, + "grad_norm": 1.6256312715940777, + "learning_rate": 1.3799903683624503e-05, + "loss": 0.6614924669265747, + "step": 2312 + }, + { + "epoch": 0.8151541850220264, + "grad_norm": 1.429649360127197, + "learning_rate": 1.3794513678456203e-05, + "loss": 0.6432225704193115, + "step": 2313 + }, + { + "epoch": 0.8155066079295155, + "grad_norm": 1.233784916709085, + "learning_rate": 1.3789122385168604e-05, + "loss": 0.6228311061859131, + "step": 2314 + }, + { + "epoch": 0.8158590308370044, + "grad_norm": 1.5182036065920572, + "learning_rate": 1.3783729805591875e-05, + "loss": 0.5597498416900635, + "step": 2315 + }, + { + "epoch": 0.8162114537444934, + "grad_norm": 1.954667780900904, + "learning_rate": 1.3778335941556629e-05, + "loss": 0.7651177048683167, + "step": 2316 + }, + { + "epoch": 0.8165638766519824, + "grad_norm": 1.3053642347729657, + "learning_rate": 1.3772940794893916e-05, + "loss": 0.5482406616210938, + "step": 2317 + }, + { + "epoch": 0.8169162995594713, + "grad_norm": 1.4432389735878668, + "learning_rate": 1.3767544367435229e-05, + "loss": 0.767236590385437, + "step": 2318 + }, + { + "epoch": 0.8172687224669604, + "grad_norm": 1.7071036751428772, + "learning_rate": 1.3762146661012471e-05, + "loss": 0.705253541469574, + "step": 2319 + }, + { + "epoch": 0.8176211453744493, + "grad_norm": 1.4969645559129943, + "learning_rate": 1.3756747677458008e-05, + "loss": 0.7800463438034058, + "step": 2320 + }, + { + "epoch": 0.8179735682819383, + "grad_norm": 1.6172262621918039, + "learning_rate": 1.3751347418604623e-05, + "loss": 
0.7615088224411011, + "step": 2321 + }, + { + "epoch": 0.8183259911894273, + "grad_norm": 1.6932314886464006, + "learning_rate": 1.3745945886285536e-05, + "loss": 0.8004297614097595, + "step": 2322 + }, + { + "epoch": 0.8186784140969163, + "grad_norm": 1.605867375121777, + "learning_rate": 1.3740543082334399e-05, + "loss": 0.6428912281990051, + "step": 2323 + }, + { + "epoch": 0.8190308370044053, + "grad_norm": 1.4147620040703779, + "learning_rate": 1.3735139008585294e-05, + "loss": 0.6702802777290344, + "step": 2324 + }, + { + "epoch": 0.8193832599118943, + "grad_norm": 1.3127203907182126, + "learning_rate": 1.3729733666872736e-05, + "loss": 0.6003440022468567, + "step": 2325 + }, + { + "epoch": 0.8197356828193832, + "grad_norm": 2.04633486984075, + "learning_rate": 1.3724327059031677e-05, + "loss": 0.8264240622520447, + "step": 2326 + }, + { + "epoch": 0.8200881057268723, + "grad_norm": 1.4037319277657845, + "learning_rate": 1.3718919186897481e-05, + "loss": 0.6974462866783142, + "step": 2327 + }, + { + "epoch": 0.8204405286343612, + "grad_norm": 1.7081986923623933, + "learning_rate": 1.3713510052305962e-05, + "loss": 0.8273947238922119, + "step": 2328 + }, + { + "epoch": 0.8207929515418502, + "grad_norm": 1.5000401588722418, + "learning_rate": 1.3708099657093348e-05, + "loss": 0.6230529546737671, + "step": 2329 + }, + { + "epoch": 0.8211453744493392, + "grad_norm": 1.6377312790274685, + "learning_rate": 1.37026880030963e-05, + "loss": 0.6997084021568298, + "step": 2330 + }, + { + "epoch": 0.8214977973568282, + "grad_norm": 1.582616740422673, + "learning_rate": 1.3697275092151908e-05, + "loss": 0.7212036848068237, + "step": 2331 + }, + { + "epoch": 0.8218502202643172, + "grad_norm": 1.5449017822829925, + "learning_rate": 1.3691860926097685e-05, + "loss": 0.7758737206459045, + "step": 2332 + }, + { + "epoch": 0.8222026431718061, + "grad_norm": 1.7784238395856364, + "learning_rate": 1.368644550677157e-05, + "loss": 0.62369704246521, + "step": 2333 + }, + { + 
"epoch": 0.8225550660792952, + "grad_norm": 1.6110908974677367, + "learning_rate": 1.3681028836011935e-05, + "loss": 0.8051841855049133, + "step": 2334 + }, + { + "epoch": 0.8229074889867841, + "grad_norm": 1.3626761635443752, + "learning_rate": 1.3675610915657568e-05, + "loss": 0.6087243556976318, + "step": 2335 + }, + { + "epoch": 0.8232599118942732, + "grad_norm": 1.9382202981470131, + "learning_rate": 1.3670191747547685e-05, + "loss": 0.6949581503868103, + "step": 2336 + }, + { + "epoch": 0.8236123348017621, + "grad_norm": 1.5451121537596906, + "learning_rate": 1.3664771333521922e-05, + "loss": 0.5621528029441833, + "step": 2337 + }, + { + "epoch": 0.8239647577092511, + "grad_norm": 1.622327701652298, + "learning_rate": 1.3659349675420346e-05, + "loss": 0.8731498718261719, + "step": 2338 + }, + { + "epoch": 0.8243171806167401, + "grad_norm": 1.5570249925953572, + "learning_rate": 1.3653926775083437e-05, + "loss": 0.6997240781784058, + "step": 2339 + }, + { + "epoch": 0.824669603524229, + "grad_norm": 1.6562463291138314, + "learning_rate": 1.3648502634352104e-05, + "loss": 0.8061426877975464, + "step": 2340 + }, + { + "epoch": 0.8250220264317181, + "grad_norm": 1.7061312576253802, + "learning_rate": 1.3643077255067667e-05, + "loss": 0.6186845302581787, + "step": 2341 + }, + { + "epoch": 0.825374449339207, + "grad_norm": 1.6605971928200247, + "learning_rate": 1.3637650639071884e-05, + "loss": 0.8098937273025513, + "step": 2342 + }, + { + "epoch": 0.825726872246696, + "grad_norm": 1.6091516027269386, + "learning_rate": 1.3632222788206916e-05, + "loss": 0.5810271501541138, + "step": 2343 + }, + { + "epoch": 0.826079295154185, + "grad_norm": 1.4965459276387059, + "learning_rate": 1.3626793704315348e-05, + "loss": 0.48309600353240967, + "step": 2344 + }, + { + "epoch": 0.826431718061674, + "grad_norm": 1.4326274242229946, + "learning_rate": 1.3621363389240188e-05, + "loss": 0.7366980314254761, + "step": 2345 + }, + { + "epoch": 0.826784140969163, + "grad_norm": 
1.571199172280502, + "learning_rate": 1.3615931844824859e-05, + "loss": 0.6572252511978149, + "step": 2346 + }, + { + "epoch": 0.827136563876652, + "grad_norm": 1.3078300281358257, + "learning_rate": 1.3610499072913204e-05, + "loss": 0.6776653528213501, + "step": 2347 + }, + { + "epoch": 0.827488986784141, + "grad_norm": 1.772641440888185, + "learning_rate": 1.3605065075349473e-05, + "loss": 0.6536053419113159, + "step": 2348 + }, + { + "epoch": 0.82784140969163, + "grad_norm": 1.600184025362065, + "learning_rate": 1.3599629853978342e-05, + "loss": 0.7000117301940918, + "step": 2349 + }, + { + "epoch": 0.8281938325991189, + "grad_norm": 1.5533713409132957, + "learning_rate": 1.3594193410644902e-05, + "loss": 0.6480045318603516, + "step": 2350 + }, + { + "epoch": 0.8285462555066079, + "grad_norm": 1.5474076871693587, + "learning_rate": 1.3588755747194656e-05, + "loss": 0.6428179740905762, + "step": 2351 + }, + { + "epoch": 0.8288986784140969, + "grad_norm": 1.3886734182652174, + "learning_rate": 1.3583316865473517e-05, + "loss": 0.618633508682251, + "step": 2352 + }, + { + "epoch": 0.8292511013215859, + "grad_norm": 1.5946423674864716, + "learning_rate": 1.357787676732782e-05, + "loss": 0.7289671897888184, + "step": 2353 + }, + { + "epoch": 0.8296035242290749, + "grad_norm": 1.687058159970245, + "learning_rate": 1.3572435454604307e-05, + "loss": 0.6969538927078247, + "step": 2354 + }, + { + "epoch": 0.8299559471365638, + "grad_norm": 1.565248379514886, + "learning_rate": 1.3566992929150137e-05, + "loss": 0.8490859270095825, + "step": 2355 + }, + { + "epoch": 0.8303083700440529, + "grad_norm": 1.532906793366292, + "learning_rate": 1.3561549192812877e-05, + "loss": 0.6883271336555481, + "step": 2356 + }, + { + "epoch": 0.8306607929515418, + "grad_norm": 1.3151000902691472, + "learning_rate": 1.3556104247440504e-05, + "loss": 0.68092280626297, + "step": 2357 + }, + { + "epoch": 0.8310132158590309, + "grad_norm": 1.2591886658215548, + "learning_rate": 
1.3550658094881413e-05, + "loss": 0.7077454924583435, + "step": 2358 + }, + { + "epoch": 0.8313656387665198, + "grad_norm": 1.5452673483096302, + "learning_rate": 1.3545210736984393e-05, + "loss": 0.7364591360092163, + "step": 2359 + }, + { + "epoch": 0.8317180616740089, + "grad_norm": 1.4999509926023873, + "learning_rate": 1.3539762175598666e-05, + "loss": 0.8047930002212524, + "step": 2360 + }, + { + "epoch": 0.8320704845814978, + "grad_norm": 1.4862380654794773, + "learning_rate": 1.3534312412573836e-05, + "loss": 0.7717781066894531, + "step": 2361 + }, + { + "epoch": 0.8324229074889867, + "grad_norm": 1.7032828917925678, + "learning_rate": 1.3528861449759938e-05, + "loss": 0.7228613495826721, + "step": 2362 + }, + { + "epoch": 0.8327753303964758, + "grad_norm": 1.5752771060390574, + "learning_rate": 1.3523409289007399e-05, + "loss": 0.8025436401367188, + "step": 2363 + }, + { + "epoch": 0.8331277533039647, + "grad_norm": 1.5214524176303228, + "learning_rate": 1.3517955932167057e-05, + "loss": 0.6653664112091064, + "step": 2364 + }, + { + "epoch": 0.8334801762114538, + "grad_norm": 1.4409217046848606, + "learning_rate": 1.3512501381090158e-05, + "loss": 0.709527313709259, + "step": 2365 + }, + { + "epoch": 0.8338325991189427, + "grad_norm": 1.4678807653581447, + "learning_rate": 1.3507045637628355e-05, + "loss": 0.7317520380020142, + "step": 2366 + }, + { + "epoch": 0.8341850220264317, + "grad_norm": 1.4520344718636113, + "learning_rate": 1.3501588703633703e-05, + "loss": 0.734069287776947, + "step": 2367 + }, + { + "epoch": 0.8345374449339207, + "grad_norm": 1.355050784601881, + "learning_rate": 1.349613058095866e-05, + "loss": 0.5950552225112915, + "step": 2368 + }, + { + "epoch": 0.8348898678414097, + "grad_norm": 1.3916802158941735, + "learning_rate": 1.3490671271456084e-05, + "loss": 0.5958857536315918, + "step": 2369 + }, + { + "epoch": 0.8352422907488987, + "grad_norm": 1.319860830071963, + "learning_rate": 1.348521077697925e-05, + "loss": 
0.7094449996948242, + "step": 2370 + }, + { + "epoch": 0.8355947136563877, + "grad_norm": 1.283824481194398, + "learning_rate": 1.3479749099381818e-05, + "loss": 0.6260385513305664, + "step": 2371 + }, + { + "epoch": 0.8359471365638766, + "grad_norm": 1.3546760632082742, + "learning_rate": 1.3474286240517862e-05, + "loss": 0.65608811378479, + "step": 2372 + }, + { + "epoch": 0.8362995594713656, + "grad_norm": 1.5902013950729095, + "learning_rate": 1.346882220224185e-05, + "loss": 0.6942586898803711, + "step": 2373 + }, + { + "epoch": 0.8366519823788546, + "grad_norm": 1.5432700710308092, + "learning_rate": 1.3463356986408653e-05, + "loss": 0.6831374168395996, + "step": 2374 + }, + { + "epoch": 0.8370044052863436, + "grad_norm": 1.2453712902306997, + "learning_rate": 1.3457890594873546e-05, + "loss": 0.6363790035247803, + "step": 2375 + }, + { + "epoch": 0.8373568281938326, + "grad_norm": 1.4407831477600082, + "learning_rate": 1.3452423029492194e-05, + "loss": 0.698935866355896, + "step": 2376 + }, + { + "epoch": 0.8377092511013216, + "grad_norm": 1.6516160077651472, + "learning_rate": 1.3446954292120667e-05, + "loss": 0.8569005727767944, + "step": 2377 + }, + { + "epoch": 0.8380616740088106, + "grad_norm": 1.4963554673760426, + "learning_rate": 1.3441484384615428e-05, + "loss": 0.8461613655090332, + "step": 2378 + }, + { + "epoch": 0.8384140969162995, + "grad_norm": 1.635336062215313, + "learning_rate": 1.343601330883335e-05, + "loss": 0.7481078505516052, + "step": 2379 + }, + { + "epoch": 0.8387665198237886, + "grad_norm": 1.1164155853725835, + "learning_rate": 1.343054106663168e-05, + "loss": 0.5632544755935669, + "step": 2380 + }, + { + "epoch": 0.8391189427312775, + "grad_norm": 1.2387886339726162, + "learning_rate": 1.3425067659868084e-05, + "loss": 0.528980016708374, + "step": 2381 + }, + { + "epoch": 0.8394713656387666, + "grad_norm": 1.2987181937645196, + "learning_rate": 1.341959309040061e-05, + "loss": 0.5520849227905273, + "step": 2382 + }, + { + 
"epoch": 0.8398237885462555, + "grad_norm": 1.1709661282123542, + "learning_rate": 1.34141173600877e-05, + "loss": 0.569744348526001, + "step": 2383 + }, + { + "epoch": 0.8401762114537445, + "grad_norm": 1.1526596958180186, + "learning_rate": 1.3408640470788202e-05, + "loss": 0.595065712928772, + "step": 2384 + }, + { + "epoch": 0.8405286343612335, + "grad_norm": 1.716530250506247, + "learning_rate": 1.3403162424361342e-05, + "loss": 0.6993277072906494, + "step": 2385 + }, + { + "epoch": 0.8408810572687224, + "grad_norm": 1.467497517918387, + "learning_rate": 1.3397683222666748e-05, + "loss": 0.6183342933654785, + "step": 2386 + }, + { + "epoch": 0.8412334801762115, + "grad_norm": 1.5660447986557493, + "learning_rate": 1.339220286756444e-05, + "loss": 0.7280797362327576, + "step": 2387 + }, + { + "epoch": 0.8415859030837004, + "grad_norm": 1.5538390945999534, + "learning_rate": 1.3386721360914829e-05, + "loss": 0.7377837896347046, + "step": 2388 + }, + { + "epoch": 0.8419383259911895, + "grad_norm": 1.3658202604001934, + "learning_rate": 1.3381238704578718e-05, + "loss": 0.7202758193016052, + "step": 2389 + }, + { + "epoch": 0.8422907488986784, + "grad_norm": 1.4864419338323784, + "learning_rate": 1.3375754900417291e-05, + "loss": 0.5899994969367981, + "step": 2390 + }, + { + "epoch": 0.8426431718061674, + "grad_norm": 1.6545749228929092, + "learning_rate": 1.3370269950292133e-05, + "loss": 0.8128558993339539, + "step": 2391 + }, + { + "epoch": 0.8429955947136564, + "grad_norm": 1.4863580222240895, + "learning_rate": 1.3364783856065213e-05, + "loss": 0.8222962617874146, + "step": 2392 + }, + { + "epoch": 0.8433480176211454, + "grad_norm": 1.5392010225603865, + "learning_rate": 1.3359296619598894e-05, + "loss": 0.7898896932601929, + "step": 2393 + }, + { + "epoch": 0.8437004405286344, + "grad_norm": 1.59106154269148, + "learning_rate": 1.3353808242755912e-05, + "loss": 0.6596726179122925, + "step": 2394 + }, + { + "epoch": 0.8440528634361234, + "grad_norm": 
1.6652244607977948, + "learning_rate": 1.3348318727399411e-05, + "loss": 0.8073080778121948, + "step": 2395 + }, + { + "epoch": 0.8444052863436123, + "grad_norm": 1.582055504815832, + "learning_rate": 1.3342828075392902e-05, + "loss": 0.6640043258666992, + "step": 2396 + }, + { + "epoch": 0.8447577092511013, + "grad_norm": 1.415789065826391, + "learning_rate": 1.3337336288600297e-05, + "loss": 0.6067632436752319, + "step": 2397 + }, + { + "epoch": 0.8451101321585903, + "grad_norm": 1.308177796408265, + "learning_rate": 1.3331843368885882e-05, + "loss": 0.6891398429870605, + "step": 2398 + }, + { + "epoch": 0.8454625550660793, + "grad_norm": 1.276250238749864, + "learning_rate": 1.3326349318114335e-05, + "loss": 0.6007423996925354, + "step": 2399 + }, + { + "epoch": 0.8458149779735683, + "grad_norm": 1.6159836309404996, + "learning_rate": 1.3320854138150712e-05, + "loss": 0.7314017415046692, + "step": 2400 + }, + { + "epoch": 0.8461674008810572, + "grad_norm": 1.5060027308979995, + "learning_rate": 1.3315357830860461e-05, + "loss": 0.7352335453033447, + "step": 2401 + }, + { + "epoch": 0.8465198237885463, + "grad_norm": 1.3629774951204896, + "learning_rate": 1.3309860398109402e-05, + "loss": 0.6546785831451416, + "step": 2402 + }, + { + "epoch": 0.8468722466960352, + "grad_norm": 1.4629106252693242, + "learning_rate": 1.3304361841763746e-05, + "loss": 0.590252697467804, + "step": 2403 + }, + { + "epoch": 0.8472246696035243, + "grad_norm": 1.5501476697602834, + "learning_rate": 1.3298862163690078e-05, + "loss": 0.6864089369773865, + "step": 2404 + }, + { + "epoch": 0.8475770925110132, + "grad_norm": 1.452376737172979, + "learning_rate": 1.3293361365755373e-05, + "loss": 0.7818390130996704, + "step": 2405 + }, + { + "epoch": 0.8479295154185023, + "grad_norm": 1.9084475381981967, + "learning_rate": 1.3287859449826977e-05, + "loss": 0.7461166381835938, + "step": 2406 + }, + { + "epoch": 0.8482819383259912, + "grad_norm": 1.7337796671611372, + "learning_rate": 
1.3282356417772618e-05, + "loss": 0.7519750595092773, + "step": 2407 + }, + { + "epoch": 0.8486343612334801, + "grad_norm": 1.445619912428175, + "learning_rate": 1.3276852271460406e-05, + "loss": 0.7041791081428528, + "step": 2408 + }, + { + "epoch": 0.8489867841409692, + "grad_norm": 1.3131157575910486, + "learning_rate": 1.327134701275883e-05, + "loss": 0.5649428367614746, + "step": 2409 + }, + { + "epoch": 0.8493392070484581, + "grad_norm": 1.838398891045019, + "learning_rate": 1.3265840643536746e-05, + "loss": 0.6607545614242554, + "step": 2410 + }, + { + "epoch": 0.8496916299559472, + "grad_norm": 1.590568626194504, + "learning_rate": 1.3260333165663406e-05, + "loss": 0.7393547892570496, + "step": 2411 + }, + { + "epoch": 0.8500440528634361, + "grad_norm": 1.660269046740627, + "learning_rate": 1.325482458100842e-05, + "loss": 0.6550742387771606, + "step": 2412 + }, + { + "epoch": 0.8503964757709251, + "grad_norm": 1.3409806360783354, + "learning_rate": 1.324931489144178e-05, + "loss": 0.5104576349258423, + "step": 2413 + }, + { + "epoch": 0.8507488986784141, + "grad_norm": 1.7056036938051933, + "learning_rate": 1.3243804098833859e-05, + "loss": 0.7679733037948608, + "step": 2414 + }, + { + "epoch": 0.8511013215859031, + "grad_norm": 1.3058704920771766, + "learning_rate": 1.3238292205055397e-05, + "loss": 0.6516377925872803, + "step": 2415 + }, + { + "epoch": 0.8514537444933921, + "grad_norm": 1.4749751578789572, + "learning_rate": 1.3232779211977509e-05, + "loss": 0.8509281277656555, + "step": 2416 + }, + { + "epoch": 0.8518061674008811, + "grad_norm": 1.6532741255389543, + "learning_rate": 1.3227265121471691e-05, + "loss": 0.5643317103385925, + "step": 2417 + }, + { + "epoch": 0.85215859030837, + "grad_norm": 1.4681710603298503, + "learning_rate": 1.3221749935409798e-05, + "loss": 0.5294302105903625, + "step": 2418 + }, + { + "epoch": 0.852511013215859, + "grad_norm": 1.4914498870655002, + "learning_rate": 1.3216233655664067e-05, + "loss": 0.6301594972610474, 
+ "step": 2419 + }, + { + "epoch": 0.852863436123348, + "grad_norm": 1.399957922496421, + "learning_rate": 1.32107162841071e-05, + "loss": 0.6930294036865234, + "step": 2420 + }, + { + "epoch": 0.853215859030837, + "grad_norm": 1.4069779391578274, + "learning_rate": 1.3205197822611876e-05, + "loss": 0.6266883611679077, + "step": 2421 + }, + { + "epoch": 0.853568281938326, + "grad_norm": 1.7817063662748283, + "learning_rate": 1.3199678273051743e-05, + "loss": 0.7789868116378784, + "step": 2422 + }, + { + "epoch": 0.853920704845815, + "grad_norm": 1.3387299141459739, + "learning_rate": 1.3194157637300416e-05, + "loss": 0.7148274779319763, + "step": 2423 + }, + { + "epoch": 0.854273127753304, + "grad_norm": 1.4757263125304436, + "learning_rate": 1.3188635917231972e-05, + "loss": 0.550403356552124, + "step": 2424 + }, + { + "epoch": 0.8546255506607929, + "grad_norm": 1.563076871593329, + "learning_rate": 1.3183113114720872e-05, + "loss": 0.6650338768959045, + "step": 2425 + }, + { + "epoch": 0.854977973568282, + "grad_norm": 1.569123753374588, + "learning_rate": 1.317758923164193e-05, + "loss": 0.7774436473846436, + "step": 2426 + }, + { + "epoch": 0.8553303964757709, + "grad_norm": 1.407079429107656, + "learning_rate": 1.3172064269870335e-05, + "loss": 0.6192025542259216, + "step": 2427 + }, + { + "epoch": 0.85568281938326, + "grad_norm": 1.6230407627498116, + "learning_rate": 1.3166538231281635e-05, + "loss": 0.6758309602737427, + "step": 2428 + }, + { + "epoch": 0.8560352422907489, + "grad_norm": 1.6026256588862147, + "learning_rate": 1.3161011117751756e-05, + "loss": 0.7311116456985474, + "step": 2429 + }, + { + "epoch": 0.8563876651982378, + "grad_norm": 1.797024553793142, + "learning_rate": 1.3155482931156977e-05, + "loss": 0.7525666952133179, + "step": 2430 + }, + { + "epoch": 0.8567400881057269, + "grad_norm": 1.7067244433524313, + "learning_rate": 1.3149953673373945e-05, + "loss": 0.6903671026229858, + "step": 2431 + }, + { + "epoch": 0.8570925110132158, + 
"grad_norm": 1.2833360218942749, + "learning_rate": 1.314442334627967e-05, + "loss": 0.6036638021469116, + "step": 2432 + }, + { + "epoch": 0.8574449339207049, + "grad_norm": 1.6354054518430503, + "learning_rate": 1.3138891951751526e-05, + "loss": 0.6490209698677063, + "step": 2433 + }, + { + "epoch": 0.8577973568281938, + "grad_norm": 1.6970156912379664, + "learning_rate": 1.3133359491667252e-05, + "loss": 0.692024290561676, + "step": 2434 + }, + { + "epoch": 0.8581497797356828, + "grad_norm": 1.4031255607051936, + "learning_rate": 1.3127825967904944e-05, + "loss": 0.6977943181991577, + "step": 2435 + }, + { + "epoch": 0.8585022026431718, + "grad_norm": 1.3842045822286646, + "learning_rate": 1.312229138234306e-05, + "loss": 0.625649094581604, + "step": 2436 + }, + { + "epoch": 0.8588546255506608, + "grad_norm": 1.5910466082409926, + "learning_rate": 1.3116755736860422e-05, + "loss": 0.671939492225647, + "step": 2437 + }, + { + "epoch": 0.8592070484581498, + "grad_norm": 1.3856883940296008, + "learning_rate": 1.3111219033336211e-05, + "loss": 0.700029194355011, + "step": 2438 + }, + { + "epoch": 0.8595594713656388, + "grad_norm": 1.3907118477619378, + "learning_rate": 1.3105681273649959e-05, + "loss": 0.6339718699455261, + "step": 2439 + }, + { + "epoch": 0.8599118942731278, + "grad_norm": 1.306943148235595, + "learning_rate": 1.3100142459681569e-05, + "loss": 0.7105488777160645, + "step": 2440 + }, + { + "epoch": 0.8602643171806167, + "grad_norm": 1.4503861250177865, + "learning_rate": 1.3094602593311294e-05, + "loss": 0.616797924041748, + "step": 2441 + }, + { + "epoch": 0.8606167400881057, + "grad_norm": 1.5110286813274958, + "learning_rate": 1.3089061676419746e-05, + "loss": 0.7167524099349976, + "step": 2442 + }, + { + "epoch": 0.8609691629955947, + "grad_norm": 1.5215961993133658, + "learning_rate": 1.3083519710887895e-05, + "loss": 0.5499090552330017, + "step": 2443 + }, + { + "epoch": 0.8613215859030837, + "grad_norm": 1.4623789546240658, + "learning_rate": 
1.3077976698597064e-05, + "loss": 0.5764151811599731, + "step": 2444 + }, + { + "epoch": 0.8616740088105727, + "grad_norm": 1.438510619597336, + "learning_rate": 1.3072432641428931e-05, + "loss": 0.7171419858932495, + "step": 2445 + }, + { + "epoch": 0.8620264317180617, + "grad_norm": 1.3023250448197168, + "learning_rate": 1.3066887541265539e-05, + "loss": 0.7546026706695557, + "step": 2446 + }, + { + "epoch": 0.8623788546255506, + "grad_norm": 1.2250371592811133, + "learning_rate": 1.306134139998927e-05, + "loss": 0.5884296298027039, + "step": 2447 + }, + { + "epoch": 0.8627312775330397, + "grad_norm": 1.3135127283076564, + "learning_rate": 1.3055794219482867e-05, + "loss": 0.6877926588058472, + "step": 2448 + }, + { + "epoch": 0.8630837004405286, + "grad_norm": 1.5935068741769265, + "learning_rate": 1.3050246001629425e-05, + "loss": 0.598037838935852, + "step": 2449 + }, + { + "epoch": 0.8634361233480177, + "grad_norm": 1.4128431939298278, + "learning_rate": 1.3044696748312395e-05, + "loss": 0.6560795307159424, + "step": 2450 + }, + { + "epoch": 0.8637885462555066, + "grad_norm": 1.5856094022002207, + "learning_rate": 1.3039146461415575e-05, + "loss": 0.7130829691886902, + "step": 2451 + }, + { + "epoch": 0.8641409691629955, + "grad_norm": 1.9167144031452974, + "learning_rate": 1.303359514282311e-05, + "loss": 0.7402251958847046, + "step": 2452 + }, + { + "epoch": 0.8644933920704846, + "grad_norm": 1.4143817039312587, + "learning_rate": 1.3028042794419502e-05, + "loss": 0.6610683798789978, + "step": 2453 + }, + { + "epoch": 0.8648458149779735, + "grad_norm": 1.6544654323663863, + "learning_rate": 1.3022489418089606e-05, + "loss": 0.84892737865448, + "step": 2454 + }, + { + "epoch": 0.8651982378854626, + "grad_norm": 1.689285386487206, + "learning_rate": 1.3016935015718612e-05, + "loss": 0.7285948991775513, + "step": 2455 + }, + { + "epoch": 0.8655506607929515, + "grad_norm": 1.46262615014944, + "learning_rate": 1.3011379589192074e-05, + "loss": 
0.6800004839897156, + "step": 2456 + }, + { + "epoch": 0.8659030837004406, + "grad_norm": 1.492659523558787, + "learning_rate": 1.3005823140395878e-05, + "loss": 0.618618369102478, + "step": 2457 + }, + { + "epoch": 0.8662555066079295, + "grad_norm": 1.8084387802865425, + "learning_rate": 1.3000265671216278e-05, + "loss": 0.7657757997512817, + "step": 2458 + }, + { + "epoch": 0.8666079295154185, + "grad_norm": 1.5490708834885107, + "learning_rate": 1.2994707183539848e-05, + "loss": 0.7814151644706726, + "step": 2459 + }, + { + "epoch": 0.8669603524229075, + "grad_norm": 1.2899412950022648, + "learning_rate": 1.2989147679253531e-05, + "loss": 0.6494930982589722, + "step": 2460 + }, + { + "epoch": 0.8673127753303965, + "grad_norm": 1.5543724658760723, + "learning_rate": 1.2983587160244602e-05, + "loss": 0.6498425006866455, + "step": 2461 + }, + { + "epoch": 0.8676651982378855, + "grad_norm": 1.5210228165977844, + "learning_rate": 1.2978025628400684e-05, + "loss": 0.635313093662262, + "step": 2462 + }, + { + "epoch": 0.8680176211453744, + "grad_norm": 1.500755936886382, + "learning_rate": 1.2972463085609744e-05, + "loss": 0.6892971992492676, + "step": 2463 + }, + { + "epoch": 0.8683700440528634, + "grad_norm": 1.3872566957567176, + "learning_rate": 1.2966899533760095e-05, + "loss": 0.691922128200531, + "step": 2464 + }, + { + "epoch": 0.8687224669603524, + "grad_norm": 1.773327696286038, + "learning_rate": 1.2961334974740386e-05, + "loss": 0.5764378309249878, + "step": 2465 + }, + { + "epoch": 0.8690748898678414, + "grad_norm": 1.6231464224655543, + "learning_rate": 1.2955769410439616e-05, + "loss": 0.8193005919456482, + "step": 2466 + }, + { + "epoch": 0.8694273127753304, + "grad_norm": 1.4243504226778951, + "learning_rate": 1.2950202842747115e-05, + "loss": 0.6141501665115356, + "step": 2467 + }, + { + "epoch": 0.8697797356828194, + "grad_norm": 1.5061592811010869, + "learning_rate": 1.2944635273552565e-05, + "loss": 0.7464454174041748, + "step": 2468 + }, + { + 
"epoch": 0.8701321585903083, + "grad_norm": 1.3349759192393535, + "learning_rate": 1.293906670474598e-05, + "loss": 0.5970025062561035, + "step": 2469 + }, + { + "epoch": 0.8704845814977974, + "grad_norm": 1.6022434524431073, + "learning_rate": 1.2933497138217714e-05, + "loss": 0.7247673273086548, + "step": 2470 + }, + { + "epoch": 0.8708370044052863, + "grad_norm": 1.535051650641408, + "learning_rate": 1.2927926575858463e-05, + "loss": 0.746272087097168, + "step": 2471 + }, + { + "epoch": 0.8711894273127754, + "grad_norm": 1.5072596947359789, + "learning_rate": 1.2922355019559265e-05, + "loss": 0.6918776035308838, + "step": 2472 + }, + { + "epoch": 0.8715418502202643, + "grad_norm": 1.553343209452483, + "learning_rate": 1.2916782471211478e-05, + "loss": 0.6056039929389954, + "step": 2473 + }, + { + "epoch": 0.8718942731277532, + "grad_norm": 1.3670048649799473, + "learning_rate": 1.2911208932706821e-05, + "loss": 0.6699481010437012, + "step": 2474 + }, + { + "epoch": 0.8722466960352423, + "grad_norm": 1.4719810242076543, + "learning_rate": 1.2905634405937327e-05, + "loss": 0.5141814947128296, + "step": 2475 + }, + { + "epoch": 0.8725991189427312, + "grad_norm": 1.5819338229003952, + "learning_rate": 1.2900058892795383e-05, + "loss": 0.7521284818649292, + "step": 2476 + }, + { + "epoch": 0.8729515418502203, + "grad_norm": 2.2082732494247916, + "learning_rate": 1.2894482395173695e-05, + "loss": 0.6878937482833862, + "step": 2477 + }, + { + "epoch": 0.8733039647577092, + "grad_norm": 1.3942904192465777, + "learning_rate": 1.2888904914965317e-05, + "loss": 0.5963379144668579, + "step": 2478 + }, + { + "epoch": 0.8736563876651983, + "grad_norm": 1.7634340153188761, + "learning_rate": 1.2883326454063623e-05, + "loss": 0.7572320103645325, + "step": 2479 + }, + { + "epoch": 0.8740088105726872, + "grad_norm": 1.399026210420982, + "learning_rate": 1.2877747014362334e-05, + "loss": 0.7047982215881348, + "step": 2480 + }, + { + "epoch": 0.8743612334801762, + "grad_norm": 
2.0588397887454715, + "learning_rate": 1.2872166597755488e-05, + "loss": 0.6449024677276611, + "step": 2481 + }, + { + "epoch": 0.8747136563876652, + "grad_norm": 1.6446468607591163, + "learning_rate": 1.2866585206137469e-05, + "loss": 0.7590922117233276, + "step": 2482 + }, + { + "epoch": 0.8750660792951542, + "grad_norm": 1.6164965426300901, + "learning_rate": 1.2861002841402983e-05, + "loss": 0.7534210085868835, + "step": 2483 + }, + { + "epoch": 0.8754185022026432, + "grad_norm": 1.9198456186069754, + "learning_rate": 1.2855419505447073e-05, + "loss": 0.7091225385665894, + "step": 2484 + }, + { + "epoch": 0.8757709251101321, + "grad_norm": 1.5347710098555305, + "learning_rate": 1.2849835200165104e-05, + "loss": 0.7578933835029602, + "step": 2485 + }, + { + "epoch": 0.8761233480176212, + "grad_norm": 1.3282869408675961, + "learning_rate": 1.2844249927452771e-05, + "loss": 0.5938349962234497, + "step": 2486 + }, + { + "epoch": 0.8764757709251101, + "grad_norm": 1.5090052513716286, + "learning_rate": 1.2838663689206108e-05, + "loss": 0.5726315379142761, + "step": 2487 + }, + { + "epoch": 0.8768281938325991, + "grad_norm": 1.450396836473225, + "learning_rate": 1.2833076487321465e-05, + "loss": 0.8181554079055786, + "step": 2488 + }, + { + "epoch": 0.8771806167400881, + "grad_norm": 1.71919397348368, + "learning_rate": 1.2827488323695522e-05, + "loss": 0.7465275526046753, + "step": 2489 + }, + { + "epoch": 0.8775330396475771, + "grad_norm": 1.2623461784182488, + "learning_rate": 1.2821899200225288e-05, + "loss": 0.6083456873893738, + "step": 2490 + }, + { + "epoch": 0.877885462555066, + "grad_norm": 1.4922167619772364, + "learning_rate": 1.2816309118808095e-05, + "loss": 0.6393307447433472, + "step": 2491 + }, + { + "epoch": 0.8782378854625551, + "grad_norm": 1.3846980777960398, + "learning_rate": 1.2810718081341604e-05, + "loss": 0.6562504768371582, + "step": 2492 + }, + { + "epoch": 0.878590308370044, + "grad_norm": 1.5590691123255283, + "learning_rate": 
1.2805126089723798e-05, + "loss": 0.6737300753593445, + "step": 2493 + }, + { + "epoch": 0.8789427312775331, + "grad_norm": 1.7724399876158112, + "learning_rate": 1.2799533145852982e-05, + "loss": 0.6246815919876099, + "step": 2494 + }, + { + "epoch": 0.879295154185022, + "grad_norm": 1.7718655540042538, + "learning_rate": 1.2793939251627788e-05, + "loss": 0.7499577403068542, + "step": 2495 + }, + { + "epoch": 0.8796475770925111, + "grad_norm": 1.6628095797742937, + "learning_rate": 1.2788344408947171e-05, + "loss": 0.7645655870437622, + "step": 2496 + }, + { + "epoch": 0.88, + "grad_norm": 1.732888201165417, + "learning_rate": 1.27827486197104e-05, + "loss": 0.7407524585723877, + "step": 2497 + }, + { + "epoch": 0.8803524229074889, + "grad_norm": 1.590151572985607, + "learning_rate": 1.2777151885817078e-05, + "loss": 0.6401108503341675, + "step": 2498 + }, + { + "epoch": 0.880704845814978, + "grad_norm": 1.5984459598023502, + "learning_rate": 1.2771554209167116e-05, + "loss": 0.8332269191741943, + "step": 2499 + }, + { + "epoch": 0.8810572687224669, + "grad_norm": 1.61859187638703, + "learning_rate": 1.2765955591660757e-05, + "loss": 0.7677830457687378, + "step": 2500 + }, + { + "epoch": 0.881409691629956, + "grad_norm": 1.4420535275594295, + "learning_rate": 1.2760356035198553e-05, + "loss": 0.8532943725585938, + "step": 2501 + }, + { + "epoch": 0.8817621145374449, + "grad_norm": 1.3662949943021319, + "learning_rate": 1.2754755541681384e-05, + "loss": 0.6287009716033936, + "step": 2502 + }, + { + "epoch": 0.882114537444934, + "grad_norm": 1.38981570117233, + "learning_rate": 1.2749154113010432e-05, + "loss": 0.7039133310317993, + "step": 2503 + }, + { + "epoch": 0.8824669603524229, + "grad_norm": 1.6518390089780828, + "learning_rate": 1.2743551751087222e-05, + "loss": 0.6959357857704163, + "step": 2504 + }, + { + "epoch": 0.8828193832599119, + "grad_norm": 1.3554006828606007, + "learning_rate": 1.2737948457813571e-05, + "loss": 0.6862938404083252, + "step": 2505 
+ }, + { + "epoch": 0.8831718061674009, + "grad_norm": 1.6773466383223146, + "learning_rate": 1.273234423509163e-05, + "loss": 0.6903352737426758, + "step": 2506 + }, + { + "epoch": 0.8835242290748899, + "grad_norm": 1.374322606051121, + "learning_rate": 1.2726739084823851e-05, + "loss": 0.7226145267486572, + "step": 2507 + }, + { + "epoch": 0.8838766519823789, + "grad_norm": 1.4091144718113782, + "learning_rate": 1.2721133008913015e-05, + "loss": 0.7865043878555298, + "step": 2508 + }, + { + "epoch": 0.8842290748898678, + "grad_norm": 1.4501170174913356, + "learning_rate": 1.2715526009262209e-05, + "loss": 0.6594572067260742, + "step": 2509 + }, + { + "epoch": 0.8845814977973568, + "grad_norm": 1.3500042347590218, + "learning_rate": 1.270991808777483e-05, + "loss": 0.5967481136322021, + "step": 2510 + }, + { + "epoch": 0.8849339207048458, + "grad_norm": 1.3600104271689806, + "learning_rate": 1.2704309246354599e-05, + "loss": 0.7843632698059082, + "step": 2511 + }, + { + "epoch": 0.8852863436123348, + "grad_norm": 1.3543191802484777, + "learning_rate": 1.2698699486905538e-05, + "loss": 0.7475506067276001, + "step": 2512 + }, + { + "epoch": 0.8856387665198238, + "grad_norm": 1.4881501151953718, + "learning_rate": 1.2693088811331987e-05, + "loss": 0.8082534670829773, + "step": 2513 + }, + { + "epoch": 0.8859911894273128, + "grad_norm": 1.6899694353159702, + "learning_rate": 1.2687477221538598e-05, + "loss": 0.7421785593032837, + "step": 2514 + }, + { + "epoch": 0.8863436123348017, + "grad_norm": 1.295151070825849, + "learning_rate": 1.2681864719430328e-05, + "loss": 0.6268718242645264, + "step": 2515 + }, + { + "epoch": 0.8866960352422908, + "grad_norm": 1.595396389533138, + "learning_rate": 1.2676251306912448e-05, + "loss": 0.7285459041595459, + "step": 2516 + }, + { + "epoch": 0.8870484581497797, + "grad_norm": 1.4826705601530517, + "learning_rate": 1.2670636985890542e-05, + "loss": 0.6132184267044067, + "step": 2517 + }, + { + "epoch": 0.8874008810572688, + 
"grad_norm": 1.4018565352445778, + "learning_rate": 1.2665021758270488e-05, + "loss": 0.5550754070281982, + "step": 2518 + }, + { + "epoch": 0.8877533039647577, + "grad_norm": 1.3628132273232696, + "learning_rate": 1.2659405625958488e-05, + "loss": 0.5357390642166138, + "step": 2519 + }, + { + "epoch": 0.8881057268722466, + "grad_norm": 1.4153066703364516, + "learning_rate": 1.2653788590861039e-05, + "loss": 0.5858328342437744, + "step": 2520 + }, + { + "epoch": 0.8884581497797357, + "grad_norm": 1.731815068535558, + "learning_rate": 1.2648170654884955e-05, + "loss": 0.7109283208847046, + "step": 2521 + }, + { + "epoch": 0.8888105726872246, + "grad_norm": 1.9753429482306435, + "learning_rate": 1.2642551819937348e-05, + "loss": 0.808137834072113, + "step": 2522 + }, + { + "epoch": 0.8891629955947137, + "grad_norm": 1.6385693606484741, + "learning_rate": 1.2636932087925637e-05, + "loss": 0.587998628616333, + "step": 2523 + }, + { + "epoch": 0.8895154185022026, + "grad_norm": 1.4234526769499198, + "learning_rate": 1.2631311460757545e-05, + "loss": 0.5555537343025208, + "step": 2524 + }, + { + "epoch": 0.8898678414096917, + "grad_norm": 1.4118650122814267, + "learning_rate": 1.2625689940341102e-05, + "loss": 0.641632080078125, + "step": 2525 + }, + { + "epoch": 0.8902202643171806, + "grad_norm": 1.5401015682174186, + "learning_rate": 1.262006752858464e-05, + "loss": 0.7005184888839722, + "step": 2526 + }, + { + "epoch": 0.8905726872246696, + "grad_norm": 1.272518513643159, + "learning_rate": 1.2614444227396792e-05, + "loss": 0.6907261610031128, + "step": 2527 + }, + { + "epoch": 0.8909251101321586, + "grad_norm": 1.4162379009723582, + "learning_rate": 1.2608820038686492e-05, + "loss": 0.5757718086242676, + "step": 2528 + }, + { + "epoch": 0.8912775330396476, + "grad_norm": 1.888252337049927, + "learning_rate": 1.2603194964362979e-05, + "loss": 0.6462569832801819, + "step": 2529 + }, + { + "epoch": 0.8916299559471366, + "grad_norm": 2.6509089623338586, + 
"learning_rate": 1.2597569006335787e-05, + "loss": 0.7028999328613281, + "step": 2530 + }, + { + "epoch": 0.8919823788546255, + "grad_norm": 1.3325876541370223, + "learning_rate": 1.2591942166514763e-05, + "loss": 0.5789325833320618, + "step": 2531 + }, + { + "epoch": 0.8923348017621145, + "grad_norm": 1.5373223041612576, + "learning_rate": 1.258631444681003e-05, + "loss": 0.6545255184173584, + "step": 2532 + }, + { + "epoch": 0.8926872246696035, + "grad_norm": 1.560686991488605, + "learning_rate": 1.258068584913204e-05, + "loss": 0.7227469682693481, + "step": 2533 + }, + { + "epoch": 0.8930396475770925, + "grad_norm": 1.3545909427052794, + "learning_rate": 1.2575056375391513e-05, + "loss": 0.5985771417617798, + "step": 2534 + }, + { + "epoch": 0.8933920704845815, + "grad_norm": 1.5422643503857134, + "learning_rate": 1.2569426027499485e-05, + "loss": 0.6705960035324097, + "step": 2535 + }, + { + "epoch": 0.8937444933920705, + "grad_norm": 1.5427105799340322, + "learning_rate": 1.2563794807367284e-05, + "loss": 0.6662027835845947, + "step": 2536 + }, + { + "epoch": 0.8940969162995595, + "grad_norm": 1.5270286613671318, + "learning_rate": 1.2558162716906537e-05, + "loss": 0.7742453813552856, + "step": 2537 + }, + { + "epoch": 0.8944493392070485, + "grad_norm": 1.628032718158035, + "learning_rate": 1.255252975802916e-05, + "loss": 0.6124528050422668, + "step": 2538 + }, + { + "epoch": 0.8948017621145374, + "grad_norm": 1.455711423520218, + "learning_rate": 1.2546895932647365e-05, + "loss": 0.5728615522384644, + "step": 2539 + }, + { + "epoch": 0.8951541850220265, + "grad_norm": 1.5737389396802581, + "learning_rate": 1.2541261242673665e-05, + "loss": 0.6347167491912842, + "step": 2540 + }, + { + "epoch": 0.8955066079295154, + "grad_norm": 1.62324317727844, + "learning_rate": 1.2535625690020861e-05, + "loss": 0.6350656747817993, + "step": 2541 + }, + { + "epoch": 0.8958590308370044, + "grad_norm": 1.674339310689998, + "learning_rate": 1.2529989276602043e-05, + "loss": 
0.7538303732872009, + "step": 2542 + }, + { + "epoch": 0.8962114537444934, + "grad_norm": 1.5900983527544528, + "learning_rate": 1.2524352004330607e-05, + "loss": 0.8154318928718567, + "step": 2543 + }, + { + "epoch": 0.8965638766519823, + "grad_norm": 1.4033932104877718, + "learning_rate": 1.2518713875120222e-05, + "loss": 0.5313037633895874, + "step": 2544 + }, + { + "epoch": 0.8969162995594714, + "grad_norm": 1.3069539051845793, + "learning_rate": 1.2513074890884864e-05, + "loss": 0.740921139717102, + "step": 2545 + }, + { + "epoch": 0.8972687224669603, + "grad_norm": 1.593785966579892, + "learning_rate": 1.250743505353879e-05, + "loss": 0.6079888343811035, + "step": 2546 + }, + { + "epoch": 0.8976211453744494, + "grad_norm": 1.266024042192646, + "learning_rate": 1.2501794364996553e-05, + "loss": 0.46736663579940796, + "step": 2547 + }, + { + "epoch": 0.8979735682819383, + "grad_norm": 1.5066472302506413, + "learning_rate": 1.2496152827172982e-05, + "loss": 0.5670880079269409, + "step": 2548 + }, + { + "epoch": 0.8983259911894274, + "grad_norm": 1.4991563073413907, + "learning_rate": 1.2490510441983212e-05, + "loss": 0.7845931649208069, + "step": 2549 + }, + { + "epoch": 0.8986784140969163, + "grad_norm": 1.5458127280177445, + "learning_rate": 1.2484867211342653e-05, + "loss": 0.5625143647193909, + "step": 2550 + }, + { + "epoch": 0.8990308370044053, + "grad_norm": 1.5409896244330605, + "learning_rate": 1.2479223137167011e-05, + "loss": 0.6631217002868652, + "step": 2551 + }, + { + "epoch": 0.8993832599118943, + "grad_norm": 1.6071757454969378, + "learning_rate": 1.247357822137227e-05, + "loss": 0.6588548421859741, + "step": 2552 + }, + { + "epoch": 0.8997356828193832, + "grad_norm": 1.4192601474848106, + "learning_rate": 1.24679324658747e-05, + "loss": 0.8046029806137085, + "step": 2553 + }, + { + "epoch": 0.9000881057268723, + "grad_norm": 1.6272051463241026, + "learning_rate": 1.2462285872590862e-05, + "loss": 0.6651894450187683, + "step": 2554 + }, + { + 
"epoch": 0.9004405286343612, + "grad_norm": 1.5179002680249722, + "learning_rate": 1.2456638443437605e-05, + "loss": 0.5888474583625793, + "step": 2555 + }, + { + "epoch": 0.9007929515418502, + "grad_norm": 1.7319345866859506, + "learning_rate": 1.2450990180332045e-05, + "loss": 0.5915735363960266, + "step": 2556 + }, + { + "epoch": 0.9011453744493392, + "grad_norm": 1.5409991319630119, + "learning_rate": 1.24453410851916e-05, + "loss": 0.6830431222915649, + "step": 2557 + }, + { + "epoch": 0.9014977973568282, + "grad_norm": 1.3954767744454935, + "learning_rate": 1.2439691159933955e-05, + "loss": 0.6812379956245422, + "step": 2558 + }, + { + "epoch": 0.9018502202643172, + "grad_norm": 1.3481753587360845, + "learning_rate": 1.2434040406477092e-05, + "loss": 0.6887152791023254, + "step": 2559 + }, + { + "epoch": 0.9022026431718062, + "grad_norm": 1.495436388275929, + "learning_rate": 1.2428388826739254e-05, + "loss": 0.677071213722229, + "step": 2560 + }, + { + "epoch": 0.9025550660792951, + "grad_norm": 1.5809198519920526, + "learning_rate": 1.242273642263899e-05, + "loss": 0.6635652780532837, + "step": 2561 + }, + { + "epoch": 0.9029074889867842, + "grad_norm": 1.7455357614962055, + "learning_rate": 1.2417083196095105e-05, + "loss": 0.7543712854385376, + "step": 2562 + }, + { + "epoch": 0.9032599118942731, + "grad_norm": 1.743758273604275, + "learning_rate": 1.2411429149026701e-05, + "loss": 0.6219073534011841, + "step": 2563 + }, + { + "epoch": 0.9036123348017621, + "grad_norm": 1.360518097358955, + "learning_rate": 1.2405774283353144e-05, + "loss": 0.6576533317565918, + "step": 2564 + }, + { + "epoch": 0.9039647577092511, + "grad_norm": 1.3683846685040542, + "learning_rate": 1.240011860099409e-05, + "loss": 0.6458585262298584, + "step": 2565 + }, + { + "epoch": 0.90431718061674, + "grad_norm": 1.5753618523282886, + "learning_rate": 1.2394462103869464e-05, + "loss": 0.6943198442459106, + "step": 2566 + }, + { + "epoch": 0.9046696035242291, + "grad_norm": 
1.5425443594991994, + "learning_rate": 1.2388804793899473e-05, + "loss": 0.6684235334396362, + "step": 2567 + }, + { + "epoch": 0.905022026431718, + "grad_norm": 1.4432793187881665, + "learning_rate": 1.2383146673004598e-05, + "loss": 0.6707017421722412, + "step": 2568 + }, + { + "epoch": 0.9053744493392071, + "grad_norm": 1.4610510830510222, + "learning_rate": 1.2377487743105593e-05, + "loss": 0.6009544134140015, + "step": 2569 + }, + { + "epoch": 0.905726872246696, + "grad_norm": 1.3343070463381261, + "learning_rate": 1.2371828006123488e-05, + "loss": 0.57770836353302, + "step": 2570 + }, + { + "epoch": 0.9060792951541851, + "grad_norm": 1.50423514822828, + "learning_rate": 1.236616746397959e-05, + "loss": 0.6146866083145142, + "step": 2571 + }, + { + "epoch": 0.906431718061674, + "grad_norm": 1.4060902038910876, + "learning_rate": 1.2360506118595476e-05, + "loss": 0.6374951601028442, + "step": 2572 + }, + { + "epoch": 0.906784140969163, + "grad_norm": 1.5006132241656203, + "learning_rate": 1.2354843971892998e-05, + "loss": 0.6933800578117371, + "step": 2573 + }, + { + "epoch": 0.907136563876652, + "grad_norm": 1.6402374081466708, + "learning_rate": 1.2349181025794278e-05, + "loss": 0.857126772403717, + "step": 2574 + }, + { + "epoch": 0.9074889867841409, + "grad_norm": 1.7970464713795387, + "learning_rate": 1.2343517282221704e-05, + "loss": 0.7316192388534546, + "step": 2575 + }, + { + "epoch": 0.90784140969163, + "grad_norm": 1.7338748475900745, + "learning_rate": 1.2337852743097947e-05, + "loss": 0.7916824817657471, + "step": 2576 + }, + { + "epoch": 0.9081938325991189, + "grad_norm": 1.342845056559204, + "learning_rate": 1.2332187410345941e-05, + "loss": 0.6437021493911743, + "step": 2577 + }, + { + "epoch": 0.908546255506608, + "grad_norm": 1.547322536503476, + "learning_rate": 1.2326521285888892e-05, + "loss": 0.8788109421730042, + "step": 2578 + }, + { + "epoch": 0.9088986784140969, + "grad_norm": 1.4382005842040866, + "learning_rate": 
1.2320854371650268e-05, + "loss": 0.704395055770874, + "step": 2579 + }, + { + "epoch": 0.9092511013215859, + "grad_norm": 1.410037340911335, + "learning_rate": 1.2315186669553814e-05, + "loss": 0.6579844951629639, + "step": 2580 + }, + { + "epoch": 0.9096035242290749, + "grad_norm": 1.3089054036910626, + "learning_rate": 1.2309518181523537e-05, + "loss": 0.6329941749572754, + "step": 2581 + }, + { + "epoch": 0.9099559471365639, + "grad_norm": 1.746183595307062, + "learning_rate": 1.2303848909483711e-05, + "loss": 0.8868603706359863, + "step": 2582 + }, + { + "epoch": 0.9103083700440529, + "grad_norm": 1.4531546458491524, + "learning_rate": 1.2298178855358875e-05, + "loss": 0.6402688026428223, + "step": 2583 + }, + { + "epoch": 0.9106607929515419, + "grad_norm": 1.3289180353613772, + "learning_rate": 1.2292508021073846e-05, + "loss": 0.8017194271087646, + "step": 2584 + }, + { + "epoch": 0.9110132158590308, + "grad_norm": 1.6340808373356166, + "learning_rate": 1.2286836408553687e-05, + "loss": 0.7396517992019653, + "step": 2585 + }, + { + "epoch": 0.9113656387665198, + "grad_norm": 1.5443847526543046, + "learning_rate": 1.2281164019723737e-05, + "loss": 0.6123272776603699, + "step": 2586 + }, + { + "epoch": 0.9117180616740088, + "grad_norm": 1.464544186162697, + "learning_rate": 1.2275490856509591e-05, + "loss": 0.7675807476043701, + "step": 2587 + }, + { + "epoch": 0.9120704845814978, + "grad_norm": 1.67164115622116, + "learning_rate": 1.2269816920837121e-05, + "loss": 0.6814998388290405, + "step": 2588 + }, + { + "epoch": 0.9124229074889868, + "grad_norm": 1.3228366401729674, + "learning_rate": 1.2264142214632441e-05, + "loss": 0.6290348768234253, + "step": 2589 + }, + { + "epoch": 0.9127753303964757, + "grad_norm": 1.5676260945728981, + "learning_rate": 1.2258466739821946e-05, + "loss": 0.6752464175224304, + "step": 2590 + }, + { + "epoch": 0.9131277533039648, + "grad_norm": 1.3388236473063337, + "learning_rate": 1.2252790498332275e-05, + "loss": 
0.6153687238693237, + "step": 2591 + }, + { + "epoch": 0.9134801762114537, + "grad_norm": 1.5346187118504635, + "learning_rate": 1.2247113492090344e-05, + "loss": 0.5952479839324951, + "step": 2592 + }, + { + "epoch": 0.9138325991189428, + "grad_norm": 1.4457638395568853, + "learning_rate": 1.2241435723023309e-05, + "loss": 0.5457659959793091, + "step": 2593 + }, + { + "epoch": 0.9141850220264317, + "grad_norm": 1.5389040689398128, + "learning_rate": 1.2235757193058607e-05, + "loss": 0.7373491525650024, + "step": 2594 + }, + { + "epoch": 0.9145374449339208, + "grad_norm": 1.3149945847764668, + "learning_rate": 1.2230077904123914e-05, + "loss": 0.6564488410949707, + "step": 2595 + }, + { + "epoch": 0.9148898678414097, + "grad_norm": 1.8716233271125673, + "learning_rate": 1.2224397858147176e-05, + "loss": 0.6790947914123535, + "step": 2596 + }, + { + "epoch": 0.9152422907488986, + "grad_norm": 1.6467277287942856, + "learning_rate": 1.2218717057056592e-05, + "loss": 0.8304486274719238, + "step": 2597 + }, + { + "epoch": 0.9155947136563877, + "grad_norm": 1.7018746535629268, + "learning_rate": 1.2213035502780616e-05, + "loss": 0.7452701330184937, + "step": 2598 + }, + { + "epoch": 0.9159471365638766, + "grad_norm": 1.270448247487427, + "learning_rate": 1.2207353197247957e-05, + "loss": 0.572200357913971, + "step": 2599 + }, + { + "epoch": 0.9162995594713657, + "grad_norm": 1.574291214704138, + "learning_rate": 1.2201670142387587e-05, + "loss": 0.7142342925071716, + "step": 2600 + }, + { + "epoch": 0.9166519823788546, + "grad_norm": 1.367606009894927, + "learning_rate": 1.219598634012872e-05, + "loss": 0.9390528202056885, + "step": 2601 + }, + { + "epoch": 0.9170044052863436, + "grad_norm": 1.6870829349403977, + "learning_rate": 1.2190301792400832e-05, + "loss": 0.6897540092468262, + "step": 2602 + }, + { + "epoch": 0.9173568281938326, + "grad_norm": 1.5631074773710765, + "learning_rate": 1.2184616501133649e-05, + "loss": 0.7309582233428955, + "step": 2603 + }, + { + 
"epoch": 0.9177092511013216, + "grad_norm": 1.4956685909345118, + "learning_rate": 1.2178930468257154e-05, + "loss": 0.7692370414733887, + "step": 2604 + }, + { + "epoch": 0.9180616740088106, + "grad_norm": 1.6160577913139176, + "learning_rate": 1.2173243695701575e-05, + "loss": 0.7650456428527832, + "step": 2605 + }, + { + "epoch": 0.9184140969162996, + "grad_norm": 1.4419682356133905, + "learning_rate": 1.2167556185397396e-05, + "loss": 0.6000699996948242, + "step": 2606 + }, + { + "epoch": 0.9187665198237885, + "grad_norm": 1.368037173998054, + "learning_rate": 1.2161867939275344e-05, + "loss": 0.6227651834487915, + "step": 2607 + }, + { + "epoch": 0.9191189427312776, + "grad_norm": 1.3507337866227296, + "learning_rate": 1.2156178959266414e-05, + "loss": 0.6554160118103027, + "step": 2608 + }, + { + "epoch": 0.9194713656387665, + "grad_norm": 1.4986959017577084, + "learning_rate": 1.2150489247301826e-05, + "loss": 0.5360773801803589, + "step": 2609 + }, + { + "epoch": 0.9198237885462555, + "grad_norm": 1.3546990782009203, + "learning_rate": 1.2144798805313065e-05, + "loss": 0.7184062004089355, + "step": 2610 + }, + { + "epoch": 0.9201762114537445, + "grad_norm": 1.6293146255106934, + "learning_rate": 1.2139107635231857e-05, + "loss": 0.646910548210144, + "step": 2611 + }, + { + "epoch": 0.9205286343612334, + "grad_norm": 1.449047238736513, + "learning_rate": 1.2133415738990179e-05, + "loss": 0.7794413566589355, + "step": 2612 + }, + { + "epoch": 0.9208810572687225, + "grad_norm": 1.5173448374489182, + "learning_rate": 1.2127723118520254e-05, + "loss": 0.5904654860496521, + "step": 2613 + }, + { + "epoch": 0.9212334801762114, + "grad_norm": 1.6062827687776695, + "learning_rate": 1.2122029775754545e-05, + "loss": 0.5526635646820068, + "step": 2614 + }, + { + "epoch": 0.9215859030837005, + "grad_norm": 1.584080412995617, + "learning_rate": 1.2116335712625766e-05, + "loss": 0.6832528710365295, + "step": 2615 + }, + { + "epoch": 0.9219383259911894, + "grad_norm": 
1.5962919739796952, + "learning_rate": 1.211064093106688e-05, + "loss": 0.5858304500579834, + "step": 2616 + }, + { + "epoch": 0.9222907488986785, + "grad_norm": 1.6542154949587857, + "learning_rate": 1.2104945433011079e-05, + "loss": 0.7383478879928589, + "step": 2617 + }, + { + "epoch": 0.9226431718061674, + "grad_norm": 1.4197774198085091, + "learning_rate": 1.2099249220391815e-05, + "loss": 0.6466768980026245, + "step": 2618 + }, + { + "epoch": 0.9229955947136564, + "grad_norm": 1.6780588288371647, + "learning_rate": 1.209355229514277e-05, + "loss": 0.5681238174438477, + "step": 2619 + }, + { + "epoch": 0.9233480176211454, + "grad_norm": 1.4473210287022626, + "learning_rate": 1.2087854659197874e-05, + "loss": 0.5726606249809265, + "step": 2620 + }, + { + "epoch": 0.9237004405286343, + "grad_norm": 1.5671254030487451, + "learning_rate": 1.2082156314491298e-05, + "loss": 0.6643342971801758, + "step": 2621 + }, + { + "epoch": 0.9240528634361234, + "grad_norm": 1.6891696074210503, + "learning_rate": 1.2076457262957454e-05, + "loss": 0.5408967733383179, + "step": 2622 + }, + { + "epoch": 0.9244052863436123, + "grad_norm": 1.503887173232949, + "learning_rate": 1.207075750653099e-05, + "loss": 0.706169843673706, + "step": 2623 + }, + { + "epoch": 0.9247577092511013, + "grad_norm": 1.7934999117659478, + "learning_rate": 1.2065057047146797e-05, + "loss": 0.7973969578742981, + "step": 2624 + }, + { + "epoch": 0.9251101321585903, + "grad_norm": 1.4120942109312036, + "learning_rate": 1.2059355886740002e-05, + "loss": 0.6907010674476624, + "step": 2625 + }, + { + "epoch": 0.9254625550660793, + "grad_norm": 1.8378017160561377, + "learning_rate": 1.2053654027245977e-05, + "loss": 0.8174253702163696, + "step": 2626 + }, + { + "epoch": 0.9258149779735683, + "grad_norm": 1.8873519737119473, + "learning_rate": 1.204795147060032e-05, + "loss": 0.60319983959198, + "step": 2627 + }, + { + "epoch": 0.9261674008810573, + "grad_norm": 2.916318649806586, + "learning_rate": 
1.204224821873887e-05, + "loss": 0.718228816986084, + "step": 2628 + }, + { + "epoch": 0.9265198237885462, + "grad_norm": 1.5801609410641386, + "learning_rate": 1.2036544273597708e-05, + "loss": 0.7385132312774658, + "step": 2629 + }, + { + "epoch": 0.9268722466960353, + "grad_norm": 1.5320403236251587, + "learning_rate": 1.203083963711315e-05, + "loss": 0.7700635194778442, + "step": 2630 + }, + { + "epoch": 0.9272246696035242, + "grad_norm": 1.4381703720368488, + "learning_rate": 1.2025134311221732e-05, + "loss": 0.8767666816711426, + "step": 2631 + }, + { + "epoch": 0.9275770925110132, + "grad_norm": 1.4534975042510074, + "learning_rate": 1.2019428297860241e-05, + "loss": 0.6517986059188843, + "step": 2632 + }, + { + "epoch": 0.9279295154185022, + "grad_norm": 1.3295910752440807, + "learning_rate": 1.2013721598965688e-05, + "loss": 0.5967941284179688, + "step": 2633 + }, + { + "epoch": 0.9282819383259912, + "grad_norm": 2.085302745009741, + "learning_rate": 1.2008014216475327e-05, + "loss": 0.7480533123016357, + "step": 2634 + }, + { + "epoch": 0.9286343612334802, + "grad_norm": 1.415633444981562, + "learning_rate": 1.2002306152326626e-05, + "loss": 0.8020488023757935, + "step": 2635 + }, + { + "epoch": 0.9289867841409691, + "grad_norm": 1.235581839334599, + "learning_rate": 1.1996597408457302e-05, + "loss": 0.5535889863967896, + "step": 2636 + }, + { + "epoch": 0.9293392070484582, + "grad_norm": 1.5093780754929471, + "learning_rate": 1.1990887986805295e-05, + "loss": 0.6888864040374756, + "step": 2637 + }, + { + "epoch": 0.9296916299559471, + "grad_norm": 1.761723253773031, + "learning_rate": 1.1985177889308777e-05, + "loss": 0.7723515033721924, + "step": 2638 + }, + { + "epoch": 0.9300440528634362, + "grad_norm": 2.4386861549294476, + "learning_rate": 1.1979467117906143e-05, + "loss": 0.6929488182067871, + "step": 2639 + }, + { + "epoch": 0.9303964757709251, + "grad_norm": 1.7413716913523498, + "learning_rate": 1.1973755674536027e-05, + "loss": 
0.7025216221809387, + "step": 2640 + }, + { + "epoch": 0.9307488986784141, + "grad_norm": 1.5278537581621425, + "learning_rate": 1.1968043561137287e-05, + "loss": 0.6618740558624268, + "step": 2641 + }, + { + "epoch": 0.9311013215859031, + "grad_norm": 1.3720349025623486, + "learning_rate": 1.1962330779649002e-05, + "loss": 0.5308352708816528, + "step": 2642 + }, + { + "epoch": 0.931453744493392, + "grad_norm": 1.6043152610659495, + "learning_rate": 1.1956617332010488e-05, + "loss": 0.6559470891952515, + "step": 2643 + }, + { + "epoch": 0.9318061674008811, + "grad_norm": 1.5758989244918422, + "learning_rate": 1.1950903220161286e-05, + "loss": 0.6572221517562866, + "step": 2644 + }, + { + "epoch": 0.93215859030837, + "grad_norm": 1.7357943090474917, + "learning_rate": 1.194518844604115e-05, + "loss": 0.7854161262512207, + "step": 2645 + }, + { + "epoch": 0.932511013215859, + "grad_norm": 1.555855365183626, + "learning_rate": 1.1939473011590075e-05, + "loss": 0.6471760869026184, + "step": 2646 + }, + { + "epoch": 0.932863436123348, + "grad_norm": 1.5672890328663938, + "learning_rate": 1.1933756918748271e-05, + "loss": 0.6261184215545654, + "step": 2647 + }, + { + "epoch": 0.933215859030837, + "grad_norm": 1.425764950800843, + "learning_rate": 1.1928040169456176e-05, + "loss": 0.6876180171966553, + "step": 2648 + }, + { + "epoch": 0.933568281938326, + "grad_norm": 1.6203483271740744, + "learning_rate": 1.1922322765654446e-05, + "loss": 0.6782447099685669, + "step": 2649 + }, + { + "epoch": 0.933920704845815, + "grad_norm": 1.640471126849017, + "learning_rate": 1.1916604709283958e-05, + "loss": 0.6085894107818604, + "step": 2650 + }, + { + "epoch": 0.934273127753304, + "grad_norm": 1.6964969219798813, + "learning_rate": 1.1910886002285822e-05, + "loss": 0.6940577030181885, + "step": 2651 + }, + { + "epoch": 0.934625550660793, + "grad_norm": 1.4704189591593113, + "learning_rate": 1.1905166646601356e-05, + "loss": 0.8204144239425659, + "step": 2652 + }, + { + "epoch": 
0.9349779735682819, + "grad_norm": 1.389489538033466, + "learning_rate": 1.1899446644172106e-05, + "loss": 0.6184309720993042, + "step": 2653 + }, + { + "epoch": 0.9353303964757709, + "grad_norm": 2.1507675107714306, + "learning_rate": 1.1893725996939831e-05, + "loss": 0.7499250173568726, + "step": 2654 + }, + { + "epoch": 0.9356828193832599, + "grad_norm": 1.739709417281562, + "learning_rate": 1.1888004706846519e-05, + "loss": 0.7021058797836304, + "step": 2655 + }, + { + "epoch": 0.9360352422907489, + "grad_norm": 1.4311959050457856, + "learning_rate": 1.188228277583436e-05, + "loss": 0.6005666255950928, + "step": 2656 + }, + { + "epoch": 0.9363876651982379, + "grad_norm": 1.4910024814198868, + "learning_rate": 1.1876560205845782e-05, + "loss": 0.6572481393814087, + "step": 2657 + }, + { + "epoch": 0.9367400881057268, + "grad_norm": 1.5258435486694566, + "learning_rate": 1.187083699882341e-05, + "loss": 0.7402434349060059, + "step": 2658 + }, + { + "epoch": 0.9370925110132159, + "grad_norm": 1.4352893489445113, + "learning_rate": 1.1865113156710106e-05, + "loss": 0.6693596243858337, + "step": 2659 + }, + { + "epoch": 0.9374449339207048, + "grad_norm": 1.6704808140330663, + "learning_rate": 1.1859388681448925e-05, + "loss": 0.7708617448806763, + "step": 2660 + }, + { + "epoch": 0.9377973568281939, + "grad_norm": 1.4245143913781195, + "learning_rate": 1.1853663574983154e-05, + "loss": 0.5871701836585999, + "step": 2661 + }, + { + "epoch": 0.9381497797356828, + "grad_norm": 1.505716027406483, + "learning_rate": 1.1847937839256287e-05, + "loss": 0.6492994427680969, + "step": 2662 + }, + { + "epoch": 0.9385022026431719, + "grad_norm": 1.3908643684674444, + "learning_rate": 1.1842211476212038e-05, + "loss": 0.6803429126739502, + "step": 2663 + }, + { + "epoch": 0.9388546255506608, + "grad_norm": 1.5017846140199234, + "learning_rate": 1.1836484487794318e-05, + "loss": 0.5602244734764099, + "step": 2664 + }, + { + "epoch": 0.9392070484581497, + "grad_norm": 
1.2797636855685697, + "learning_rate": 1.183075687594727e-05, + "loss": 0.6562157869338989, + "step": 2665 + }, + { + "epoch": 0.9395594713656388, + "grad_norm": 1.4855818018568143, + "learning_rate": 1.182502864261524e-05, + "loss": 0.71474289894104, + "step": 2666 + }, + { + "epoch": 0.9399118942731277, + "grad_norm": 1.5995143445420303, + "learning_rate": 1.1819299789742782e-05, + "loss": 0.7130062580108643, + "step": 2667 + }, + { + "epoch": 0.9402643171806168, + "grad_norm": 1.645740195320987, + "learning_rate": 1.1813570319274663e-05, + "loss": 0.788813054561615, + "step": 2668 + }, + { + "epoch": 0.9406167400881057, + "grad_norm": 1.965041520497338, + "learning_rate": 1.1807840233155863e-05, + "loss": 0.6485022306442261, + "step": 2669 + }, + { + "epoch": 0.9409691629955947, + "grad_norm": 1.6399057690578631, + "learning_rate": 1.1802109533331562e-05, + "loss": 0.4491521418094635, + "step": 2670 + }, + { + "epoch": 0.9413215859030837, + "grad_norm": 1.6744760497066637, + "learning_rate": 1.1796378221747162e-05, + "loss": 0.6073683500289917, + "step": 2671 + }, + { + "epoch": 0.9416740088105727, + "grad_norm": 1.859395754773969, + "learning_rate": 1.179064630034826e-05, + "loss": 0.5942971706390381, + "step": 2672 + }, + { + "epoch": 0.9420264317180617, + "grad_norm": 1.4303169952284007, + "learning_rate": 1.1784913771080667e-05, + "loss": 0.7295013666152954, + "step": 2673 + }, + { + "epoch": 0.9423788546255507, + "grad_norm": 1.8192026049611665, + "learning_rate": 1.1779180635890394e-05, + "loss": 0.7347372770309448, + "step": 2674 + }, + { + "epoch": 0.9427312775330396, + "grad_norm": 1.5350977995485566, + "learning_rate": 1.1773446896723668e-05, + "loss": 0.5591942667961121, + "step": 2675 + }, + { + "epoch": 0.9430837004405286, + "grad_norm": 1.5036340589436215, + "learning_rate": 1.1767712555526911e-05, + "loss": 0.822568953037262, + "step": 2676 + }, + { + "epoch": 0.9434361233480176, + "grad_norm": 1.4619836017557306, + "learning_rate": 
1.1761977614246757e-05, + "loss": 0.649920642375946, + "step": 2677 + }, + { + "epoch": 0.9437885462555066, + "grad_norm": 1.4884584586985279, + "learning_rate": 1.1756242074830036e-05, + "loss": 0.6298861503601074, + "step": 2678 + }, + { + "epoch": 0.9441409691629956, + "grad_norm": 1.6194483495779424, + "learning_rate": 1.1750505939223787e-05, + "loss": 0.81938636302948, + "step": 2679 + }, + { + "epoch": 0.9444933920704845, + "grad_norm": 1.4751430048371623, + "learning_rate": 1.1744769209375248e-05, + "loss": 0.6627225875854492, + "step": 2680 + }, + { + "epoch": 0.9448458149779736, + "grad_norm": 1.310837287475738, + "learning_rate": 1.1739031887231864e-05, + "loss": 0.6563318371772766, + "step": 2681 + }, + { + "epoch": 0.9451982378854625, + "grad_norm": 1.3782616320804129, + "learning_rate": 1.1733293974741273e-05, + "loss": 0.5702694654464722, + "step": 2682 + }, + { + "epoch": 0.9455506607929516, + "grad_norm": 1.5543579440741437, + "learning_rate": 1.1727555473851321e-05, + "loss": 0.685553789138794, + "step": 2683 + }, + { + "epoch": 0.9459030837004405, + "grad_norm": 1.2085432227797441, + "learning_rate": 1.172181638651005e-05, + "loss": 0.6092622876167297, + "step": 2684 + }, + { + "epoch": 0.9462555066079296, + "grad_norm": 2.0946243925185013, + "learning_rate": 1.1716076714665701e-05, + "loss": 0.6650614738464355, + "step": 2685 + }, + { + "epoch": 0.9466079295154185, + "grad_norm": 1.6479809864443196, + "learning_rate": 1.171033646026671e-05, + "loss": 0.7665754556655884, + "step": 2686 + }, + { + "epoch": 0.9469603524229074, + "grad_norm": 1.3199886923676785, + "learning_rate": 1.1704595625261722e-05, + "loss": 0.6365277171134949, + "step": 2687 + }, + { + "epoch": 0.9473127753303965, + "grad_norm": 1.4825934002405374, + "learning_rate": 1.1698854211599565e-05, + "loss": 0.6622267961502075, + "step": 2688 + }, + { + "epoch": 0.9476651982378854, + "grad_norm": 1.4519347010464663, + "learning_rate": 1.1693112221229278e-05, + "loss": 
0.6636145710945129, + "step": 2689 + }, + { + "epoch": 0.9480176211453745, + "grad_norm": 1.3381328445735352, + "learning_rate": 1.168736965610008e-05, + "loss": 0.6943212747573853, + "step": 2690 + }, + { + "epoch": 0.9483700440528634, + "grad_norm": 1.5439836232478343, + "learning_rate": 1.1681626518161397e-05, + "loss": 0.7479512691497803, + "step": 2691 + }, + { + "epoch": 0.9487224669603525, + "grad_norm": 1.5424571304173897, + "learning_rate": 1.1675882809362846e-05, + "loss": 0.7227041721343994, + "step": 2692 + }, + { + "epoch": 0.9490748898678414, + "grad_norm": 1.3855049912904343, + "learning_rate": 1.1670138531654238e-05, + "loss": 0.7366166114807129, + "step": 2693 + }, + { + "epoch": 0.9494273127753304, + "grad_norm": 1.634945701470733, + "learning_rate": 1.1664393686985571e-05, + "loss": 0.8634493350982666, + "step": 2694 + }, + { + "epoch": 0.9497797356828194, + "grad_norm": 1.3102748532201536, + "learning_rate": 1.165864827730705e-05, + "loss": 0.5802862048149109, + "step": 2695 + }, + { + "epoch": 0.9501321585903084, + "grad_norm": 1.571840947668404, + "learning_rate": 1.1652902304569053e-05, + "loss": 0.5931085348129272, + "step": 2696 + }, + { + "epoch": 0.9504845814977974, + "grad_norm": 1.7175179856841813, + "learning_rate": 1.164715577072217e-05, + "loss": 0.7684508562088013, + "step": 2697 + }, + { + "epoch": 0.9508370044052863, + "grad_norm": 1.6094834386500196, + "learning_rate": 1.1641408677717158e-05, + "loss": 0.94246906042099, + "step": 2698 + }, + { + "epoch": 0.9511894273127753, + "grad_norm": 1.3999360216133725, + "learning_rate": 1.1635661027504985e-05, + "loss": 0.7072316408157349, + "step": 2699 + }, + { + "epoch": 0.9515418502202643, + "grad_norm": 1.5926279454886292, + "learning_rate": 1.16299128220368e-05, + "loss": 0.5872572064399719, + "step": 2700 + }, + { + "epoch": 0.9518942731277533, + "grad_norm": 1.4987885212929257, + "learning_rate": 1.1624164063263931e-05, + "loss": 0.6549060344696045, + "step": 2701 + }, + { + 
"epoch": 0.9522466960352423, + "grad_norm": 1.6773153304869155, + "learning_rate": 1.161841475313791e-05, + "loss": 0.7338137626647949, + "step": 2702 + }, + { + "epoch": 0.9525991189427313, + "grad_norm": 1.6523970676343225, + "learning_rate": 1.161266489361045e-05, + "loss": 0.6942911148071289, + "step": 2703 + }, + { + "epoch": 0.9529515418502202, + "grad_norm": 2.037450532351288, + "learning_rate": 1.1606914486633444e-05, + "loss": 0.674375057220459, + "step": 2704 + }, + { + "epoch": 0.9533039647577093, + "grad_norm": 1.6450610385875453, + "learning_rate": 1.160116353415898e-05, + "loss": 0.6790377497673035, + "step": 2705 + }, + { + "epoch": 0.9536563876651982, + "grad_norm": 1.6724856793361191, + "learning_rate": 1.1595412038139326e-05, + "loss": 0.5902142524719238, + "step": 2706 + }, + { + "epoch": 0.9540088105726873, + "grad_norm": 1.4286047469499437, + "learning_rate": 1.1589660000526937e-05, + "loss": 0.7034019231796265, + "step": 2707 + }, + { + "epoch": 0.9543612334801762, + "grad_norm": 3.1062423334867106, + "learning_rate": 1.158390742327445e-05, + "loss": 0.6986846923828125, + "step": 2708 + }, + { + "epoch": 0.9547136563876651, + "grad_norm": 1.8367783325674814, + "learning_rate": 1.1578154308334683e-05, + "loss": 0.6972544193267822, + "step": 2709 + }, + { + "epoch": 0.9550660792951542, + "grad_norm": 1.3370474194561557, + "learning_rate": 1.1572400657660646e-05, + "loss": 0.6312702298164368, + "step": 2710 + }, + { + "epoch": 0.9554185022026431, + "grad_norm": 1.7161015062577845, + "learning_rate": 1.1566646473205518e-05, + "loss": 0.7584360241889954, + "step": 2711 + }, + { + "epoch": 0.9557709251101322, + "grad_norm": 1.256436023255263, + "learning_rate": 1.156089175692267e-05, + "loss": 0.700894296169281, + "step": 2712 + }, + { + "epoch": 0.9561233480176211, + "grad_norm": 1.3257581819044393, + "learning_rate": 1.1555136510765645e-05, + "loss": 0.5637902617454529, + "step": 2713 + }, + { + "epoch": 0.9564757709251102, + "grad_norm": 
1.388319575976614, + "learning_rate": 1.1549380736688173e-05, + "loss": 0.4537314772605896, + "step": 2714 + }, + { + "epoch": 0.9568281938325991, + "grad_norm": 1.8324279373886256, + "learning_rate": 1.1543624436644161e-05, + "loss": 0.7880423069000244, + "step": 2715 + }, + { + "epoch": 0.9571806167400881, + "grad_norm": 1.6310441104063826, + "learning_rate": 1.1537867612587692e-05, + "loss": 0.7314344644546509, + "step": 2716 + }, + { + "epoch": 0.9575330396475771, + "grad_norm": 1.7810937354544796, + "learning_rate": 1.1532110266473026e-05, + "loss": 0.9550024271011353, + "step": 2717 + }, + { + "epoch": 0.9578854625550661, + "grad_norm": 1.3474455317445524, + "learning_rate": 1.152635240025461e-05, + "loss": 0.6482470035552979, + "step": 2718 + }, + { + "epoch": 0.9582378854625551, + "grad_norm": 1.6637520992254753, + "learning_rate": 1.152059401588705e-05, + "loss": 0.6347365975379944, + "step": 2719 + }, + { + "epoch": 0.958590308370044, + "grad_norm": 1.469780222161662, + "learning_rate": 1.151483511532515e-05, + "loss": 0.7214993238449097, + "step": 2720 + }, + { + "epoch": 0.958942731277533, + "grad_norm": 1.4597118679681749, + "learning_rate": 1.1509075700523869e-05, + "loss": 0.6255312561988831, + "step": 2721 + }, + { + "epoch": 0.959295154185022, + "grad_norm": 1.4735593911126945, + "learning_rate": 1.1503315773438352e-05, + "loss": 0.6152437925338745, + "step": 2722 + }, + { + "epoch": 0.959647577092511, + "grad_norm": 1.8178378627357112, + "learning_rate": 1.1497555336023916e-05, + "loss": 0.6565401554107666, + "step": 2723 + }, + { + "epoch": 0.96, + "grad_norm": 1.5268947365741583, + "learning_rate": 1.1491794390236047e-05, + "loss": 0.796178936958313, + "step": 2724 + }, + { + "epoch": 0.960352422907489, + "grad_norm": 1.4289859748860345, + "learning_rate": 1.1486032938030409e-05, + "loss": 0.6243436336517334, + "step": 2725 + }, + { + "epoch": 0.960704845814978, + "grad_norm": 3.1702620206811036, + "learning_rate": 1.148027098136284e-05, + 
"loss": 0.6043159365653992, + "step": 2726 + }, + { + "epoch": 0.961057268722467, + "grad_norm": 2.2643023721896554, + "learning_rate": 1.1474508522189334e-05, + "loss": 0.7268002033233643, + "step": 2727 + }, + { + "epoch": 0.9614096916299559, + "grad_norm": 1.6105062692265093, + "learning_rate": 1.1468745562466076e-05, + "loss": 0.6156840324401855, + "step": 2728 + }, + { + "epoch": 0.961762114537445, + "grad_norm": 1.3602355982897767, + "learning_rate": 1.1462982104149409e-05, + "loss": 0.8415796756744385, + "step": 2729 + }, + { + "epoch": 0.9621145374449339, + "grad_norm": 1.7603646172978014, + "learning_rate": 1.145721814919585e-05, + "loss": 0.5983521342277527, + "step": 2730 + }, + { + "epoch": 0.962466960352423, + "grad_norm": 1.6358592349658665, + "learning_rate": 1.1451453699562077e-05, + "loss": 0.6144511699676514, + "step": 2731 + }, + { + "epoch": 0.9628193832599119, + "grad_norm": 1.66844617820458, + "learning_rate": 1.1445688757204942e-05, + "loss": 0.6449630260467529, + "step": 2732 + }, + { + "epoch": 0.9631718061674008, + "grad_norm": 1.5343236560799753, + "learning_rate": 1.1439923324081465e-05, + "loss": 0.7321716547012329, + "step": 2733 + }, + { + "epoch": 0.9635242290748899, + "grad_norm": 1.9877317345810759, + "learning_rate": 1.1434157402148838e-05, + "loss": 0.8354923129081726, + "step": 2734 + }, + { + "epoch": 0.9638766519823788, + "grad_norm": 1.3653549857555707, + "learning_rate": 1.14283909933644e-05, + "loss": 0.728820264339447, + "step": 2735 + }, + { + "epoch": 0.9642290748898679, + "grad_norm": 1.4013626479373464, + "learning_rate": 1.1422624099685675e-05, + "loss": 0.6683202981948853, + "step": 2736 + }, + { + "epoch": 0.9645814977973568, + "grad_norm": 1.6203635868462385, + "learning_rate": 1.141685672307034e-05, + "loss": 0.7159590125083923, + "step": 2737 + }, + { + "epoch": 0.9649339207048458, + "grad_norm": 1.9197883933040156, + "learning_rate": 1.1411088865476245e-05, + "loss": 0.8269981145858765, + "step": 2738 + }, + { + 
"epoch": 0.9652863436123348, + "grad_norm": 1.7561037821195844, + "learning_rate": 1.1405320528861393e-05, + "loss": 0.6993168592453003, + "step": 2739 + }, + { + "epoch": 0.9656387665198238, + "grad_norm": 1.4700171152077626, + "learning_rate": 1.1399551715183956e-05, + "loss": 0.6296184062957764, + "step": 2740 + }, + { + "epoch": 0.9659911894273128, + "grad_norm": 1.5505746175576802, + "learning_rate": 1.1393782426402267e-05, + "loss": 0.670283317565918, + "step": 2741 + }, + { + "epoch": 0.9663436123348018, + "grad_norm": 1.6125051339337373, + "learning_rate": 1.1388012664474824e-05, + "loss": 0.9248946905136108, + "step": 2742 + }, + { + "epoch": 0.9666960352422908, + "grad_norm": 1.7027770081175677, + "learning_rate": 1.1382242431360272e-05, + "loss": 0.7965992093086243, + "step": 2743 + }, + { + "epoch": 0.9670484581497797, + "grad_norm": 1.6413263453773168, + "learning_rate": 1.1376471729017435e-05, + "loss": 0.632454514503479, + "step": 2744 + }, + { + "epoch": 0.9674008810572687, + "grad_norm": 1.4364322830343181, + "learning_rate": 1.1370700559405283e-05, + "loss": 0.6463649272918701, + "step": 2745 + }, + { + "epoch": 0.9677533039647577, + "grad_norm": 1.5890798975591325, + "learning_rate": 1.1364928924482952e-05, + "loss": 0.5864677429199219, + "step": 2746 + }, + { + "epoch": 0.9681057268722467, + "grad_norm": 1.5090045708209912, + "learning_rate": 1.1359156826209726e-05, + "loss": 0.6313967108726501, + "step": 2747 + }, + { + "epoch": 0.9684581497797357, + "grad_norm": 1.2634359711899723, + "learning_rate": 1.1353384266545056e-05, + "loss": 0.5736903548240662, + "step": 2748 + }, + { + "epoch": 0.9688105726872247, + "grad_norm": 1.3956693120918684, + "learning_rate": 1.1347611247448544e-05, + "loss": 0.672286868095398, + "step": 2749 + }, + { + "epoch": 0.9691629955947136, + "grad_norm": 1.7905269273993527, + "learning_rate": 1.1341837770879957e-05, + "loss": 0.7181379795074463, + "step": 2750 + }, + { + "epoch": 0.9695154185022027, + "grad_norm": 
1.3192307426609728, + "learning_rate": 1.1336063838799204e-05, + "loss": 0.6160816550254822, + "step": 2751 + }, + { + "epoch": 0.9698678414096916, + "grad_norm": 1.3858752821091025, + "learning_rate": 1.1330289453166361e-05, + "loss": 0.737337589263916, + "step": 2752 + }, + { + "epoch": 0.9702202643171807, + "grad_norm": 1.4067461052680075, + "learning_rate": 1.1324514615941644e-05, + "loss": 0.6752150058746338, + "step": 2753 + }, + { + "epoch": 0.9705726872246696, + "grad_norm": 1.502210352579975, + "learning_rate": 1.1318739329085438e-05, + "loss": 0.6917784214019775, + "step": 2754 + }, + { + "epoch": 0.9709251101321585, + "grad_norm": 1.873477988490531, + "learning_rate": 1.131296359455827e-05, + "loss": 0.7863353490829468, + "step": 2755 + }, + { + "epoch": 0.9712775330396476, + "grad_norm": 1.338648959960645, + "learning_rate": 1.1307187414320823e-05, + "loss": 0.6236519813537598, + "step": 2756 + }, + { + "epoch": 0.9716299559471365, + "grad_norm": 1.443196389025093, + "learning_rate": 1.130141079033393e-05, + "loss": 0.6957560181617737, + "step": 2757 + }, + { + "epoch": 0.9719823788546256, + "grad_norm": 1.6687230505642796, + "learning_rate": 1.1295633724558574e-05, + "loss": 0.6460270881652832, + "step": 2758 + }, + { + "epoch": 0.9723348017621145, + "grad_norm": 1.4575621917812085, + "learning_rate": 1.1289856218955892e-05, + "loss": 0.7352741956710815, + "step": 2759 + }, + { + "epoch": 0.9726872246696036, + "grad_norm": 1.7999835448567072, + "learning_rate": 1.1284078275487165e-05, + "loss": 0.6285911798477173, + "step": 2760 + }, + { + "epoch": 0.9730396475770925, + "grad_norm": 1.4280819376163427, + "learning_rate": 1.1278299896113823e-05, + "loss": 0.6577984094619751, + "step": 2761 + }, + { + "epoch": 0.9733920704845815, + "grad_norm": 1.4424142490511096, + "learning_rate": 1.1272521082797452e-05, + "loss": 0.6445770859718323, + "step": 2762 + }, + { + "epoch": 0.9737444933920705, + "grad_norm": 1.3911141072298185, + "learning_rate": 
1.1266741837499773e-05, + "loss": 0.557687520980835, + "step": 2763 + }, + { + "epoch": 0.9740969162995595, + "grad_norm": 1.559776829553993, + "learning_rate": 1.1260962162182664e-05, + "loss": 0.6117650866508484, + "step": 2764 + }, + { + "epoch": 0.9744493392070485, + "grad_norm": 1.4751836492364416, + "learning_rate": 1.1255182058808143e-05, + "loss": 0.6498113870620728, + "step": 2765 + }, + { + "epoch": 0.9748017621145374, + "grad_norm": 1.9707928584824135, + "learning_rate": 1.1249401529338375e-05, + "loss": 0.8738062381744385, + "step": 2766 + }, + { + "epoch": 0.9751541850220264, + "grad_norm": 1.6389865398372674, + "learning_rate": 1.1243620575735672e-05, + "loss": 0.551408052444458, + "step": 2767 + }, + { + "epoch": 0.9755066079295154, + "grad_norm": 1.645802380531443, + "learning_rate": 1.1237839199962488e-05, + "loss": 0.7197355031967163, + "step": 2768 + }, + { + "epoch": 0.9758590308370044, + "grad_norm": 1.5393826706252047, + "learning_rate": 1.1232057403981415e-05, + "loss": 0.5704015493392944, + "step": 2769 + }, + { + "epoch": 0.9762114537444934, + "grad_norm": 1.373872634740153, + "learning_rate": 1.1226275189755199e-05, + "loss": 0.603929877281189, + "step": 2770 + }, + { + "epoch": 0.9765638766519824, + "grad_norm": 1.731229349756288, + "learning_rate": 1.1220492559246719e-05, + "loss": 0.8652673363685608, + "step": 2771 + }, + { + "epoch": 0.9769162995594713, + "grad_norm": 1.5891679358388853, + "learning_rate": 1.1214709514418998e-05, + "loss": 0.6827684044837952, + "step": 2772 + }, + { + "epoch": 0.9772687224669604, + "grad_norm": 1.3323036683469254, + "learning_rate": 1.1208926057235197e-05, + "loss": 0.5584808588027954, + "step": 2773 + }, + { + "epoch": 0.9776211453744493, + "grad_norm": 1.5495557729443614, + "learning_rate": 1.1203142189658627e-05, + "loss": 0.7242820262908936, + "step": 2774 + }, + { + "epoch": 0.9779735682819384, + "grad_norm": 1.3489108616226997, + "learning_rate": 1.1197357913652725e-05, + "loss": 
0.5299571752548218, + "step": 2775 + }, + { + "epoch": 0.9783259911894273, + "grad_norm": 1.8541326435971137, + "learning_rate": 1.1191573231181074e-05, + "loss": 0.69478440284729, + "step": 2776 + }, + { + "epoch": 0.9786784140969162, + "grad_norm": 1.540885425711554, + "learning_rate": 1.1185788144207394e-05, + "loss": 0.6997090578079224, + "step": 2777 + }, + { + "epoch": 0.9790308370044053, + "grad_norm": 1.422432956680528, + "learning_rate": 1.1180002654695543e-05, + "loss": 0.6882679462432861, + "step": 2778 + }, + { + "epoch": 0.9793832599118942, + "grad_norm": 1.5811365233101125, + "learning_rate": 1.1174216764609514e-05, + "loss": 0.6434916257858276, + "step": 2779 + }, + { + "epoch": 0.9797356828193833, + "grad_norm": 1.5811226707061032, + "learning_rate": 1.1168430475913437e-05, + "loss": 0.6614376902580261, + "step": 2780 + }, + { + "epoch": 0.9800881057268722, + "grad_norm": 1.380437766979243, + "learning_rate": 1.1162643790571574e-05, + "loss": 0.6440471410751343, + "step": 2781 + }, + { + "epoch": 0.9804405286343613, + "grad_norm": 1.6997398594970703, + "learning_rate": 1.1156856710548327e-05, + "loss": 0.6493573188781738, + "step": 2782 + }, + { + "epoch": 0.9807929515418502, + "grad_norm": 1.5246321952125226, + "learning_rate": 1.1151069237808231e-05, + "loss": 0.660174548625946, + "step": 2783 + }, + { + "epoch": 0.9811453744493392, + "grad_norm": 1.7392611870715098, + "learning_rate": 1.1145281374315953e-05, + "loss": 0.8041812181472778, + "step": 2784 + }, + { + "epoch": 0.9814977973568282, + "grad_norm": 1.3479949919135392, + "learning_rate": 1.1139493122036289e-05, + "loss": 0.4758625030517578, + "step": 2785 + }, + { + "epoch": 0.9818502202643172, + "grad_norm": 1.6334305751982239, + "learning_rate": 1.113370448293417e-05, + "loss": 0.6482613682746887, + "step": 2786 + }, + { + "epoch": 0.9822026431718062, + "grad_norm": 1.475447708954463, + "learning_rate": 1.1127915458974665e-05, + "loss": 0.6911569237709045, + "step": 2787 + }, + { + 
"epoch": 0.9825550660792951, + "grad_norm": 1.362340888945518, + "learning_rate": 1.1122126052122963e-05, + "loss": 0.6851824522018433, + "step": 2788 + }, + { + "epoch": 0.9829074889867842, + "grad_norm": 1.5792587066367831, + "learning_rate": 1.111633626434439e-05, + "loss": 0.6405081748962402, + "step": 2789 + }, + { + "epoch": 0.9832599118942731, + "grad_norm": 1.5781550908818451, + "learning_rate": 1.1110546097604391e-05, + "loss": 0.7064476013183594, + "step": 2790 + }, + { + "epoch": 0.9836123348017621, + "grad_norm": 1.4647903320195184, + "learning_rate": 1.1104755553868559e-05, + "loss": 0.641350269317627, + "step": 2791 + }, + { + "epoch": 0.9839647577092511, + "grad_norm": 1.4142953897430577, + "learning_rate": 1.1098964635102597e-05, + "loss": 0.748977780342102, + "step": 2792 + }, + { + "epoch": 0.9843171806167401, + "grad_norm": 1.3989289975006294, + "learning_rate": 1.1093173343272342e-05, + "loss": 0.6033440828323364, + "step": 2793 + }, + { + "epoch": 0.984669603524229, + "grad_norm": 1.2877663440814373, + "learning_rate": 1.1087381680343754e-05, + "loss": 0.5684633255004883, + "step": 2794 + }, + { + "epoch": 0.9850220264317181, + "grad_norm": 1.5189384787980884, + "learning_rate": 1.1081589648282928e-05, + "loss": 0.7041289210319519, + "step": 2795 + }, + { + "epoch": 0.985374449339207, + "grad_norm": 1.5616342989862266, + "learning_rate": 1.1075797249056079e-05, + "loss": 0.7189786434173584, + "step": 2796 + }, + { + "epoch": 0.9857268722466961, + "grad_norm": 1.534620191791425, + "learning_rate": 1.1070004484629543e-05, + "loss": 0.5114344358444214, + "step": 2797 + }, + { + "epoch": 0.986079295154185, + "grad_norm": 1.6541092784437663, + "learning_rate": 1.1064211356969782e-05, + "loss": 0.5897136926651001, + "step": 2798 + }, + { + "epoch": 0.986431718061674, + "grad_norm": 1.5980123151797752, + "learning_rate": 1.1058417868043387e-05, + "loss": 0.8490760326385498, + "step": 2799 + }, + { + "epoch": 0.986784140969163, + "grad_norm": 
1.5100542298165633, + "learning_rate": 1.1052624019817065e-05, + "loss": 0.6392524242401123, + "step": 2800 + }, + { + "epoch": 0.9871365638766519, + "grad_norm": 1.5630522519900902, + "learning_rate": 1.104682981425765e-05, + "loss": 0.7267303466796875, + "step": 2801 + }, + { + "epoch": 0.987488986784141, + "grad_norm": 1.5413815660334662, + "learning_rate": 1.1041035253332087e-05, + "loss": 0.6622469425201416, + "step": 2802 + }, + { + "epoch": 0.9878414096916299, + "grad_norm": 1.4547931829788883, + "learning_rate": 1.1035240339007454e-05, + "loss": 0.643883466720581, + "step": 2803 + }, + { + "epoch": 0.988193832599119, + "grad_norm": 1.4919310534649226, + "learning_rate": 1.1029445073250945e-05, + "loss": 0.6281142234802246, + "step": 2804 + }, + { + "epoch": 0.9885462555066079, + "grad_norm": 1.606048707782168, + "learning_rate": 1.1023649458029873e-05, + "loss": 0.6356241703033447, + "step": 2805 + }, + { + "epoch": 0.988898678414097, + "grad_norm": 1.7018688321982895, + "learning_rate": 1.1017853495311664e-05, + "loss": 0.8118115663528442, + "step": 2806 + }, + { + "epoch": 0.9892511013215859, + "grad_norm": 1.4779776881835476, + "learning_rate": 1.1012057187063872e-05, + "loss": 0.7673395276069641, + "step": 2807 + }, + { + "epoch": 0.9896035242290749, + "grad_norm": 1.5158382122898324, + "learning_rate": 1.1006260535254159e-05, + "loss": 0.6617262959480286, + "step": 2808 + }, + { + "epoch": 0.9899559471365639, + "grad_norm": 1.7342419352159402, + "learning_rate": 1.1000463541850315e-05, + "loss": 0.537519097328186, + "step": 2809 + }, + { + "epoch": 0.9903083700440528, + "grad_norm": 1.8093297060046025, + "learning_rate": 1.0994666208820229e-05, + "loss": 0.6281024813652039, + "step": 2810 + }, + { + "epoch": 0.9906607929515419, + "grad_norm": 1.4111971416204439, + "learning_rate": 1.0988868538131922e-05, + "loss": 0.7189136743545532, + "step": 2811 + }, + { + "epoch": 0.9910132158590308, + "grad_norm": 1.3844162550962045, + "learning_rate": 
1.098307053175352e-05, + "loss": 0.622093677520752, + "step": 2812 + }, + { + "epoch": 0.9913656387665198, + "grad_norm": 1.4032650881900075, + "learning_rate": 1.0977272191653272e-05, + "loss": 0.6774802207946777, + "step": 2813 + }, + { + "epoch": 0.9917180616740088, + "grad_norm": 1.490303383982121, + "learning_rate": 1.0971473519799523e-05, + "loss": 0.5999646186828613, + "step": 2814 + }, + { + "epoch": 0.9920704845814978, + "grad_norm": 1.3508886274303966, + "learning_rate": 1.096567451816075e-05, + "loss": 0.6450619697570801, + "step": 2815 + }, + { + "epoch": 0.9924229074889868, + "grad_norm": 1.8693455627252262, + "learning_rate": 1.0959875188705529e-05, + "loss": 0.693134069442749, + "step": 2816 + }, + { + "epoch": 0.9927753303964758, + "grad_norm": 1.744167199385734, + "learning_rate": 1.0954075533402557e-05, + "loss": 0.8968616724014282, + "step": 2817 + }, + { + "epoch": 0.9931277533039647, + "grad_norm": 1.5750441805034816, + "learning_rate": 1.0948275554220632e-05, + "loss": 0.6114391088485718, + "step": 2818 + }, + { + "epoch": 0.9934801762114538, + "grad_norm": 1.3761860122661305, + "learning_rate": 1.0942475253128667e-05, + "loss": 0.7583796977996826, + "step": 2819 + }, + { + "epoch": 0.9938325991189427, + "grad_norm": 2.0494911253957735, + "learning_rate": 1.0936674632095683e-05, + "loss": 0.5683549046516418, + "step": 2820 + }, + { + "epoch": 0.9941850220264317, + "grad_norm": 1.4100630352107084, + "learning_rate": 1.0930873693090815e-05, + "loss": 0.5664689540863037, + "step": 2821 + }, + { + "epoch": 0.9945374449339207, + "grad_norm": 1.1859055454278844, + "learning_rate": 1.0925072438083296e-05, + "loss": 0.5799476504325867, + "step": 2822 + }, + { + "epoch": 0.9948898678414096, + "grad_norm": 1.4558284543811444, + "learning_rate": 1.0919270869042475e-05, + "loss": 0.6879112720489502, + "step": 2823 + }, + { + "epoch": 0.9952422907488987, + "grad_norm": 1.3673096151886848, + "learning_rate": 1.09134689879378e-05, + "loss": 
0.6348927021026611, + "step": 2824 + }, + { + "epoch": 0.9955947136563876, + "grad_norm": 1.5301215006310536, + "learning_rate": 1.0907666796738839e-05, + "loss": 0.55754554271698, + "step": 2825 + }, + { + "epoch": 0.9959471365638767, + "grad_norm": 1.6611255848189581, + "learning_rate": 1.090186429741524e-05, + "loss": 0.6664899587631226, + "step": 2826 + }, + { + "epoch": 0.9962995594713656, + "grad_norm": 1.3580224067934683, + "learning_rate": 1.0896061491936782e-05, + "loss": 0.6521929502487183, + "step": 2827 + }, + { + "epoch": 0.9966519823788547, + "grad_norm": 1.4217882734660863, + "learning_rate": 1.0890258382273333e-05, + "loss": 0.542471170425415, + "step": 2828 + }, + { + "epoch": 0.9970044052863436, + "grad_norm": 1.3242120868836005, + "learning_rate": 1.0884454970394871e-05, + "loss": 0.60117506980896, + "step": 2829 + }, + { + "epoch": 0.9973568281938326, + "grad_norm": 1.5563969946549858, + "learning_rate": 1.0878651258271471e-05, + "loss": 0.6783676147460938, + "step": 2830 + }, + { + "epoch": 0.9977092511013216, + "grad_norm": 1.4867095260992749, + "learning_rate": 1.0872847247873315e-05, + "loss": 0.7080766558647156, + "step": 2831 + }, + { + "epoch": 0.9980616740088105, + "grad_norm": 1.7595047000981443, + "learning_rate": 1.0867042941170677e-05, + "loss": 0.9228106141090393, + "step": 2832 + }, + { + "epoch": 0.9984140969162996, + "grad_norm": 1.749212162747955, + "learning_rate": 1.086123834013395e-05, + "loss": 0.7601282596588135, + "step": 2833 + }, + { + "epoch": 0.9987665198237885, + "grad_norm": 1.388473564306277, + "learning_rate": 1.0855433446733607e-05, + "loss": 0.7101393342018127, + "step": 2834 + }, + { + "epoch": 0.9991189427312775, + "grad_norm": 1.426665891638417, + "learning_rate": 1.084962826294023e-05, + "loss": 0.5006242394447327, + "step": 2835 + }, + { + "epoch": 0.9994713656387665, + "grad_norm": 1.6063601330711992, + "learning_rate": 1.08438227907245e-05, + "loss": 0.7270148992538452, + "step": 2836 + }, + { + "epoch": 
0.9998237885462555, + "grad_norm": 1.5770914971205114, + "learning_rate": 1.0838017032057194e-05, + "loss": 0.7252628803253174, + "step": 2837 + }, + { + "epoch": 1.0, + "grad_norm": 2.9062070384731578, + "learning_rate": 1.0832210988909187e-05, + "loss": 0.4579252004623413, + "step": 2838 + }, + { + "epoch": 1.000352422907489, + "grad_norm": 1.410073366222354, + "learning_rate": 1.0826404663251446e-05, + "loss": 0.635676920413971, + "step": 2839 + }, + { + "epoch": 1.0007048458149779, + "grad_norm": 1.5085425099131595, + "learning_rate": 1.0820598057055039e-05, + "loss": 0.6083015203475952, + "step": 2840 + }, + { + "epoch": 1.001057268722467, + "grad_norm": 1.2571881093552235, + "learning_rate": 1.0814791172291132e-05, + "loss": 0.5641704797744751, + "step": 2841 + }, + { + "epoch": 1.001409691629956, + "grad_norm": 1.448254627835315, + "learning_rate": 1.0808984010930981e-05, + "loss": 0.7668559551239014, + "step": 2842 + }, + { + "epoch": 1.001762114537445, + "grad_norm": 1.7836674103878665, + "learning_rate": 1.0803176574945933e-05, + "loss": 0.5205796957015991, + "step": 2843 + }, + { + "epoch": 1.0021145374449338, + "grad_norm": 1.2460568970106132, + "learning_rate": 1.0797368866307431e-05, + "loss": 0.6771252155303955, + "step": 2844 + }, + { + "epoch": 1.002466960352423, + "grad_norm": 1.3246167691239887, + "learning_rate": 1.0791560886987016e-05, + "loss": 0.6101677417755127, + "step": 2845 + }, + { + "epoch": 1.002819383259912, + "grad_norm": 1.683370422985012, + "learning_rate": 1.0785752638956315e-05, + "loss": 0.5651522874832153, + "step": 2846 + }, + { + "epoch": 1.0031718061674009, + "grad_norm": 1.3543139981801942, + "learning_rate": 1.0779944124187048e-05, + "loss": 0.6814571619033813, + "step": 2847 + }, + { + "epoch": 1.0035242290748898, + "grad_norm": 1.5579116379809095, + "learning_rate": 1.0774135344651023e-05, + "loss": 0.6786171197891235, + "step": 2848 + }, + { + "epoch": 1.003876651982379, + "grad_norm": 1.341282658364188, + 
"learning_rate": 1.0768326302320136e-05, + "loss": 0.5244907736778259, + "step": 2849 + }, + { + "epoch": 1.004229074889868, + "grad_norm": 1.5100504884551087, + "learning_rate": 1.0762516999166383e-05, + "loss": 0.6368712186813354, + "step": 2850 + }, + { + "epoch": 1.0045814977973568, + "grad_norm": 1.3929085404961679, + "learning_rate": 1.0756707437161841e-05, + "loss": 0.6389411687850952, + "step": 2851 + }, + { + "epoch": 1.0049339207048458, + "grad_norm": 1.796913818431425, + "learning_rate": 1.0750897618278675e-05, + "loss": 0.6257550716400146, + "step": 2852 + }, + { + "epoch": 1.0052863436123347, + "grad_norm": 1.384078231158131, + "learning_rate": 1.0745087544489132e-05, + "loss": 0.49478042125701904, + "step": 2853 + }, + { + "epoch": 1.0056387665198239, + "grad_norm": 1.3713236142324383, + "learning_rate": 1.0739277217765558e-05, + "loss": 0.6350952386856079, + "step": 2854 + }, + { + "epoch": 1.0059911894273128, + "grad_norm": 1.4287669419061304, + "learning_rate": 1.0733466640080374e-05, + "loss": 0.6057480573654175, + "step": 2855 + }, + { + "epoch": 1.0063436123348017, + "grad_norm": 1.5646694084149986, + "learning_rate": 1.0727655813406094e-05, + "loss": 0.5545427799224854, + "step": 2856 + }, + { + "epoch": 1.0066960352422907, + "grad_norm": 1.371726691889951, + "learning_rate": 1.0721844739715311e-05, + "loss": 0.55484938621521, + "step": 2857 + }, + { + "epoch": 1.0070484581497798, + "grad_norm": 1.6325523903522516, + "learning_rate": 1.0716033420980703e-05, + "loss": 0.6889834403991699, + "step": 2858 + }, + { + "epoch": 1.0074008810572688, + "grad_norm": 1.928061303452338, + "learning_rate": 1.0710221859175031e-05, + "loss": 0.7259023189544678, + "step": 2859 + }, + { + "epoch": 1.0077533039647577, + "grad_norm": 1.7213820381224034, + "learning_rate": 1.0704410056271144e-05, + "loss": 0.6200032234191895, + "step": 2860 + }, + { + "epoch": 1.0081057268722466, + "grad_norm": 1.2488919699208767, + "learning_rate": 1.069859801424196e-05, + "loss": 
0.5357909202575684, + "step": 2861 + }, + { + "epoch": 1.0084581497797356, + "grad_norm": 1.462725629247434, + "learning_rate": 1.0692785735060495e-05, + "loss": 0.8121966123580933, + "step": 2862 + }, + { + "epoch": 1.0088105726872247, + "grad_norm": 1.5047486906511685, + "learning_rate": 1.0686973220699834e-05, + "loss": 0.5698819160461426, + "step": 2863 + }, + { + "epoch": 1.0091629955947137, + "grad_norm": 1.3352019656375154, + "learning_rate": 1.0681160473133144e-05, + "loss": 0.6598206162452698, + "step": 2864 + }, + { + "epoch": 1.0095154185022026, + "grad_norm": 1.571854196128042, + "learning_rate": 1.0675347494333667e-05, + "loss": 0.7574363946914673, + "step": 2865 + }, + { + "epoch": 1.0098678414096915, + "grad_norm": 2.0265508752029007, + "learning_rate": 1.0669534286274737e-05, + "loss": 0.6749663949012756, + "step": 2866 + }, + { + "epoch": 1.0102202643171807, + "grad_norm": 1.5445692097493786, + "learning_rate": 1.0663720850929753e-05, + "loss": 0.5932409763336182, + "step": 2867 + }, + { + "epoch": 1.0105726872246696, + "grad_norm": 1.4883467064779885, + "learning_rate": 1.0657907190272197e-05, + "loss": 0.7070773839950562, + "step": 2868 + }, + { + "epoch": 1.0109251101321586, + "grad_norm": 1.6639794076635466, + "learning_rate": 1.0652093306275621e-05, + "loss": 0.531635582447052, + "step": 2869 + }, + { + "epoch": 1.0112775330396475, + "grad_norm": 1.5967103256398283, + "learning_rate": 1.0646279200913665e-05, + "loss": 0.5966447591781616, + "step": 2870 + }, + { + "epoch": 1.0116299559471367, + "grad_norm": 1.5047477869564347, + "learning_rate": 1.0640464876160033e-05, + "loss": 0.6308450698852539, + "step": 2871 + }, + { + "epoch": 1.0119823788546256, + "grad_norm": 1.6938927429813924, + "learning_rate": 1.0634650333988508e-05, + "loss": 0.6477035284042358, + "step": 2872 + }, + { + "epoch": 1.0123348017621145, + "grad_norm": 1.4725648899614407, + "learning_rate": 1.0628835576372942e-05, + "loss": 0.5856079459190369, + "step": 2873 + }, + { + 
"epoch": 1.0126872246696035, + "grad_norm": 1.6415031005435194, + "learning_rate": 1.062302060528727e-05, + "loss": 0.733691930770874, + "step": 2874 + }, + { + "epoch": 1.0130396475770924, + "grad_norm": 1.6528326658043055, + "learning_rate": 1.0617205422705495e-05, + "loss": 0.6020156145095825, + "step": 2875 + }, + { + "epoch": 1.0133920704845816, + "grad_norm": 1.5978613503890422, + "learning_rate": 1.0611390030601685e-05, + "loss": 0.4980982542037964, + "step": 2876 + }, + { + "epoch": 1.0137444933920705, + "grad_norm": 1.5178573200522583, + "learning_rate": 1.0605574430949983e-05, + "loss": 0.6498349905014038, + "step": 2877 + }, + { + "epoch": 1.0140969162995594, + "grad_norm": 1.7318519084472541, + "learning_rate": 1.0599758625724612e-05, + "loss": 0.6456383466720581, + "step": 2878 + }, + { + "epoch": 1.0144493392070484, + "grad_norm": 1.7056738628689527, + "learning_rate": 1.059394261689985e-05, + "loss": 0.6047386527061462, + "step": 2879 + }, + { + "epoch": 1.0148017621145375, + "grad_norm": 1.6633316847391189, + "learning_rate": 1.0588126406450056e-05, + "loss": 0.641674816608429, + "step": 2880 + }, + { + "epoch": 1.0151541850220265, + "grad_norm": 1.549495353719679, + "learning_rate": 1.0582309996349648e-05, + "loss": 0.6157702207565308, + "step": 2881 + }, + { + "epoch": 1.0155066079295154, + "grad_norm": 1.614686141937513, + "learning_rate": 1.057649338857312e-05, + "loss": 0.6004809737205505, + "step": 2882 + }, + { + "epoch": 1.0158590308370044, + "grad_norm": 1.460588924951717, + "learning_rate": 1.0570676585095028e-05, + "loss": 0.5534430742263794, + "step": 2883 + }, + { + "epoch": 1.0162114537444933, + "grad_norm": 2.0058626486485367, + "learning_rate": 1.0564859587889997e-05, + "loss": 0.7781813144683838, + "step": 2884 + }, + { + "epoch": 1.0165638766519824, + "grad_norm": 1.9228872779765243, + "learning_rate": 1.0559042398932713e-05, + "loss": 0.6949760913848877, + "step": 2885 + }, + { + "epoch": 1.0169162995594714, + "grad_norm": 
1.51396598780538, + "learning_rate": 1.0553225020197932e-05, + "loss": 0.5718453526496887, + "step": 2886 + }, + { + "epoch": 1.0172687224669603, + "grad_norm": 1.7835909963123882, + "learning_rate": 1.0547407453660471e-05, + "loss": 0.6689345836639404, + "step": 2887 + }, + { + "epoch": 1.0176211453744493, + "grad_norm": 1.5559332596209525, + "learning_rate": 1.0541589701295222e-05, + "loss": 0.6615442037582397, + "step": 2888 + }, + { + "epoch": 1.0179735682819384, + "grad_norm": 1.4810070180145358, + "learning_rate": 1.0535771765077121e-05, + "loss": 0.6458337306976318, + "step": 2889 + }, + { + "epoch": 1.0183259911894273, + "grad_norm": 1.4770072284014752, + "learning_rate": 1.052995364698118e-05, + "loss": 0.5330519676208496, + "step": 2890 + }, + { + "epoch": 1.0186784140969163, + "grad_norm": 1.4780636522187705, + "learning_rate": 1.0524135348982467e-05, + "loss": 0.6219571232795715, + "step": 2891 + }, + { + "epoch": 1.0190308370044052, + "grad_norm": 1.4624191661889683, + "learning_rate": 1.0518316873056118e-05, + "loss": 0.6731684803962708, + "step": 2892 + }, + { + "epoch": 1.0193832599118944, + "grad_norm": 1.614741871357758, + "learning_rate": 1.0512498221177319e-05, + "loss": 0.6126813888549805, + "step": 2893 + }, + { + "epoch": 1.0197356828193833, + "grad_norm": 1.4895494518265573, + "learning_rate": 1.0506679395321325e-05, + "loss": 0.5796904563903809, + "step": 2894 + }, + { + "epoch": 1.0200881057268723, + "grad_norm": 1.5545739969005041, + "learning_rate": 1.050086039746344e-05, + "loss": 0.5765914916992188, + "step": 2895 + }, + { + "epoch": 1.0204405286343612, + "grad_norm": 1.3710954206781227, + "learning_rate": 1.0495041229579043e-05, + "loss": 0.4798969328403473, + "step": 2896 + }, + { + "epoch": 1.0207929515418501, + "grad_norm": 1.551476741605498, + "learning_rate": 1.0489221893643553e-05, + "loss": 0.673927366733551, + "step": 2897 + }, + { + "epoch": 1.0211453744493393, + "grad_norm": 1.6211129054938926, + "learning_rate": 
1.0483402391632453e-05, + "loss": 0.5681431293487549, + "step": 2898 + }, + { + "epoch": 1.0214977973568282, + "grad_norm": 1.3128793329209902, + "learning_rate": 1.0477582725521287e-05, + "loss": 0.6156354546546936, + "step": 2899 + }, + { + "epoch": 1.0218502202643172, + "grad_norm": 1.4369078255379546, + "learning_rate": 1.0471762897285652e-05, + "loss": 0.6569045782089233, + "step": 2900 + }, + { + "epoch": 1.022202643171806, + "grad_norm": 1.4293089736412674, + "learning_rate": 1.046594290890119e-05, + "loss": 0.6125048995018005, + "step": 2901 + }, + { + "epoch": 1.0225550660792952, + "grad_norm": 1.6465466140905431, + "learning_rate": 1.0460122762343614e-05, + "loss": 0.604046106338501, + "step": 2902 + }, + { + "epoch": 1.0229074889867842, + "grad_norm": 1.5461286198100506, + "learning_rate": 1.0454302459588677e-05, + "loss": 0.4569816589355469, + "step": 2903 + }, + { + "epoch": 1.0232599118942731, + "grad_norm": 1.6187784923192434, + "learning_rate": 1.0448482002612194e-05, + "loss": 0.5764607787132263, + "step": 2904 + }, + { + "epoch": 1.023612334801762, + "grad_norm": 1.503585291483294, + "learning_rate": 1.044266139339003e-05, + "loss": 0.5859626531600952, + "step": 2905 + }, + { + "epoch": 1.023964757709251, + "grad_norm": 1.6642769825669268, + "learning_rate": 1.04368406338981e-05, + "loss": 0.7326341271400452, + "step": 2906 + }, + { + "epoch": 1.0243171806167402, + "grad_norm": 1.613324765385094, + "learning_rate": 1.0431019726112366e-05, + "loss": 0.6355161070823669, + "step": 2907 + }, + { + "epoch": 1.024669603524229, + "grad_norm": 1.5833367942965741, + "learning_rate": 1.0425198672008851e-05, + "loss": 0.6990653872489929, + "step": 2908 + }, + { + "epoch": 1.025022026431718, + "grad_norm": 2.3098262824716542, + "learning_rate": 1.0419377473563621e-05, + "loss": 0.631952166557312, + "step": 2909 + }, + { + "epoch": 1.025374449339207, + "grad_norm": 1.4397039525414863, + "learning_rate": 1.041355613275279e-05, + "loss": 0.4872596561908722, + 
"step": 2910 + }, + { + "epoch": 1.0257268722466961, + "grad_norm": 1.5222931253330352, + "learning_rate": 1.0407734651552522e-05, + "loss": 0.5334043502807617, + "step": 2911 + }, + { + "epoch": 1.026079295154185, + "grad_norm": 1.5817730675020623, + "learning_rate": 1.0401913031939026e-05, + "loss": 0.5971134305000305, + "step": 2912 + }, + { + "epoch": 1.026431718061674, + "grad_norm": 1.7562208471394358, + "learning_rate": 1.0396091275888567e-05, + "loss": 0.6527851819992065, + "step": 2913 + }, + { + "epoch": 1.026784140969163, + "grad_norm": 1.5387477454353993, + "learning_rate": 1.0390269385377444e-05, + "loss": 0.4515818953514099, + "step": 2914 + }, + { + "epoch": 1.027136563876652, + "grad_norm": 1.4624804092376522, + "learning_rate": 1.0384447362382013e-05, + "loss": 0.530797004699707, + "step": 2915 + }, + { + "epoch": 1.027488986784141, + "grad_norm": 1.4915704465108583, + "learning_rate": 1.0378625208878666e-05, + "loss": 0.5477641224861145, + "step": 2916 + }, + { + "epoch": 1.02784140969163, + "grad_norm": 1.6025052451883606, + "learning_rate": 1.0372802926843843e-05, + "loss": 0.6390479207038879, + "step": 2917 + }, + { + "epoch": 1.028193832599119, + "grad_norm": 1.5706073153963707, + "learning_rate": 1.0366980518254028e-05, + "loss": 0.610755443572998, + "step": 2918 + }, + { + "epoch": 1.0285462555066078, + "grad_norm": 1.4805888577219812, + "learning_rate": 1.036115798508575e-05, + "loss": 0.5427766442298889, + "step": 2919 + }, + { + "epoch": 1.028898678414097, + "grad_norm": 1.4610582929917253, + "learning_rate": 1.0355335329315573e-05, + "loss": 0.621055006980896, + "step": 2920 + }, + { + "epoch": 1.029251101321586, + "grad_norm": 1.7760527372961, + "learning_rate": 1.0349512552920114e-05, + "loss": 0.6098253726959229, + "step": 2921 + }, + { + "epoch": 1.0296035242290749, + "grad_norm": 1.8967300437588117, + "learning_rate": 1.0343689657876017e-05, + "loss": 0.591664731502533, + "step": 2922 + }, + { + "epoch": 1.0299559471365638, + 
"grad_norm": 1.616730113059231, + "learning_rate": 1.033786664615998e-05, + "loss": 0.6531485915184021, + "step": 2923 + }, + { + "epoch": 1.030308370044053, + "grad_norm": 1.5937698715448299, + "learning_rate": 1.0332043519748727e-05, + "loss": 0.6933655738830566, + "step": 2924 + }, + { + "epoch": 1.030660792951542, + "grad_norm": 1.5987643686429562, + "learning_rate": 1.0326220280619036e-05, + "loss": 0.6512705087661743, + "step": 2925 + }, + { + "epoch": 1.0310132158590308, + "grad_norm": 1.829250792437923, + "learning_rate": 1.0320396930747712e-05, + "loss": 0.5671502947807312, + "step": 2926 + }, + { + "epoch": 1.0313656387665198, + "grad_norm": 1.6239123058071627, + "learning_rate": 1.0314573472111601e-05, + "loss": 0.6795192360877991, + "step": 2927 + }, + { + "epoch": 1.0317180616740087, + "grad_norm": 1.5985127083182307, + "learning_rate": 1.0308749906687585e-05, + "loss": 0.6357578039169312, + "step": 2928 + }, + { + "epoch": 1.0320704845814979, + "grad_norm": 1.6982196546251649, + "learning_rate": 1.0302926236452588e-05, + "loss": 0.7009944915771484, + "step": 2929 + }, + { + "epoch": 1.0324229074889868, + "grad_norm": 1.4806960711115318, + "learning_rate": 1.0297102463383557e-05, + "loss": 0.4685679078102112, + "step": 2930 + }, + { + "epoch": 1.0327753303964757, + "grad_norm": 1.5429925693746163, + "learning_rate": 1.0291278589457488e-05, + "loss": 0.6359078884124756, + "step": 2931 + }, + { + "epoch": 1.0331277533039647, + "grad_norm": 1.8631741910761805, + "learning_rate": 1.0285454616651398e-05, + "loss": 0.6606266498565674, + "step": 2932 + }, + { + "epoch": 1.0334801762114538, + "grad_norm": 1.7076039728900445, + "learning_rate": 1.0279630546942353e-05, + "loss": 0.5405932664871216, + "step": 2933 + }, + { + "epoch": 1.0338325991189428, + "grad_norm": 1.4934491606364382, + "learning_rate": 1.0273806382307443e-05, + "loss": 0.8072758316993713, + "step": 2934 + }, + { + "epoch": 1.0341850220264317, + "grad_norm": 1.5899951805886359, + 
"learning_rate": 1.0267982124723783e-05, + "loss": 0.6923058032989502, + "step": 2935 + }, + { + "epoch": 1.0345374449339206, + "grad_norm": 1.7156977270346485, + "learning_rate": 1.0262157776168533e-05, + "loss": 0.5577275156974792, + "step": 2936 + }, + { + "epoch": 1.0348898678414098, + "grad_norm": 1.6363417924911698, + "learning_rate": 1.0256333338618875e-05, + "loss": 0.6780786514282227, + "step": 2937 + }, + { + "epoch": 1.0352422907488987, + "grad_norm": 1.6093019454005904, + "learning_rate": 1.0250508814052029e-05, + "loss": 0.6966040134429932, + "step": 2938 + }, + { + "epoch": 1.0355947136563877, + "grad_norm": 1.4912092272159942, + "learning_rate": 1.0244684204445237e-05, + "loss": 0.5726339817047119, + "step": 2939 + }, + { + "epoch": 1.0359471365638766, + "grad_norm": 1.372791278777169, + "learning_rate": 1.0238859511775768e-05, + "loss": 0.64924156665802, + "step": 2940 + }, + { + "epoch": 1.0362995594713655, + "grad_norm": 1.5498611273448277, + "learning_rate": 1.0233034738020933e-05, + "loss": 0.49121707677841187, + "step": 2941 + }, + { + "epoch": 1.0366519823788547, + "grad_norm": 1.4698297870867278, + "learning_rate": 1.0227209885158053e-05, + "loss": 0.5505814552307129, + "step": 2942 + }, + { + "epoch": 1.0370044052863436, + "grad_norm": 1.658171020881214, + "learning_rate": 1.022138495516449e-05, + "loss": 0.7429872751235962, + "step": 2943 + }, + { + "epoch": 1.0373568281938326, + "grad_norm": 1.5946562373848934, + "learning_rate": 1.0215559950017624e-05, + "loss": 0.6492434740066528, + "step": 2944 + }, + { + "epoch": 1.0377092511013215, + "grad_norm": 1.5139165780476451, + "learning_rate": 1.0209734871694865e-05, + "loss": 0.5418736338615417, + "step": 2945 + }, + { + "epoch": 1.0380616740088107, + "grad_norm": 1.676058492453494, + "learning_rate": 1.0203909722173644e-05, + "loss": 0.6252620220184326, + "step": 2946 + }, + { + "epoch": 1.0384140969162996, + "grad_norm": 1.4699238771485563, + "learning_rate": 1.0198084503431416e-05, + 
"loss": 0.5124455690383911, + "step": 2947 + }, + { + "epoch": 1.0387665198237885, + "grad_norm": 1.4358343290990208, + "learning_rate": 1.0192259217445663e-05, + "loss": 0.5729688405990601, + "step": 2948 + }, + { + "epoch": 1.0391189427312775, + "grad_norm": 1.8222711908460536, + "learning_rate": 1.0186433866193893e-05, + "loss": 0.5891536474227905, + "step": 2949 + }, + { + "epoch": 1.0394713656387666, + "grad_norm": 1.7110443983801997, + "learning_rate": 1.0180608451653626e-05, + "loss": 0.774397075176239, + "step": 2950 + }, + { + "epoch": 1.0398237885462556, + "grad_norm": 1.4480826912481708, + "learning_rate": 1.0174782975802408e-05, + "loss": 0.5987098813056946, + "step": 2951 + }, + { + "epoch": 1.0401762114537445, + "grad_norm": 1.634577600554869, + "learning_rate": 1.016895744061781e-05, + "loss": 0.5334598422050476, + "step": 2952 + }, + { + "epoch": 1.0405286343612334, + "grad_norm": 1.7236175912347957, + "learning_rate": 1.0163131848077421e-05, + "loss": 0.5946340560913086, + "step": 2953 + }, + { + "epoch": 1.0408810572687224, + "grad_norm": 1.601606630295311, + "learning_rate": 1.0157306200158847e-05, + "loss": 0.5780941247940063, + "step": 2954 + }, + { + "epoch": 1.0412334801762115, + "grad_norm": 1.6785528445522104, + "learning_rate": 1.0151480498839712e-05, + "loss": 0.6348963975906372, + "step": 2955 + }, + { + "epoch": 1.0415859030837005, + "grad_norm": 1.717999985242494, + "learning_rate": 1.014565474609766e-05, + "loss": 0.6868102550506592, + "step": 2956 + }, + { + "epoch": 1.0419383259911894, + "grad_norm": 1.6612318546166622, + "learning_rate": 1.0139828943910358e-05, + "loss": 0.6507548689842224, + "step": 2957 + }, + { + "epoch": 1.0422907488986783, + "grad_norm": 1.7617270521903845, + "learning_rate": 1.0134003094255478e-05, + "loss": 0.6358312964439392, + "step": 2958 + }, + { + "epoch": 1.0426431718061675, + "grad_norm": 1.5725895362844704, + "learning_rate": 1.0128177199110723e-05, + "loss": 0.7530224919319153, + "step": 2959 + }, + 
{ + "epoch": 1.0429955947136564, + "grad_norm": 1.5496338862557548, + "learning_rate": 1.012235126045379e-05, + "loss": 0.545819878578186, + "step": 2960 + }, + { + "epoch": 1.0433480176211454, + "grad_norm": 1.5828250584633938, + "learning_rate": 1.011652528026242e-05, + "loss": 0.6626788377761841, + "step": 2961 + }, + { + "epoch": 1.0437004405286343, + "grad_norm": 1.6913571400986156, + "learning_rate": 1.0110699260514336e-05, + "loss": 0.6407896280288696, + "step": 2962 + }, + { + "epoch": 1.0440528634361232, + "grad_norm": 1.4558906354554821, + "learning_rate": 1.0104873203187307e-05, + "loss": 0.5633673667907715, + "step": 2963 + }, + { + "epoch": 1.0444052863436124, + "grad_norm": 1.6991226564822444, + "learning_rate": 1.0099047110259081e-05, + "loss": 0.5356892943382263, + "step": 2964 + }, + { + "epoch": 1.0447577092511013, + "grad_norm": 1.6571256461175092, + "learning_rate": 1.0093220983707448e-05, + "loss": 0.5527205467224121, + "step": 2965 + }, + { + "epoch": 1.0451101321585903, + "grad_norm": 1.5928434384321621, + "learning_rate": 1.008739482551019e-05, + "loss": 0.6148320436477661, + "step": 2966 + }, + { + "epoch": 1.0454625550660792, + "grad_norm": 1.8604930696261837, + "learning_rate": 1.0081568637645111e-05, + "loss": 0.5713976621627808, + "step": 2967 + }, + { + "epoch": 1.0458149779735684, + "grad_norm": 1.4811105317563769, + "learning_rate": 1.0075742422090015e-05, + "loss": 0.5836226940155029, + "step": 2968 + }, + { + "epoch": 1.0461674008810573, + "grad_norm": 1.829134506733255, + "learning_rate": 1.0069916180822727e-05, + "loss": 0.6452749371528625, + "step": 2969 + }, + { + "epoch": 1.0465198237885462, + "grad_norm": 1.507975881410604, + "learning_rate": 1.006408991582107e-05, + "loss": 0.5468501448631287, + "step": 2970 + }, + { + "epoch": 1.0468722466960352, + "grad_norm": 1.6217984014708016, + "learning_rate": 1.0058263629062883e-05, + "loss": 0.5195704698562622, + "step": 2971 + }, + { + "epoch": 1.0472246696035241, + "grad_norm": 
1.603914403857505, + "learning_rate": 1.0052437322526003e-05, + "loss": 0.5144641995429993, + "step": 2972 + }, + { + "epoch": 1.0475770925110133, + "grad_norm": 1.767647834896278, + "learning_rate": 1.004661099818829e-05, + "loss": 0.7258927822113037, + "step": 2973 + }, + { + "epoch": 1.0479295154185022, + "grad_norm": 1.8920163745404244, + "learning_rate": 1.004078465802759e-05, + "loss": 0.6108053922653198, + "step": 2974 + }, + { + "epoch": 1.0482819383259911, + "grad_norm": 1.5703096539855212, + "learning_rate": 1.0034958304021766e-05, + "loss": 0.612535834312439, + "step": 2975 + }, + { + "epoch": 1.04863436123348, + "grad_norm": 1.6902304674604145, + "learning_rate": 1.0029131938148686e-05, + "loss": 0.7272380590438843, + "step": 2976 + }, + { + "epoch": 1.0489867841409692, + "grad_norm": 1.4306480582223446, + "learning_rate": 1.0023305562386222e-05, + "loss": 0.4748264253139496, + "step": 2977 + }, + { + "epoch": 1.0493392070484582, + "grad_norm": 1.7625234188194432, + "learning_rate": 1.0017479178712245e-05, + "loss": 0.6686758399009705, + "step": 2978 + }, + { + "epoch": 1.0496916299559471, + "grad_norm": 1.6796969203533192, + "learning_rate": 1.0011652789104631e-05, + "loss": 0.5003838539123535, + "step": 2979 + }, + { + "epoch": 1.050044052863436, + "grad_norm": 1.7305572983583226, + "learning_rate": 1.0005826395541257e-05, + "loss": 0.6210055351257324, + "step": 2980 + }, + { + "epoch": 1.0503964757709252, + "grad_norm": 1.6943397299052507, + "learning_rate": 1e-05, + "loss": 0.6160269975662231, + "step": 2981 + }, + { + "epoch": 1.0507488986784141, + "grad_norm": 1.6249468093767248, + "learning_rate": 9.994173604458748e-06, + "loss": 0.6432052850723267, + "step": 2982 + }, + { + "epoch": 1.051101321585903, + "grad_norm": 1.6764234439374022, + "learning_rate": 9.988347210895372e-06, + "loss": 0.588628888130188, + "step": 2983 + }, + { + "epoch": 1.051453744493392, + "grad_norm": 1.5595740377523009, + "learning_rate": 9.982520821287758e-06, + "loss": 
0.6694320440292358, + "step": 2984 + }, + { + "epoch": 1.051806167400881, + "grad_norm": 1.7276474901524372, + "learning_rate": 9.976694437613778e-06, + "loss": 0.8591301441192627, + "step": 2985 + }, + { + "epoch": 1.0521585903083701, + "grad_norm": 1.6697380234108412, + "learning_rate": 9.970868061851315e-06, + "loss": 0.6000436544418335, + "step": 2986 + }, + { + "epoch": 1.052511013215859, + "grad_norm": 1.5357275356358564, + "learning_rate": 9.965041695978239e-06, + "loss": 0.624568521976471, + "step": 2987 + }, + { + "epoch": 1.052863436123348, + "grad_norm": 1.4223866897031825, + "learning_rate": 9.959215341972414e-06, + "loss": 0.6173535585403442, + "step": 2988 + }, + { + "epoch": 1.053215859030837, + "grad_norm": 1.7069399452687213, + "learning_rate": 9.953389001811716e-06, + "loss": 0.5991729497909546, + "step": 2989 + }, + { + "epoch": 1.053568281938326, + "grad_norm": 1.782972390393551, + "learning_rate": 9.947562677473999e-06, + "loss": 0.570953905582428, + "step": 2990 + }, + { + "epoch": 1.053920704845815, + "grad_norm": 1.7332305108715658, + "learning_rate": 9.941736370937119e-06, + "loss": 0.6079390048980713, + "step": 2991 + }, + { + "epoch": 1.054273127753304, + "grad_norm": 2.110617001097567, + "learning_rate": 9.935910084178934e-06, + "loss": 0.599539577960968, + "step": 2992 + }, + { + "epoch": 1.0546255506607929, + "grad_norm": 1.5854202353385896, + "learning_rate": 9.930083819177273e-06, + "loss": 0.6736180186271667, + "step": 2993 + }, + { + "epoch": 1.054977973568282, + "grad_norm": 1.6240153775210555, + "learning_rate": 9.924257577909987e-06, + "loss": 0.6953197717666626, + "step": 2994 + }, + { + "epoch": 1.055330396475771, + "grad_norm": 1.8737137053755175, + "learning_rate": 9.918431362354892e-06, + "loss": 0.6670099496841431, + "step": 2995 + }, + { + "epoch": 1.05568281938326, + "grad_norm": 1.844007753613641, + "learning_rate": 9.912605174489811e-06, + "loss": 0.5829994678497314, + "step": 2996 + }, + { + "epoch": 
1.0560352422907489, + "grad_norm": 1.9198236703913207, + "learning_rate": 9.906779016292554e-06, + "loss": 0.5926212072372437, + "step": 2997 + }, + { + "epoch": 1.0563876651982378, + "grad_norm": 1.4868752944824364, + "learning_rate": 9.900952889740922e-06, + "loss": 0.6085237860679626, + "step": 2998 + }, + { + "epoch": 1.056740088105727, + "grad_norm": 1.8046049827658854, + "learning_rate": 9.895126796812698e-06, + "loss": 0.5348918437957764, + "step": 2999 + }, + { + "epoch": 1.0570925110132159, + "grad_norm": 1.79509807280399, + "learning_rate": 9.889300739485666e-06, + "loss": 0.6325811743736267, + "step": 3000 + }, + { + "epoch": 1.0574449339207048, + "grad_norm": 1.6006099839795653, + "learning_rate": 9.883474719737582e-06, + "loss": 0.6262463927268982, + "step": 3001 + }, + { + "epoch": 1.0577973568281938, + "grad_norm": 1.5914788157951554, + "learning_rate": 9.877648739546213e-06, + "loss": 0.5863393545150757, + "step": 3002 + }, + { + "epoch": 1.058149779735683, + "grad_norm": 2.0254476885032924, + "learning_rate": 9.871822800889284e-06, + "loss": 0.6200219392776489, + "step": 3003 + }, + { + "epoch": 1.0585022026431719, + "grad_norm": 1.6216300774961065, + "learning_rate": 9.865996905744523e-06, + "loss": 0.6994227170944214, + "step": 3004 + }, + { + "epoch": 1.0588546255506608, + "grad_norm": 1.735404014120002, + "learning_rate": 9.860171056089646e-06, + "loss": 0.6458406448364258, + "step": 3005 + }, + { + "epoch": 1.0592070484581497, + "grad_norm": 1.6209915560634427, + "learning_rate": 9.854345253902342e-06, + "loss": 0.6814782619476318, + "step": 3006 + }, + { + "epoch": 1.0595594713656387, + "grad_norm": 1.455508358080935, + "learning_rate": 9.84851950116029e-06, + "loss": 0.521275520324707, + "step": 3007 + }, + { + "epoch": 1.0599118942731278, + "grad_norm": 1.486020788258086, + "learning_rate": 9.84269379984116e-06, + "loss": 0.5541207790374756, + "step": 3008 + }, + { + "epoch": 1.0602643171806168, + "grad_norm": 1.7060435970959642, + 
"learning_rate": 9.836868151922579e-06, + "loss": 0.578704833984375, + "step": 3009 + }, + { + "epoch": 1.0606167400881057, + "grad_norm": 1.5220368339292814, + "learning_rate": 9.831042559382193e-06, + "loss": 0.6280980706214905, + "step": 3010 + }, + { + "epoch": 1.0609691629955946, + "grad_norm": 1.8314917502019485, + "learning_rate": 9.825217024197595e-06, + "loss": 0.6059408783912659, + "step": 3011 + }, + { + "epoch": 1.0613215859030838, + "grad_norm": 1.6362891327789773, + "learning_rate": 9.819391548346377e-06, + "loss": 0.6375449299812317, + "step": 3012 + }, + { + "epoch": 1.0616740088105727, + "grad_norm": 2.503364134053993, + "learning_rate": 9.81356613380611e-06, + "loss": 0.5959592461585999, + "step": 3013 + }, + { + "epoch": 1.0620264317180617, + "grad_norm": 1.735073300438408, + "learning_rate": 9.807740782554337e-06, + "loss": 0.7636409401893616, + "step": 3014 + }, + { + "epoch": 1.0623788546255506, + "grad_norm": 2.2227407713805722, + "learning_rate": 9.801915496568586e-06, + "loss": 0.6136656999588013, + "step": 3015 + }, + { + "epoch": 1.0627312775330395, + "grad_norm": 1.7360474444382674, + "learning_rate": 9.796090277826361e-06, + "loss": 0.4659839868545532, + "step": 3016 + }, + { + "epoch": 1.0630837004405287, + "grad_norm": 1.699131973967987, + "learning_rate": 9.790265128305137e-06, + "loss": 0.6053155660629272, + "step": 3017 + }, + { + "epoch": 1.0634361233480176, + "grad_norm": 1.698457126583602, + "learning_rate": 9.78444004998238e-06, + "loss": 0.6885203123092651, + "step": 3018 + }, + { + "epoch": 1.0637885462555066, + "grad_norm": 1.5620062631250171, + "learning_rate": 9.778615044835513e-06, + "loss": 0.4985584616661072, + "step": 3019 + }, + { + "epoch": 1.0641409691629955, + "grad_norm": 1.699890122838272, + "learning_rate": 9.772790114841948e-06, + "loss": 0.5782307386398315, + "step": 3020 + }, + { + "epoch": 1.0644933920704847, + "grad_norm": 1.7427928970766464, + "learning_rate": 9.766965261979072e-06, + "loss": 
0.5819451212882996, + "step": 3021 + }, + { + "epoch": 1.0648458149779736, + "grad_norm": 1.9531302264016444, + "learning_rate": 9.761140488224232e-06, + "loss": 0.7316779494285583, + "step": 3022 + }, + { + "epoch": 1.0651982378854625, + "grad_norm": 2.4211241065200633, + "learning_rate": 9.755315795554766e-06, + "loss": 0.5986718535423279, + "step": 3023 + }, + { + "epoch": 1.0655506607929515, + "grad_norm": 1.5565361520380023, + "learning_rate": 9.749491185947977e-06, + "loss": 0.5052427053451538, + "step": 3024 + }, + { + "epoch": 1.0659030837004406, + "grad_norm": 1.658020296029534, + "learning_rate": 9.743666661381123e-06, + "loss": 0.7370901107788086, + "step": 3025 + }, + { + "epoch": 1.0662555066079296, + "grad_norm": 1.575987435195716, + "learning_rate": 9.73784222383147e-06, + "loss": 0.6423007249832153, + "step": 3026 + }, + { + "epoch": 1.0666079295154185, + "grad_norm": 1.94896820476588, + "learning_rate": 9.73201787527622e-06, + "loss": 0.5679126977920532, + "step": 3027 + }, + { + "epoch": 1.0669603524229074, + "grad_norm": 2.498602043471406, + "learning_rate": 9.72619361769256e-06, + "loss": 0.5890183448791504, + "step": 3028 + }, + { + "epoch": 1.0673127753303966, + "grad_norm": 1.7647674693242208, + "learning_rate": 9.720369453057648e-06, + "loss": 0.6772822141647339, + "step": 3029 + }, + { + "epoch": 1.0676651982378855, + "grad_norm": 2.109810086892336, + "learning_rate": 9.714545383348602e-06, + "loss": 0.8275488615036011, + "step": 3030 + }, + { + "epoch": 1.0680176211453745, + "grad_norm": 1.6620933678667917, + "learning_rate": 9.708721410542517e-06, + "loss": 0.5369541645050049, + "step": 3031 + }, + { + "epoch": 1.0683700440528634, + "grad_norm": 1.611800532750273, + "learning_rate": 9.70289753661645e-06, + "loss": 0.7173746824264526, + "step": 3032 + }, + { + "epoch": 1.0687224669603523, + "grad_norm": 1.7405771304623092, + "learning_rate": 9.697073763547415e-06, + "loss": 0.597034215927124, + "step": 3033 + }, + { + "epoch": 
1.0690748898678415, + "grad_norm": 1.867958529307263, + "learning_rate": 9.691250093312419e-06, + "loss": 0.6680281162261963, + "step": 3034 + }, + { + "epoch": 1.0694273127753304, + "grad_norm": 1.4898600082698874, + "learning_rate": 9.6854265278884e-06, + "loss": 0.6155321002006531, + "step": 3035 + }, + { + "epoch": 1.0697797356828194, + "grad_norm": 2.4613840016445314, + "learning_rate": 9.67960306925229e-06, + "loss": 0.5945199728012085, + "step": 3036 + }, + { + "epoch": 1.0701321585903083, + "grad_norm": 1.7063166475670735, + "learning_rate": 9.673779719380967e-06, + "loss": 0.6492328643798828, + "step": 3037 + }, + { + "epoch": 1.0704845814977975, + "grad_norm": 1.8638826733925389, + "learning_rate": 9.667956480251273e-06, + "loss": 0.6501325964927673, + "step": 3038 + }, + { + "epoch": 1.0708370044052864, + "grad_norm": 1.4216071761527918, + "learning_rate": 9.662133353840025e-06, + "loss": 0.5956053733825684, + "step": 3039 + }, + { + "epoch": 1.0711894273127753, + "grad_norm": 1.7546711372901296, + "learning_rate": 9.656310342123988e-06, + "loss": 0.5966510772705078, + "step": 3040 + }, + { + "epoch": 1.0715418502202643, + "grad_norm": 1.7715803220306194, + "learning_rate": 9.65048744707989e-06, + "loss": 0.7096615433692932, + "step": 3041 + }, + { + "epoch": 1.0718942731277532, + "grad_norm": 1.5279732385894715, + "learning_rate": 9.644664670684429e-06, + "loss": 0.6697839498519897, + "step": 3042 + }, + { + "epoch": 1.0722466960352424, + "grad_norm": 1.6318262899161158, + "learning_rate": 9.638842014914253e-06, + "loss": 0.6288081407546997, + "step": 3043 + }, + { + "epoch": 1.0725991189427313, + "grad_norm": 1.6830476156095877, + "learning_rate": 9.633019481745973e-06, + "loss": 0.5870436429977417, + "step": 3044 + }, + { + "epoch": 1.0729515418502202, + "grad_norm": 1.4073037692368846, + "learning_rate": 9.62719707315616e-06, + "loss": 0.5540846586227417, + "step": 3045 + }, + { + "epoch": 1.0733039647577092, + "grad_norm": 1.8276869267624827, + 
"learning_rate": 9.621374791121335e-06, + "loss": 0.6134544014930725, + "step": 3046 + }, + { + "epoch": 1.0736563876651983, + "grad_norm": 1.9310361455307938, + "learning_rate": 9.61555263761799e-06, + "loss": 0.5537046194076538, + "step": 3047 + }, + { + "epoch": 1.0740088105726873, + "grad_norm": 1.5553451953770387, + "learning_rate": 9.60973061462256e-06, + "loss": 0.6423748731613159, + "step": 3048 + }, + { + "epoch": 1.0743612334801762, + "grad_norm": 1.7219317421679232, + "learning_rate": 9.603908724111438e-06, + "loss": 0.575737714767456, + "step": 3049 + }, + { + "epoch": 1.0747136563876651, + "grad_norm": 1.7334347992355148, + "learning_rate": 9.598086968060976e-06, + "loss": 0.5326197147369385, + "step": 3050 + }, + { + "epoch": 1.075066079295154, + "grad_norm": 1.5560472770838902, + "learning_rate": 9.592265348447481e-06, + "loss": 0.6533973217010498, + "step": 3051 + }, + { + "epoch": 1.0754185022026432, + "grad_norm": 1.5101678591543142, + "learning_rate": 9.586443867247212e-06, + "loss": 0.5536586046218872, + "step": 3052 + }, + { + "epoch": 1.0757709251101322, + "grad_norm": 1.6611779528904365, + "learning_rate": 9.580622526436382e-06, + "loss": 0.6024892926216125, + "step": 3053 + }, + { + "epoch": 1.076123348017621, + "grad_norm": 1.5423440836231639, + "learning_rate": 9.574801327991148e-06, + "loss": 0.5070478320121765, + "step": 3054 + }, + { + "epoch": 1.07647577092511, + "grad_norm": 1.9040251147858696, + "learning_rate": 9.568980273887637e-06, + "loss": 0.6518458127975464, + "step": 3055 + }, + { + "epoch": 1.0768281938325992, + "grad_norm": 1.8761852451910037, + "learning_rate": 9.563159366101905e-06, + "loss": 0.6120346784591675, + "step": 3056 + }, + { + "epoch": 1.0771806167400881, + "grad_norm": 1.7428937123650154, + "learning_rate": 9.557338606609973e-06, + "loss": 0.6725353598594666, + "step": 3057 + }, + { + "epoch": 1.077533039647577, + "grad_norm": 1.5136863007311347, + "learning_rate": 9.551517997387809e-06, + "loss": 
0.5311183333396912, + "step": 3058 + }, + { + "epoch": 1.077885462555066, + "grad_norm": 1.8000300040025692, + "learning_rate": 9.545697540411324e-06, + "loss": 0.5728713274002075, + "step": 3059 + }, + { + "epoch": 1.0782378854625552, + "grad_norm": 1.7991281029512354, + "learning_rate": 9.53987723765639e-06, + "loss": 0.5527676343917847, + "step": 3060 + }, + { + "epoch": 1.078590308370044, + "grad_norm": 1.9177712397501578, + "learning_rate": 9.534057091098813e-06, + "loss": 0.7529809474945068, + "step": 3061 + }, + { + "epoch": 1.078942731277533, + "grad_norm": 1.6975104946869117, + "learning_rate": 9.528237102714352e-06, + "loss": 0.5485205054283142, + "step": 3062 + }, + { + "epoch": 1.079295154185022, + "grad_norm": 1.8773141561341242, + "learning_rate": 9.522417274478716e-06, + "loss": 0.785184383392334, + "step": 3063 + }, + { + "epoch": 1.079647577092511, + "grad_norm": 1.692195190429073, + "learning_rate": 9.516597608367547e-06, + "loss": 0.5645574331283569, + "step": 3064 + }, + { + "epoch": 1.08, + "grad_norm": 1.6815198266991151, + "learning_rate": 9.51077810635645e-06, + "loss": 0.5878466367721558, + "step": 3065 + }, + { + "epoch": 1.080352422907489, + "grad_norm": 1.7635464385467587, + "learning_rate": 9.504958770420962e-06, + "loss": 0.6610634922981262, + "step": 3066 + }, + { + "epoch": 1.080704845814978, + "grad_norm": 1.8113852263213976, + "learning_rate": 9.49913960253656e-06, + "loss": 0.5928626656532288, + "step": 3067 + }, + { + "epoch": 1.0810572687224669, + "grad_norm": 1.7322633216843277, + "learning_rate": 9.49332060467868e-06, + "loss": 0.7038083672523499, + "step": 3068 + }, + { + "epoch": 1.081409691629956, + "grad_norm": 1.3686406289588096, + "learning_rate": 9.487501778822685e-06, + "loss": 0.5966217517852783, + "step": 3069 + }, + { + "epoch": 1.081762114537445, + "grad_norm": 1.686172060324731, + "learning_rate": 9.481683126943884e-06, + "loss": 0.6594187021255493, + "step": 3070 + }, + { + "epoch": 1.082114537444934, + 
"grad_norm": 1.4709153501511232, + "learning_rate": 9.475864651017536e-06, + "loss": 0.450161874294281, + "step": 3071 + }, + { + "epoch": 1.0824669603524228, + "grad_norm": 1.9209170149530705, + "learning_rate": 9.470046353018821e-06, + "loss": 0.6459252834320068, + "step": 3072 + }, + { + "epoch": 1.082819383259912, + "grad_norm": 1.5818284678879686, + "learning_rate": 9.464228234922882e-06, + "loss": 0.6505793929100037, + "step": 3073 + }, + { + "epoch": 1.083171806167401, + "grad_norm": 1.5944722571395005, + "learning_rate": 9.458410298704781e-06, + "loss": 0.6480910778045654, + "step": 3074 + }, + { + "epoch": 1.0835242290748899, + "grad_norm": 1.530550500951046, + "learning_rate": 9.452592546339527e-06, + "loss": 0.6494983434677124, + "step": 3075 + }, + { + "epoch": 1.0838766519823788, + "grad_norm": 1.560525752678919, + "learning_rate": 9.44677497980207e-06, + "loss": 0.4710897207260132, + "step": 3076 + }, + { + "epoch": 1.0842290748898677, + "grad_norm": 1.5265540562186208, + "learning_rate": 9.440957601067294e-06, + "loss": 0.599402904510498, + "step": 3077 + }, + { + "epoch": 1.084581497797357, + "grad_norm": 1.9340764168188993, + "learning_rate": 9.435140412110006e-06, + "loss": 0.665642499923706, + "step": 3078 + }, + { + "epoch": 1.0849339207048458, + "grad_norm": 1.8868033009058576, + "learning_rate": 9.429323414904975e-06, + "loss": 0.5861828923225403, + "step": 3079 + }, + { + "epoch": 1.0852863436123348, + "grad_norm": 1.581789489047221, + "learning_rate": 9.42350661142688e-06, + "loss": 0.6115351915359497, + "step": 3080 + }, + { + "epoch": 1.0856387665198237, + "grad_norm": 1.6610293276945491, + "learning_rate": 9.417690003650353e-06, + "loss": 0.6627066135406494, + "step": 3081 + }, + { + "epoch": 1.0859911894273129, + "grad_norm": 1.5744692750190625, + "learning_rate": 9.411873593549947e-06, + "loss": 0.6155676245689392, + "step": 3082 + }, + { + "epoch": 1.0863436123348018, + "grad_norm": 1.59429166731528, + "learning_rate": 
9.406057383100151e-06, + "loss": 0.5429089069366455, + "step": 3083 + }, + { + "epoch": 1.0866960352422907, + "grad_norm": 1.638763712553269, + "learning_rate": 9.400241374275391e-06, + "loss": 0.5416614413261414, + "step": 3084 + }, + { + "epoch": 1.0870484581497797, + "grad_norm": 1.5652840639245515, + "learning_rate": 9.394425569050018e-06, + "loss": 0.6708710193634033, + "step": 3085 + }, + { + "epoch": 1.0874008810572686, + "grad_norm": 1.6407899201706977, + "learning_rate": 9.388609969398318e-06, + "loss": 0.588347315788269, + "step": 3086 + }, + { + "epoch": 1.0877533039647578, + "grad_norm": 1.6990356352816562, + "learning_rate": 9.38279457729451e-06, + "loss": 0.4999222755432129, + "step": 3087 + }, + { + "epoch": 1.0881057268722467, + "grad_norm": 1.5508462782114225, + "learning_rate": 9.37697939471273e-06, + "loss": 0.5400034189224243, + "step": 3088 + }, + { + "epoch": 1.0884581497797357, + "grad_norm": 1.6869985582255194, + "learning_rate": 9.37116442362706e-06, + "loss": 0.5583670139312744, + "step": 3089 + }, + { + "epoch": 1.0888105726872246, + "grad_norm": 2.063349590123988, + "learning_rate": 9.365349666011497e-06, + "loss": 0.6863820552825928, + "step": 3090 + }, + { + "epoch": 1.0891629955947137, + "grad_norm": 1.7395123823701124, + "learning_rate": 9.35953512383997e-06, + "loss": 0.6422115564346313, + "step": 3091 + }, + { + "epoch": 1.0895154185022027, + "grad_norm": 1.7254266288951046, + "learning_rate": 9.353720799086337e-06, + "loss": 0.7106888294219971, + "step": 3092 + }, + { + "epoch": 1.0898678414096916, + "grad_norm": 1.7765997338600088, + "learning_rate": 9.347906693724379e-06, + "loss": 0.6070472002029419, + "step": 3093 + }, + { + "epoch": 1.0902202643171806, + "grad_norm": 2.653468303504809, + "learning_rate": 9.342092809727807e-06, + "loss": 0.577377200126648, + "step": 3094 + }, + { + "epoch": 1.0905726872246695, + "grad_norm": 2.222722693331331, + "learning_rate": 9.336279149070252e-06, + "loss": 0.6249948740005493, + "step": 
3095 + }, + { + "epoch": 1.0909251101321586, + "grad_norm": 1.7155188858933852, + "learning_rate": 9.330465713725265e-06, + "loss": 0.5515183210372925, + "step": 3096 + }, + { + "epoch": 1.0912775330396476, + "grad_norm": 1.866411497064146, + "learning_rate": 9.324652505666336e-06, + "loss": 0.6074613332748413, + "step": 3097 + }, + { + "epoch": 1.0916299559471365, + "grad_norm": 1.7632595046666684, + "learning_rate": 9.318839526866863e-06, + "loss": 0.6520178318023682, + "step": 3098 + }, + { + "epoch": 1.0919823788546255, + "grad_norm": 1.4274715968201055, + "learning_rate": 9.31302677930017e-06, + "loss": 0.45863813161849976, + "step": 3099 + }, + { + "epoch": 1.0923348017621146, + "grad_norm": 1.6772052003130429, + "learning_rate": 9.307214264939508e-06, + "loss": 0.610805869102478, + "step": 3100 + }, + { + "epoch": 1.0926872246696036, + "grad_norm": 1.6545163632346178, + "learning_rate": 9.30140198575804e-06, + "loss": 0.5954282283782959, + "step": 3101 + }, + { + "epoch": 1.0930396475770925, + "grad_norm": 1.4805927694864789, + "learning_rate": 9.29558994372886e-06, + "loss": 0.6941400170326233, + "step": 3102 + }, + { + "epoch": 1.0933920704845814, + "grad_norm": 1.4236727289117346, + "learning_rate": 9.289778140824974e-06, + "loss": 0.6723533868789673, + "step": 3103 + }, + { + "epoch": 1.0937444933920706, + "grad_norm": 1.5690147341016918, + "learning_rate": 9.2839665790193e-06, + "loss": 0.49137037992477417, + "step": 3104 + }, + { + "epoch": 1.0940969162995595, + "grad_norm": 1.6112616837583658, + "learning_rate": 9.278155260284692e-06, + "loss": 0.5827045440673828, + "step": 3105 + }, + { + "epoch": 1.0944493392070485, + "grad_norm": 1.7496187485651187, + "learning_rate": 9.272344186593909e-06, + "loss": 0.6391462683677673, + "step": 3106 + }, + { + "epoch": 1.0948017621145374, + "grad_norm": 1.857839078789808, + "learning_rate": 9.266533359919628e-06, + "loss": 0.4994915723800659, + "step": 3107 + }, + { + "epoch": 1.0951541850220265, + "grad_norm": 
1.7820549618718244, + "learning_rate": 9.260722782234445e-06, + "loss": 0.6480728387832642, + "step": 3108 + }, + { + "epoch": 1.0955066079295155, + "grad_norm": 1.9724258404436363, + "learning_rate": 9.25491245551087e-06, + "loss": 0.5734057426452637, + "step": 3109 + }, + { + "epoch": 1.0958590308370044, + "grad_norm": 1.5757198230236702, + "learning_rate": 9.249102381721328e-06, + "loss": 0.5650345087051392, + "step": 3110 + }, + { + "epoch": 1.0962114537444934, + "grad_norm": 1.6196253415823336, + "learning_rate": 9.243292562838164e-06, + "loss": 0.6261975765228271, + "step": 3111 + }, + { + "epoch": 1.0965638766519823, + "grad_norm": 1.6283298345999566, + "learning_rate": 9.237483000833619e-06, + "loss": 0.730735182762146, + "step": 3112 + }, + { + "epoch": 1.0969162995594715, + "grad_norm": 1.614573149399901, + "learning_rate": 9.231673697679867e-06, + "loss": 0.6198948621749878, + "step": 3113 + }, + { + "epoch": 1.0972687224669604, + "grad_norm": 1.526191646446162, + "learning_rate": 9.225864655348982e-06, + "loss": 0.5302865505218506, + "step": 3114 + }, + { + "epoch": 1.0976211453744493, + "grad_norm": 1.6895671377093768, + "learning_rate": 9.220055875812955e-06, + "loss": 0.5995128154754639, + "step": 3115 + }, + { + "epoch": 1.0979735682819383, + "grad_norm": 1.5451580100020488, + "learning_rate": 9.214247361043687e-06, + "loss": 0.3801479935646057, + "step": 3116 + }, + { + "epoch": 1.0983259911894274, + "grad_norm": 1.7467243659333909, + "learning_rate": 9.208439113012984e-06, + "loss": 0.5617209076881409, + "step": 3117 + }, + { + "epoch": 1.0986784140969164, + "grad_norm": 2.3313501330545776, + "learning_rate": 9.202631133692572e-06, + "loss": 0.5233842134475708, + "step": 3118 + }, + { + "epoch": 1.0990308370044053, + "grad_norm": 1.5308784453968334, + "learning_rate": 9.196823425054073e-06, + "loss": 0.5300124883651733, + "step": 3119 + }, + { + "epoch": 1.0993832599118942, + "grad_norm": 1.6766914696070794, + "learning_rate": 
9.191015989069024e-06, + "loss": 0.686185359954834, + "step": 3120 + }, + { + "epoch": 1.0997356828193832, + "grad_norm": 4.625699614895419, + "learning_rate": 9.18520882770887e-06, + "loss": 0.6043056845664978, + "step": 3121 + }, + { + "epoch": 1.1000881057268723, + "grad_norm": 1.4445640616396158, + "learning_rate": 9.179401942944961e-06, + "loss": 0.6299905776977539, + "step": 3122 + }, + { + "epoch": 1.1004405286343613, + "grad_norm": 1.639683344548818, + "learning_rate": 9.173595336748557e-06, + "loss": 0.57872474193573, + "step": 3123 + }, + { + "epoch": 1.1007929515418502, + "grad_norm": 1.6533643796746975, + "learning_rate": 9.167789011090818e-06, + "loss": 0.5638746023178101, + "step": 3124 + }, + { + "epoch": 1.1011453744493391, + "grad_norm": 1.9780317067618627, + "learning_rate": 9.161982967942806e-06, + "loss": 0.6150490045547485, + "step": 3125 + }, + { + "epoch": 1.1014977973568283, + "grad_norm": 1.6035565827670604, + "learning_rate": 9.156177209275503e-06, + "loss": 0.547231912612915, + "step": 3126 + }, + { + "epoch": 1.1018502202643172, + "grad_norm": 1.753224578445511, + "learning_rate": 9.150371737059773e-06, + "loss": 0.6999325752258301, + "step": 3127 + }, + { + "epoch": 1.1022026431718062, + "grad_norm": 1.868897492269033, + "learning_rate": 9.144566553266396e-06, + "loss": 0.7175568342208862, + "step": 3128 + }, + { + "epoch": 1.102555066079295, + "grad_norm": 1.6615553040601516, + "learning_rate": 9.138761659866054e-06, + "loss": 0.7308273911476135, + "step": 3129 + }, + { + "epoch": 1.102907488986784, + "grad_norm": 1.6216416819643327, + "learning_rate": 9.132957058829323e-06, + "loss": 0.5951930284500122, + "step": 3130 + }, + { + "epoch": 1.1032599118942732, + "grad_norm": 1.8459198222998503, + "learning_rate": 9.127152752126688e-06, + "loss": 0.5684988498687744, + "step": 3131 + }, + { + "epoch": 1.1036123348017621, + "grad_norm": 1.6778026851292638, + "learning_rate": 9.121348741728532e-06, + "loss": 0.6490764617919922, + "step": 
3132 + }, + { + "epoch": 1.103964757709251, + "grad_norm": 1.9759558630482505, + "learning_rate": 9.115545029605129e-06, + "loss": 0.7795257568359375, + "step": 3133 + }, + { + "epoch": 1.10431718061674, + "grad_norm": 1.677150279034534, + "learning_rate": 9.10974161772667e-06, + "loss": 0.5443774461746216, + "step": 3134 + }, + { + "epoch": 1.1046696035242292, + "grad_norm": 1.4979331299176493, + "learning_rate": 9.103938508063223e-06, + "loss": 0.48989373445510864, + "step": 3135 + }, + { + "epoch": 1.105022026431718, + "grad_norm": 1.7384756252454785, + "learning_rate": 9.098135702584762e-06, + "loss": 0.5628808736801147, + "step": 3136 + }, + { + "epoch": 1.105374449339207, + "grad_norm": 1.7853238397751252, + "learning_rate": 9.092333203261168e-06, + "loss": 0.6549321413040161, + "step": 3137 + }, + { + "epoch": 1.105726872246696, + "grad_norm": 1.6854667721006384, + "learning_rate": 9.0865310120622e-06, + "loss": 0.7353606224060059, + "step": 3138 + }, + { + "epoch": 1.106079295154185, + "grad_norm": 1.4467352618974103, + "learning_rate": 9.080729130957528e-06, + "loss": 0.650668203830719, + "step": 3139 + }, + { + "epoch": 1.106431718061674, + "grad_norm": 1.4313841589857448, + "learning_rate": 9.07492756191671e-06, + "loss": 0.5618860721588135, + "step": 3140 + }, + { + "epoch": 1.106784140969163, + "grad_norm": 1.6263891772619556, + "learning_rate": 9.069126306909187e-06, + "loss": 0.5532773733139038, + "step": 3141 + }, + { + "epoch": 1.107136563876652, + "grad_norm": 1.5761547934103723, + "learning_rate": 9.06332536790432e-06, + "loss": 0.6240289211273193, + "step": 3142 + }, + { + "epoch": 1.1074889867841409, + "grad_norm": 1.6326282131144043, + "learning_rate": 9.057524746871335e-06, + "loss": 0.5952814221382141, + "step": 3143 + }, + { + "epoch": 1.10784140969163, + "grad_norm": 1.7063742447281478, + "learning_rate": 9.051724445779373e-06, + "loss": 0.6011646389961243, + "step": 3144 + }, + { + "epoch": 1.108193832599119, + "grad_norm": 
1.54385403751274, + "learning_rate": 9.045924466597448e-06, + "loss": 0.6964641213417053, + "step": 3145 + }, + { + "epoch": 1.108546255506608, + "grad_norm": 1.9798851390043897, + "learning_rate": 9.040124811294473e-06, + "loss": 0.6821622848510742, + "step": 3146 + }, + { + "epoch": 1.1088986784140968, + "grad_norm": 1.569676973352834, + "learning_rate": 9.034325481839253e-06, + "loss": 0.5045080184936523, + "step": 3147 + }, + { + "epoch": 1.109251101321586, + "grad_norm": 1.608921739397865, + "learning_rate": 9.028526480200482e-06, + "loss": 0.5709735155105591, + "step": 3148 + }, + { + "epoch": 1.109603524229075, + "grad_norm": 1.6331449251948336, + "learning_rate": 9.022727808346731e-06, + "loss": 0.5882325172424316, + "step": 3149 + }, + { + "epoch": 1.1099559471365639, + "grad_norm": 1.6560869042500304, + "learning_rate": 9.016929468246482e-06, + "loss": 0.627426266670227, + "step": 3150 + }, + { + "epoch": 1.1103083700440528, + "grad_norm": 1.5720686051365462, + "learning_rate": 9.011131461868078e-06, + "loss": 0.42419761419296265, + "step": 3151 + }, + { + "epoch": 1.110660792951542, + "grad_norm": 1.487398401726564, + "learning_rate": 9.005333791179775e-06, + "loss": 0.5261023044586182, + "step": 3152 + }, + { + "epoch": 1.111013215859031, + "grad_norm": 1.853640852117203, + "learning_rate": 8.999536458149692e-06, + "loss": 0.6654448509216309, + "step": 3153 + }, + { + "epoch": 1.1113656387665198, + "grad_norm": 1.8252144061899127, + "learning_rate": 8.993739464745843e-06, + "loss": 0.5939514636993408, + "step": 3154 + }, + { + "epoch": 1.1117180616740088, + "grad_norm": 2.120048901517583, + "learning_rate": 8.987942812936133e-06, + "loss": 0.6381959319114685, + "step": 3155 + }, + { + "epoch": 1.1120704845814977, + "grad_norm": 1.5708485505419778, + "learning_rate": 8.982146504688343e-06, + "loss": 0.5474847555160522, + "step": 3156 + }, + { + "epoch": 1.1124229074889869, + "grad_norm": 1.9617265332983251, + "learning_rate": 8.97635054197013e-06, + 
"loss": 0.6306884288787842, + "step": 3157 + }, + { + "epoch": 1.1127753303964758, + "grad_norm": 1.6582794196349533, + "learning_rate": 8.97055492674906e-06, + "loss": 0.5988807678222656, + "step": 3158 + }, + { + "epoch": 1.1131277533039647, + "grad_norm": 1.4627681911625667, + "learning_rate": 8.964759660992547e-06, + "loss": 0.6316757202148438, + "step": 3159 + }, + { + "epoch": 1.1134801762114537, + "grad_norm": 2.1475966254528265, + "learning_rate": 8.958964746667917e-06, + "loss": 0.6031370162963867, + "step": 3160 + }, + { + "epoch": 1.1138325991189428, + "grad_norm": 1.631780585948097, + "learning_rate": 8.953170185742357e-06, + "loss": 0.6334977149963379, + "step": 3161 + }, + { + "epoch": 1.1141850220264318, + "grad_norm": 1.7666867258825858, + "learning_rate": 8.947375980182937e-06, + "loss": 0.49237731099128723, + "step": 3162 + }, + { + "epoch": 1.1145374449339207, + "grad_norm": 1.8113939325794732, + "learning_rate": 8.941582131956615e-06, + "loss": 0.7349523305892944, + "step": 3163 + }, + { + "epoch": 1.1148898678414096, + "grad_norm": 1.9764498599764084, + "learning_rate": 8.935788643030218e-06, + "loss": 0.5048422813415527, + "step": 3164 + }, + { + "epoch": 1.1152422907488986, + "grad_norm": 1.90381850621639, + "learning_rate": 8.92999551537046e-06, + "loss": 0.6217244267463684, + "step": 3165 + }, + { + "epoch": 1.1155947136563877, + "grad_norm": 1.6579628905821213, + "learning_rate": 8.924202750943926e-06, + "loss": 0.4949147701263428, + "step": 3166 + }, + { + "epoch": 1.1159471365638767, + "grad_norm": 1.8665150826118222, + "learning_rate": 8.918410351717074e-06, + "loss": 0.5975630283355713, + "step": 3167 + }, + { + "epoch": 1.1162995594713656, + "grad_norm": 1.8627553919144322, + "learning_rate": 8.91261831965625e-06, + "loss": 0.7546026110649109, + "step": 3168 + }, + { + "epoch": 1.1166519823788545, + "grad_norm": 1.8785066059323416, + "learning_rate": 8.906826656727665e-06, + "loss": 0.6238037347793579, + "step": 3169 + }, + { + 
"epoch": 1.1170044052863437, + "grad_norm": 1.7775910427875068, + "learning_rate": 8.901035364897407e-06, + "loss": 0.617587685585022, + "step": 3170 + }, + { + "epoch": 1.1173568281938326, + "grad_norm": 1.6345696523196545, + "learning_rate": 8.895244446131445e-06, + "loss": 0.4834432005882263, + "step": 3171 + }, + { + "epoch": 1.1177092511013216, + "grad_norm": 1.8061061322305951, + "learning_rate": 8.889453902395608e-06, + "loss": 0.614972710609436, + "step": 3172 + }, + { + "epoch": 1.1180616740088105, + "grad_norm": 2.26536947887869, + "learning_rate": 8.883663735655612e-06, + "loss": 0.6468379497528076, + "step": 3173 + }, + { + "epoch": 1.1184140969162994, + "grad_norm": 1.8154030785363677, + "learning_rate": 8.877873947877042e-06, + "loss": 0.6372466683387756, + "step": 3174 + }, + { + "epoch": 1.1187665198237886, + "grad_norm": 1.8831907584481906, + "learning_rate": 8.872084541025336e-06, + "loss": 0.6295863389968872, + "step": 3175 + }, + { + "epoch": 1.1191189427312775, + "grad_norm": 1.7211075291863254, + "learning_rate": 8.866295517065831e-06, + "loss": 0.6109524369239807, + "step": 3176 + }, + { + "epoch": 1.1194713656387665, + "grad_norm": 1.6861537948886334, + "learning_rate": 8.860506877963715e-06, + "loss": 0.6724812388420105, + "step": 3177 + }, + { + "epoch": 1.1198237885462554, + "grad_norm": 1.4091706259139964, + "learning_rate": 8.854718625684049e-06, + "loss": 0.6612162590026855, + "step": 3178 + }, + { + "epoch": 1.1201762114537446, + "grad_norm": 1.6332443405139663, + "learning_rate": 8.84893076219177e-06, + "loss": 0.6209636926651001, + "step": 3179 + }, + { + "epoch": 1.1205286343612335, + "grad_norm": 1.7567347030111673, + "learning_rate": 8.843143289451673e-06, + "loss": 0.8548281192779541, + "step": 3180 + }, + { + "epoch": 1.1208810572687224, + "grad_norm": 1.742397796953756, + "learning_rate": 8.837356209428428e-06, + "loss": 0.4621508717536926, + "step": 3181 + }, + { + "epoch": 1.1212334801762114, + "grad_norm": 
1.8553184481302196, + "learning_rate": 8.831569524086568e-06, + "loss": 0.5065817832946777, + "step": 3182 + }, + { + "epoch": 1.1215859030837005, + "grad_norm": 1.5532313157641433, + "learning_rate": 8.825783235390488e-06, + "loss": 0.5467691421508789, + "step": 3183 + }, + { + "epoch": 1.1219383259911895, + "grad_norm": 1.3786030341795126, + "learning_rate": 8.81999734530446e-06, + "loss": 0.4938517212867737, + "step": 3184 + }, + { + "epoch": 1.1222907488986784, + "grad_norm": 1.4972934746199023, + "learning_rate": 8.814211855792609e-06, + "loss": 0.6125702857971191, + "step": 3185 + }, + { + "epoch": 1.1226431718061674, + "grad_norm": 1.427476145591487, + "learning_rate": 8.80842676881893e-06, + "loss": 0.5272841453552246, + "step": 3186 + }, + { + "epoch": 1.1229955947136563, + "grad_norm": 1.8463623605620603, + "learning_rate": 8.802642086347278e-06, + "loss": 0.5595715045928955, + "step": 3187 + }, + { + "epoch": 1.1233480176211454, + "grad_norm": 1.7533827268189746, + "learning_rate": 8.796857810341375e-06, + "loss": 0.7178677916526794, + "step": 3188 + }, + { + "epoch": 1.1237004405286344, + "grad_norm": 2.166791630557212, + "learning_rate": 8.791073942764806e-06, + "loss": 0.6000991463661194, + "step": 3189 + }, + { + "epoch": 1.1240528634361233, + "grad_norm": 1.7926160729471858, + "learning_rate": 8.785290485581008e-06, + "loss": 0.537361741065979, + "step": 3190 + }, + { + "epoch": 1.1244052863436123, + "grad_norm": 1.7666842188914018, + "learning_rate": 8.779507440753286e-06, + "loss": 0.7135556936264038, + "step": 3191 + }, + { + "epoch": 1.1247577092511014, + "grad_norm": 1.7053825384185084, + "learning_rate": 8.773724810244805e-06, + "loss": 0.501063346862793, + "step": 3192 + }, + { + "epoch": 1.1251101321585903, + "grad_norm": 1.679109568038749, + "learning_rate": 8.767942596018587e-06, + "loss": 0.6885302662849426, + "step": 3193 + }, + { + "epoch": 1.1254625550660793, + "grad_norm": 1.321748305255468, + "learning_rate": 8.762160800037516e-06, + 
"loss": 0.5902360081672668, + "step": 3194 + }, + { + "epoch": 1.1258149779735682, + "grad_norm": 1.687654327550192, + "learning_rate": 8.75637942426433e-06, + "loss": 0.6308953762054443, + "step": 3195 + }, + { + "epoch": 1.1261674008810574, + "grad_norm": 1.8380657710321036, + "learning_rate": 8.750598470661625e-06, + "loss": 0.5710124969482422, + "step": 3196 + }, + { + "epoch": 1.1265198237885463, + "grad_norm": 1.76295044659038, + "learning_rate": 8.744817941191862e-06, + "loss": 0.6110632419586182, + "step": 3197 + }, + { + "epoch": 1.1268722466960353, + "grad_norm": 1.7274451742305768, + "learning_rate": 8.73903783781734e-06, + "loss": 0.5274624824523926, + "step": 3198 + }, + { + "epoch": 1.1272246696035242, + "grad_norm": 1.549070468504263, + "learning_rate": 8.733258162500228e-06, + "loss": 0.6144713163375854, + "step": 3199 + }, + { + "epoch": 1.1275770925110131, + "grad_norm": 1.8001185698886477, + "learning_rate": 8.727478917202551e-06, + "loss": 0.6404621005058289, + "step": 3200 + }, + { + "epoch": 1.1279295154185023, + "grad_norm": 1.602548541775438, + "learning_rate": 8.721700103886177e-06, + "loss": 0.5693025588989258, + "step": 3201 + }, + { + "epoch": 1.1282819383259912, + "grad_norm": 1.6563446017851289, + "learning_rate": 8.715921724512838e-06, + "loss": 0.5631159543991089, + "step": 3202 + }, + { + "epoch": 1.1286343612334802, + "grad_norm": 1.5785191171510689, + "learning_rate": 8.710143781044113e-06, + "loss": 0.648078441619873, + "step": 3203 + }, + { + "epoch": 1.128986784140969, + "grad_norm": 2.0721270642934666, + "learning_rate": 8.704366275441426e-06, + "loss": 0.6858379244804382, + "step": 3204 + }, + { + "epoch": 1.1293392070484582, + "grad_norm": 1.8203927475030908, + "learning_rate": 8.698589209666074e-06, + "loss": 0.7244000434875488, + "step": 3205 + }, + { + "epoch": 1.1296916299559472, + "grad_norm": 1.7775130777760553, + "learning_rate": 8.692812585679182e-06, + "loss": 0.5918365716934204, + "step": 3206 + }, + { + "epoch": 
1.1300440528634361, + "grad_norm": 1.8950041670387165, + "learning_rate": 8.687036405441733e-06, + "loss": 0.6893443465232849, + "step": 3207 + }, + { + "epoch": 1.130396475770925, + "grad_norm": 1.6934464725865028, + "learning_rate": 8.681260670914564e-06, + "loss": 0.729834794998169, + "step": 3208 + }, + { + "epoch": 1.130748898678414, + "grad_norm": 1.9278305082183818, + "learning_rate": 8.675485384058356e-06, + "loss": 0.6525821685791016, + "step": 3209 + }, + { + "epoch": 1.1311013215859032, + "grad_norm": 1.7892045210081244, + "learning_rate": 8.669710546833642e-06, + "loss": 0.6799874305725098, + "step": 3210 + }, + { + "epoch": 1.131453744493392, + "grad_norm": 1.6216385781826248, + "learning_rate": 8.6639361612008e-06, + "loss": 0.5614932775497437, + "step": 3211 + }, + { + "epoch": 1.131806167400881, + "grad_norm": 1.6912315117870094, + "learning_rate": 8.658162229120045e-06, + "loss": 0.5975101590156555, + "step": 3212 + }, + { + "epoch": 1.13215859030837, + "grad_norm": 1.7352702737909875, + "learning_rate": 8.652388752551458e-06, + "loss": 0.5367887020111084, + "step": 3213 + }, + { + "epoch": 1.1325110132158591, + "grad_norm": 1.360358935584503, + "learning_rate": 8.646615733454949e-06, + "loss": 0.4451865553855896, + "step": 3214 + }, + { + "epoch": 1.132863436123348, + "grad_norm": 1.8983821913108012, + "learning_rate": 8.64084317379028e-06, + "loss": 0.6482576131820679, + "step": 3215 + }, + { + "epoch": 1.133215859030837, + "grad_norm": 1.5858394578763535, + "learning_rate": 8.635071075517053e-06, + "loss": 0.5890318155288696, + "step": 3216 + }, + { + "epoch": 1.133568281938326, + "grad_norm": 1.6567929917802857, + "learning_rate": 8.629299440594719e-06, + "loss": 0.554576575756073, + "step": 3217 + }, + { + "epoch": 1.1339207048458149, + "grad_norm": 1.6966150183280715, + "learning_rate": 8.623528270982567e-06, + "loss": 0.5987116694450378, + "step": 3218 + }, + { + "epoch": 1.134273127753304, + "grad_norm": 1.8696533969224407, + 
"learning_rate": 8.617757568639731e-06, + "loss": 0.49857625365257263, + "step": 3219 + }, + { + "epoch": 1.134625550660793, + "grad_norm": 1.6960564098429034, + "learning_rate": 8.61198733552518e-06, + "loss": 0.6116641759872437, + "step": 3220 + }, + { + "epoch": 1.134977973568282, + "grad_norm": 1.6619215502907394, + "learning_rate": 8.606217573597738e-06, + "loss": 0.4346674978733063, + "step": 3221 + }, + { + "epoch": 1.1353303964757708, + "grad_norm": 1.6058889875943096, + "learning_rate": 8.600448284816046e-06, + "loss": 0.6973283290863037, + "step": 3222 + }, + { + "epoch": 1.13568281938326, + "grad_norm": 1.547791232560021, + "learning_rate": 8.594679471138613e-06, + "loss": 0.5457896590232849, + "step": 3223 + }, + { + "epoch": 1.136035242290749, + "grad_norm": 1.6457593373386994, + "learning_rate": 8.58891113452376e-06, + "loss": 0.4520479440689087, + "step": 3224 + }, + { + "epoch": 1.1363876651982379, + "grad_norm": 1.6501706928794149, + "learning_rate": 8.58314327692966e-06, + "loss": 0.6169587969779968, + "step": 3225 + }, + { + "epoch": 1.1367400881057268, + "grad_norm": 1.729795732302939, + "learning_rate": 8.577375900314327e-06, + "loss": 0.6398670673370361, + "step": 3226 + }, + { + "epoch": 1.1370925110132157, + "grad_norm": 1.6846614829900397, + "learning_rate": 8.571609006635604e-06, + "loss": 0.5772207975387573, + "step": 3227 + }, + { + "epoch": 1.137444933920705, + "grad_norm": 1.5622430074284195, + "learning_rate": 8.565842597851165e-06, + "loss": 0.5561503171920776, + "step": 3228 + }, + { + "epoch": 1.1377973568281938, + "grad_norm": 1.644881271079104, + "learning_rate": 8.560076675918537e-06, + "loss": 0.4702373743057251, + "step": 3229 + }, + { + "epoch": 1.1381497797356828, + "grad_norm": 1.778044829497574, + "learning_rate": 8.554311242795061e-06, + "loss": 0.5967564582824707, + "step": 3230 + }, + { + "epoch": 1.138502202643172, + "grad_norm": 1.782270527802186, + "learning_rate": 8.548546300437928e-06, + "loss": 0.4749453663825989, 
+ "step": 3231 + }, + { + "epoch": 1.1388546255506609, + "grad_norm": 2.2009062727733046, + "learning_rate": 8.542781850804155e-06, + "loss": 0.6939869523048401, + "step": 3232 + }, + { + "epoch": 1.1392070484581498, + "grad_norm": 1.4327701228186707, + "learning_rate": 8.537017895850593e-06, + "loss": 0.5618892908096313, + "step": 3233 + }, + { + "epoch": 1.1395594713656387, + "grad_norm": 1.6784618730938181, + "learning_rate": 8.531254437533925e-06, + "loss": 0.6627654433250427, + "step": 3234 + }, + { + "epoch": 1.1399118942731277, + "grad_norm": 1.770712809653697, + "learning_rate": 8.525491477810671e-06, + "loss": 0.6365151405334473, + "step": 3235 + }, + { + "epoch": 1.1402643171806168, + "grad_norm": 1.6623213186798471, + "learning_rate": 8.519729018637164e-06, + "loss": 0.5207303762435913, + "step": 3236 + }, + { + "epoch": 1.1406167400881058, + "grad_norm": 1.8240600257881658, + "learning_rate": 8.513967061969594e-06, + "loss": 0.7469059228897095, + "step": 3237 + }, + { + "epoch": 1.1409691629955947, + "grad_norm": 1.7786802310337648, + "learning_rate": 8.508205609763955e-06, + "loss": 0.5778630971908569, + "step": 3238 + }, + { + "epoch": 1.1413215859030836, + "grad_norm": 1.756406665695002, + "learning_rate": 8.502444663976089e-06, + "loss": 0.5447480082511902, + "step": 3239 + }, + { + "epoch": 1.1416740088105728, + "grad_norm": 1.628690443424602, + "learning_rate": 8.496684226561653e-06, + "loss": 0.6002986431121826, + "step": 3240 + }, + { + "epoch": 1.1420264317180617, + "grad_norm": 1.7257255594282812, + "learning_rate": 8.490924299476133e-06, + "loss": 0.7627072930335999, + "step": 3241 + }, + { + "epoch": 1.1423788546255507, + "grad_norm": 1.725113553289998, + "learning_rate": 8.485164884674854e-06, + "loss": 0.6406078338623047, + "step": 3242 + }, + { + "epoch": 1.1427312775330396, + "grad_norm": 2.110533369358698, + "learning_rate": 8.479405984112949e-06, + "loss": 0.47047436237335205, + "step": 3243 + }, + { + "epoch": 1.1430837004405285, + 
"grad_norm": 2.0564519486525903, + "learning_rate": 8.473647599745393e-06, + "loss": 0.6702529191970825, + "step": 3244 + }, + { + "epoch": 1.1434361233480177, + "grad_norm": 2.1168699536348488, + "learning_rate": 8.467889733526977e-06, + "loss": 0.6570258140563965, + "step": 3245 + }, + { + "epoch": 1.1437885462555066, + "grad_norm": 11.021488641985083, + "learning_rate": 8.462132387412312e-06, + "loss": 0.6248423457145691, + "step": 3246 + }, + { + "epoch": 1.1441409691629956, + "grad_norm": 1.6339128666105858, + "learning_rate": 8.456375563355842e-06, + "loss": 0.7377427816390991, + "step": 3247 + }, + { + "epoch": 1.1444933920704845, + "grad_norm": 1.8159484011485405, + "learning_rate": 8.45061926331183e-06, + "loss": 0.6469020843505859, + "step": 3248 + }, + { + "epoch": 1.1448458149779737, + "grad_norm": 1.81461416151687, + "learning_rate": 8.444863489234356e-06, + "loss": 0.6417430639266968, + "step": 3249 + }, + { + "epoch": 1.1451982378854626, + "grad_norm": 1.7715952211280361, + "learning_rate": 8.439108243077335e-06, + "loss": 0.5447275638580322, + "step": 3250 + }, + { + "epoch": 1.1455506607929515, + "grad_norm": 1.8341737914542349, + "learning_rate": 8.433353526794484e-06, + "loss": 0.6621315479278564, + "step": 3251 + }, + { + "epoch": 1.1459030837004405, + "grad_norm": 1.850872292820976, + "learning_rate": 8.42759934233936e-06, + "loss": 0.5660392045974731, + "step": 3252 + }, + { + "epoch": 1.1462555066079294, + "grad_norm": 1.695638018183687, + "learning_rate": 8.42184569166532e-06, + "loss": 0.43074172735214233, + "step": 3253 + }, + { + "epoch": 1.1466079295154186, + "grad_norm": 1.6152519611154568, + "learning_rate": 8.416092576725554e-06, + "loss": 0.5863226056098938, + "step": 3254 + }, + { + "epoch": 1.1469603524229075, + "grad_norm": 1.8724827582882198, + "learning_rate": 8.410339999473067e-06, + "loss": 0.6003422737121582, + "step": 3255 + }, + { + "epoch": 1.1473127753303964, + "grad_norm": 1.806876842860533, + "learning_rate": 
8.404587961860678e-06, + "loss": 0.6109241247177124, + "step": 3256 + }, + { + "epoch": 1.1476651982378854, + "grad_norm": 1.7768687099142642, + "learning_rate": 8.398836465841021e-06, + "loss": 0.5749140977859497, + "step": 3257 + }, + { + "epoch": 1.1480176211453745, + "grad_norm": 1.762377433704451, + "learning_rate": 8.393085513366557e-06, + "loss": 0.6920739412307739, + "step": 3258 + }, + { + "epoch": 1.1483700440528635, + "grad_norm": 1.903311052790267, + "learning_rate": 8.38733510638955e-06, + "loss": 0.6632573008537292, + "step": 3259 + }, + { + "epoch": 1.1487224669603524, + "grad_norm": 1.925929272799836, + "learning_rate": 8.381585246862091e-06, + "loss": 0.6396503448486328, + "step": 3260 + }, + { + "epoch": 1.1490748898678413, + "grad_norm": 4.327872701462553, + "learning_rate": 8.375835936736072e-06, + "loss": 0.5975937843322754, + "step": 3261 + }, + { + "epoch": 1.1494273127753303, + "grad_norm": 1.9097739370767552, + "learning_rate": 8.370087177963204e-06, + "loss": 0.6297920346260071, + "step": 3262 + }, + { + "epoch": 1.1497797356828194, + "grad_norm": 1.6773858737351708, + "learning_rate": 8.364338972495016e-06, + "loss": 0.7004375457763672, + "step": 3263 + }, + { + "epoch": 1.1501321585903084, + "grad_norm": 1.9905333664754346, + "learning_rate": 8.358591322282845e-06, + "loss": 0.5850871801376343, + "step": 3264 + }, + { + "epoch": 1.1504845814977973, + "grad_norm": 1.6216139435027066, + "learning_rate": 8.352844229277834e-06, + "loss": 0.493900865316391, + "step": 3265 + }, + { + "epoch": 1.1508370044052865, + "grad_norm": 1.8994324319983171, + "learning_rate": 8.34709769543095e-06, + "loss": 0.573354959487915, + "step": 3266 + }, + { + "epoch": 1.1511894273127754, + "grad_norm": 2.1672972359364175, + "learning_rate": 8.341351722692951e-06, + "loss": 0.7154442667961121, + "step": 3267 + }, + { + "epoch": 1.1515418502202643, + "grad_norm": 1.705511845117997, + "learning_rate": 8.335606313014432e-06, + "loss": 0.5429074764251709, + "step": 
3268 + }, + { + "epoch": 1.1518942731277533, + "grad_norm": 1.8606068751906144, + "learning_rate": 8.329861468345768e-06, + "loss": 0.6938891410827637, + "step": 3269 + }, + { + "epoch": 1.1522466960352422, + "grad_norm": 5.765839224937511, + "learning_rate": 8.324117190637157e-06, + "loss": 0.7114205360412598, + "step": 3270 + }, + { + "epoch": 1.1525991189427314, + "grad_norm": 1.761532917196708, + "learning_rate": 8.318373481838605e-06, + "loss": 0.5353071093559265, + "step": 3271 + }, + { + "epoch": 1.1529515418502203, + "grad_norm": 1.931038515640054, + "learning_rate": 8.312630343899921e-06, + "loss": 0.7838516235351562, + "step": 3272 + }, + { + "epoch": 1.1533039647577092, + "grad_norm": 2.013028743927059, + "learning_rate": 8.306887778770724e-06, + "loss": 0.630479633808136, + "step": 3273 + }, + { + "epoch": 1.1536563876651982, + "grad_norm": 1.908388737326531, + "learning_rate": 8.301145788400438e-06, + "loss": 0.6568116545677185, + "step": 3274 + }, + { + "epoch": 1.1540088105726873, + "grad_norm": 1.4673620532583986, + "learning_rate": 8.295404374738278e-06, + "loss": 0.5410804748535156, + "step": 3275 + }, + { + "epoch": 1.1543612334801763, + "grad_norm": 2.0887831204496017, + "learning_rate": 8.289663539733292e-06, + "loss": 0.6699862480163574, + "step": 3276 + }, + { + "epoch": 1.1547136563876652, + "grad_norm": 2.146352543425904, + "learning_rate": 8.283923285334304e-06, + "loss": 0.6828576326370239, + "step": 3277 + }, + { + "epoch": 1.1550660792951541, + "grad_norm": 1.6441665475307043, + "learning_rate": 8.278183613489951e-06, + "loss": 0.5569214820861816, + "step": 3278 + }, + { + "epoch": 1.155418502202643, + "grad_norm": 1.5736783771881073, + "learning_rate": 8.27244452614868e-06, + "loss": 0.6276477575302124, + "step": 3279 + }, + { + "epoch": 1.1557709251101322, + "grad_norm": 1.639795393267647, + "learning_rate": 8.266706025258727e-06, + "loss": 0.5752792954444885, + "step": 3280 + }, + { + "epoch": 1.1561233480176212, + "grad_norm": 
1.8007170708068962, + "learning_rate": 8.260968112768137e-06, + "loss": 0.6149388551712036, + "step": 3281 + }, + { + "epoch": 1.1564757709251101, + "grad_norm": 1.8241425629966381, + "learning_rate": 8.255230790624755e-06, + "loss": 0.6399196982383728, + "step": 3282 + }, + { + "epoch": 1.156828193832599, + "grad_norm": 1.8065599712551461, + "learning_rate": 8.249494060776215e-06, + "loss": 0.6927458047866821, + "step": 3283 + }, + { + "epoch": 1.1571806167400882, + "grad_norm": 1.5535864037785454, + "learning_rate": 8.243757925169968e-06, + "loss": 0.5843946933746338, + "step": 3284 + }, + { + "epoch": 1.1575330396475771, + "grad_norm": 1.7771012211418213, + "learning_rate": 8.238022385753248e-06, + "loss": 0.6469332575798035, + "step": 3285 + }, + { + "epoch": 1.157885462555066, + "grad_norm": 1.5500454202505596, + "learning_rate": 8.23228744447309e-06, + "loss": 0.572630763053894, + "step": 3286 + }, + { + "epoch": 1.158237885462555, + "grad_norm": 1.7219264264044976, + "learning_rate": 8.226553103276335e-06, + "loss": 0.6872239112854004, + "step": 3287 + }, + { + "epoch": 1.158590308370044, + "grad_norm": 1.7206454172461807, + "learning_rate": 8.220819364109607e-06, + "loss": 0.5116995573043823, + "step": 3288 + }, + { + "epoch": 1.1589427312775331, + "grad_norm": 1.7411708693012447, + "learning_rate": 8.215086228919336e-06, + "loss": 0.6179347038269043, + "step": 3289 + }, + { + "epoch": 1.159295154185022, + "grad_norm": 1.5098995111565061, + "learning_rate": 8.209353699651745e-06, + "loss": 0.573688805103302, + "step": 3290 + }, + { + "epoch": 1.159647577092511, + "grad_norm": 1.6209084651188936, + "learning_rate": 8.20362177825284e-06, + "loss": 0.6622583866119385, + "step": 3291 + }, + { + "epoch": 1.16, + "grad_norm": 1.9043199482736668, + "learning_rate": 8.197890466668441e-06, + "loss": 0.4945096969604492, + "step": 3292 + }, + { + "epoch": 1.160352422907489, + "grad_norm": 1.9477760218669748, + "learning_rate": 8.19215976684414e-06, + "loss": 
0.5657082796096802, + "step": 3293 + }, + { + "epoch": 1.160704845814978, + "grad_norm": 1.7301213281073105, + "learning_rate": 8.186429680725339e-06, + "loss": 0.5684623122215271, + "step": 3294 + }, + { + "epoch": 1.161057268722467, + "grad_norm": 1.7781389678625354, + "learning_rate": 8.180700210257223e-06, + "loss": 0.567638635635376, + "step": 3295 + }, + { + "epoch": 1.1614096916299559, + "grad_norm": 1.8973989975016394, + "learning_rate": 8.174971357384762e-06, + "loss": 0.7182992696762085, + "step": 3296 + }, + { + "epoch": 1.1617621145374448, + "grad_norm": 1.7202032555937063, + "learning_rate": 8.169243124052731e-06, + "loss": 0.7188737392425537, + "step": 3297 + }, + { + "epoch": 1.162114537444934, + "grad_norm": 1.632750713102644, + "learning_rate": 8.163515512205687e-06, + "loss": 0.5532418489456177, + "step": 3298 + }, + { + "epoch": 1.162466960352423, + "grad_norm": 2.2725291479645136, + "learning_rate": 8.157788523787967e-06, + "loss": 0.7167447209358215, + "step": 3299 + }, + { + "epoch": 1.1628193832599119, + "grad_norm": 1.8053860419209504, + "learning_rate": 8.152062160743716e-06, + "loss": 0.633411169052124, + "step": 3300 + }, + { + "epoch": 1.1631718061674008, + "grad_norm": 1.8006555184567121, + "learning_rate": 8.146336425016849e-06, + "loss": 0.6686321496963501, + "step": 3301 + }, + { + "epoch": 1.16352422907489, + "grad_norm": 1.884331587638867, + "learning_rate": 8.140611318551078e-06, + "loss": 0.608701765537262, + "step": 3302 + }, + { + "epoch": 1.1638766519823789, + "grad_norm": 1.6532674404979102, + "learning_rate": 8.1348868432899e-06, + "loss": 0.5607466101646423, + "step": 3303 + }, + { + "epoch": 1.1642290748898678, + "grad_norm": 1.9224536271892947, + "learning_rate": 8.12916300117659e-06, + "loss": 0.6397457122802734, + "step": 3304 + }, + { + "epoch": 1.1645814977973568, + "grad_norm": 1.9075190910370474, + "learning_rate": 8.123439794154223e-06, + "loss": 0.6681507229804993, + "step": 3305 + }, + { + "epoch": 
1.1649339207048457, + "grad_norm": 1.7601065273352539, + "learning_rate": 8.117717224165645e-06, + "loss": 0.5549972057342529, + "step": 3306 + }, + { + "epoch": 1.1652863436123349, + "grad_norm": 1.9981914923817063, + "learning_rate": 8.111995293153486e-06, + "loss": 0.7519058585166931, + "step": 3307 + }, + { + "epoch": 1.1656387665198238, + "grad_norm": 1.8817978978557874, + "learning_rate": 8.106274003060172e-06, + "loss": 0.7100121378898621, + "step": 3308 + }, + { + "epoch": 1.1659911894273127, + "grad_norm": 2.081586750876693, + "learning_rate": 8.100553355827897e-06, + "loss": 0.6297321319580078, + "step": 3309 + }, + { + "epoch": 1.1663436123348019, + "grad_norm": 2.2854313216105635, + "learning_rate": 8.094833353398645e-06, + "loss": 0.6875895857810974, + "step": 3310 + }, + { + "epoch": 1.1666960352422908, + "grad_norm": 1.7297215389141958, + "learning_rate": 8.08911399771418e-06, + "loss": 0.5369099974632263, + "step": 3311 + }, + { + "epoch": 1.1670484581497798, + "grad_norm": 1.7209622601094259, + "learning_rate": 8.083395290716042e-06, + "loss": 0.5598124265670776, + "step": 3312 + }, + { + "epoch": 1.1674008810572687, + "grad_norm": 1.6153396072397332, + "learning_rate": 8.077677234345557e-06, + "loss": 0.6438342332839966, + "step": 3313 + }, + { + "epoch": 1.1677533039647576, + "grad_norm": 1.649767256033485, + "learning_rate": 8.07195983054383e-06, + "loss": 0.5558618307113647, + "step": 3314 + }, + { + "epoch": 1.1681057268722468, + "grad_norm": 1.744681713922102, + "learning_rate": 8.06624308125173e-06, + "loss": 0.5729602575302124, + "step": 3315 + }, + { + "epoch": 1.1684581497797357, + "grad_norm": 2.294706401477936, + "learning_rate": 8.060526988409929e-06, + "loss": 0.5094903707504272, + "step": 3316 + }, + { + "epoch": 1.1688105726872247, + "grad_norm": 1.6352779890455922, + "learning_rate": 8.054811553958853e-06, + "loss": 0.6605818867683411, + "step": 3317 + }, + { + "epoch": 1.1691629955947136, + "grad_norm": 2.240048633930669, + 
"learning_rate": 8.04909677983872e-06, + "loss": 0.7929576635360718, + "step": 3318 + }, + { + "epoch": 1.1695154185022028, + "grad_norm": 1.7445241989865017, + "learning_rate": 8.043382667989514e-06, + "loss": 0.5915192365646362, + "step": 3319 + }, + { + "epoch": 1.1698678414096917, + "grad_norm": 1.6537456786938194, + "learning_rate": 8.037669220351e-06, + "loss": 0.5923853516578674, + "step": 3320 + }, + { + "epoch": 1.1702202643171806, + "grad_norm": 1.7692219343864357, + "learning_rate": 8.031956438862718e-06, + "loss": 0.7034223079681396, + "step": 3321 + }, + { + "epoch": 1.1705726872246696, + "grad_norm": 1.699093684077835, + "learning_rate": 8.026244325463975e-06, + "loss": 0.6093307733535767, + "step": 3322 + }, + { + "epoch": 1.1709251101321585, + "grad_norm": 1.820021264359909, + "learning_rate": 8.020532882093862e-06, + "loss": 0.5709424614906311, + "step": 3323 + }, + { + "epoch": 1.1712775330396477, + "grad_norm": 1.6327248259933085, + "learning_rate": 8.01482211069123e-06, + "loss": 0.5242069959640503, + "step": 3324 + }, + { + "epoch": 1.1716299559471366, + "grad_norm": 1.8755413800206977, + "learning_rate": 8.009112013194707e-06, + "loss": 0.5869580507278442, + "step": 3325 + }, + { + "epoch": 1.1719823788546255, + "grad_norm": 1.927667149386539, + "learning_rate": 8.0034025915427e-06, + "loss": 0.7281460762023926, + "step": 3326 + }, + { + "epoch": 1.1723348017621145, + "grad_norm": 1.8020991914636244, + "learning_rate": 7.997693847673378e-06, + "loss": 0.6877723336219788, + "step": 3327 + }, + { + "epoch": 1.1726872246696036, + "grad_norm": 1.4739994768631006, + "learning_rate": 7.991985783524676e-06, + "loss": 0.6045002937316895, + "step": 3328 + }, + { + "epoch": 1.1730396475770926, + "grad_norm": 1.7637996531853402, + "learning_rate": 7.986278401034315e-06, + "loss": 0.5698690414428711, + "step": 3329 + }, + { + "epoch": 1.1733920704845815, + "grad_norm": 1.879664532548966, + "learning_rate": 7.980571702139759e-06, + "loss": 
0.6802438497543335, + "step": 3330 + }, + { + "epoch": 1.1737444933920704, + "grad_norm": 1.9432824884843154, + "learning_rate": 7.974865688778271e-06, + "loss": 0.5840654373168945, + "step": 3331 + }, + { + "epoch": 1.1740969162995594, + "grad_norm": 1.7557288678447098, + "learning_rate": 7.969160362886855e-06, + "loss": 0.5203073024749756, + "step": 3332 + }, + { + "epoch": 1.1744493392070485, + "grad_norm": 1.5188701776399616, + "learning_rate": 7.963455726402292e-06, + "loss": 0.4558306932449341, + "step": 3333 + }, + { + "epoch": 1.1748017621145375, + "grad_norm": 1.8464169088081481, + "learning_rate": 7.957751781261132e-06, + "loss": 0.6200483441352844, + "step": 3334 + }, + { + "epoch": 1.1751541850220264, + "grad_norm": 1.4009839443781218, + "learning_rate": 7.952048529399686e-06, + "loss": 0.559386670589447, + "step": 3335 + }, + { + "epoch": 1.1755066079295153, + "grad_norm": 1.5776847118393618, + "learning_rate": 7.946345972754026e-06, + "loss": 0.5521356463432312, + "step": 3336 + }, + { + "epoch": 1.1758590308370045, + "grad_norm": 1.6725655120909741, + "learning_rate": 7.940644113260001e-06, + "loss": 0.6235495805740356, + "step": 3337 + }, + { + "epoch": 1.1762114537444934, + "grad_norm": 1.6364629990686756, + "learning_rate": 7.934942952853203e-06, + "loss": 0.5196648836135864, + "step": 3338 + }, + { + "epoch": 1.1765638766519824, + "grad_norm": 1.658819201732712, + "learning_rate": 7.929242493469013e-06, + "loss": 0.5959422588348389, + "step": 3339 + }, + { + "epoch": 1.1769162995594713, + "grad_norm": 1.8867606277211662, + "learning_rate": 7.923542737042549e-06, + "loss": 0.5400167107582092, + "step": 3340 + }, + { + "epoch": 1.1772687224669602, + "grad_norm": 1.8686352871929341, + "learning_rate": 7.917843685508702e-06, + "loss": 0.688996434211731, + "step": 3341 + }, + { + "epoch": 1.1776211453744494, + "grad_norm": 1.844624213320976, + "learning_rate": 7.912145340802127e-06, + "loss": 0.623216450214386, + "step": 3342 + }, + { + "epoch": 
1.1779735682819383, + "grad_norm": 1.7951119497780943, + "learning_rate": 7.906447704857233e-06, + "loss": 0.587382435798645, + "step": 3343 + }, + { + "epoch": 1.1783259911894273, + "grad_norm": 1.4508698182802122, + "learning_rate": 7.900750779608187e-06, + "loss": 0.6033053398132324, + "step": 3344 + }, + { + "epoch": 1.1786784140969162, + "grad_norm": 1.5026274052311877, + "learning_rate": 7.895054566988924e-06, + "loss": 0.557671308517456, + "step": 3345 + }, + { + "epoch": 1.1790308370044054, + "grad_norm": 1.6193785911353318, + "learning_rate": 7.889359068933122e-06, + "loss": 0.4550681710243225, + "step": 3346 + }, + { + "epoch": 1.1793832599118943, + "grad_norm": 1.7532225132073032, + "learning_rate": 7.883664287374235e-06, + "loss": 0.6417531967163086, + "step": 3347 + }, + { + "epoch": 1.1797356828193832, + "grad_norm": 2.046641045277204, + "learning_rate": 7.877970224245458e-06, + "loss": 0.703549861907959, + "step": 3348 + }, + { + "epoch": 1.1800881057268722, + "grad_norm": 1.9966595548369739, + "learning_rate": 7.87227688147975e-06, + "loss": 0.7438976764678955, + "step": 3349 + }, + { + "epoch": 1.1804405286343613, + "grad_norm": 1.9757665254478705, + "learning_rate": 7.866584261009823e-06, + "loss": 0.5563932657241821, + "step": 3350 + }, + { + "epoch": 1.1807929515418503, + "grad_norm": 1.9705828017858218, + "learning_rate": 7.860892364768145e-06, + "loss": 0.6332740783691406, + "step": 3351 + }, + { + "epoch": 1.1811453744493392, + "grad_norm": 1.6800252042998722, + "learning_rate": 7.855201194686938e-06, + "loss": 0.5207923650741577, + "step": 3352 + }, + { + "epoch": 1.1814977973568281, + "grad_norm": 1.704285155728578, + "learning_rate": 7.849510752698179e-06, + "loss": 0.5930209755897522, + "step": 3353 + }, + { + "epoch": 1.1818502202643173, + "grad_norm": 1.9626347095192314, + "learning_rate": 7.843821040733588e-06, + "loss": 0.6207472085952759, + "step": 3354 + }, + { + "epoch": 1.1822026431718062, + "grad_norm": 1.631891920380694, + 
"learning_rate": 7.838132060724657e-06, + "loss": 0.5487867593765259, + "step": 3355 + }, + { + "epoch": 1.1825550660792952, + "grad_norm": 1.719446635213068, + "learning_rate": 7.83244381460261e-06, + "loss": 0.5457941889762878, + "step": 3356 + }, + { + "epoch": 1.182907488986784, + "grad_norm": 4.79087339281713, + "learning_rate": 7.826756304298428e-06, + "loss": 0.5203769207000732, + "step": 3357 + }, + { + "epoch": 1.183259911894273, + "grad_norm": 2.2130523974851006, + "learning_rate": 7.821069531742848e-06, + "loss": 0.7241770029067993, + "step": 3358 + }, + { + "epoch": 1.1836123348017622, + "grad_norm": 1.872241533824603, + "learning_rate": 7.815383498866351e-06, + "loss": 0.5085904598236084, + "step": 3359 + }, + { + "epoch": 1.1839647577092511, + "grad_norm": 1.7457024495825946, + "learning_rate": 7.80969820759917e-06, + "loss": 0.6219276785850525, + "step": 3360 + }, + { + "epoch": 1.18431718061674, + "grad_norm": 1.657619548935653, + "learning_rate": 7.804013659871286e-06, + "loss": 0.5621576309204102, + "step": 3361 + }, + { + "epoch": 1.184669603524229, + "grad_norm": 2.006942738555184, + "learning_rate": 7.798329857612415e-06, + "loss": 0.6862529516220093, + "step": 3362 + }, + { + "epoch": 1.1850220264317182, + "grad_norm": 1.6254700608957282, + "learning_rate": 7.792646802752045e-06, + "loss": 0.5536706447601318, + "step": 3363 + }, + { + "epoch": 1.185374449339207, + "grad_norm": 1.8365676060407183, + "learning_rate": 7.786964497219389e-06, + "loss": 0.7158493995666504, + "step": 3364 + }, + { + "epoch": 1.185726872246696, + "grad_norm": 1.5882377854785632, + "learning_rate": 7.781282942943411e-06, + "loss": 0.6510338187217712, + "step": 3365 + }, + { + "epoch": 1.186079295154185, + "grad_norm": 1.6887309758558333, + "learning_rate": 7.775602141852827e-06, + "loss": 0.4999651312828064, + "step": 3366 + }, + { + "epoch": 1.186431718061674, + "grad_norm": 1.7482854003458987, + "learning_rate": 7.769922095876088e-06, + "loss": 0.566371738910675, + 
"step": 3367 + }, + { + "epoch": 1.186784140969163, + "grad_norm": 1.8523910267151578, + "learning_rate": 7.764242806941396e-06, + "loss": 0.6424880623817444, + "step": 3368 + }, + { + "epoch": 1.187136563876652, + "grad_norm": 1.7770666290685069, + "learning_rate": 7.758564276976696e-06, + "loss": 0.6731792688369751, + "step": 3369 + }, + { + "epoch": 1.187488986784141, + "grad_norm": 1.8284341736993877, + "learning_rate": 7.752886507909661e-06, + "loss": 0.7350698113441467, + "step": 3370 + }, + { + "epoch": 1.1878414096916299, + "grad_norm": 1.6211597569244138, + "learning_rate": 7.747209501667729e-06, + "loss": 0.49212586879730225, + "step": 3371 + }, + { + "epoch": 1.188193832599119, + "grad_norm": 1.8399284999038652, + "learning_rate": 7.741533260178058e-06, + "loss": 0.46775591373443604, + "step": 3372 + }, + { + "epoch": 1.188546255506608, + "grad_norm": 1.9173381710912725, + "learning_rate": 7.73585778536756e-06, + "loss": 0.7006367444992065, + "step": 3373 + }, + { + "epoch": 1.188898678414097, + "grad_norm": 1.9011259462553447, + "learning_rate": 7.730183079162882e-06, + "loss": 0.6403789520263672, + "step": 3374 + }, + { + "epoch": 1.1892511013215858, + "grad_norm": 1.7192698764020407, + "learning_rate": 7.724509143490409e-06, + "loss": 0.5788881778717041, + "step": 3375 + }, + { + "epoch": 1.1896035242290748, + "grad_norm": 1.8160886708158774, + "learning_rate": 7.718835980276265e-06, + "loss": 0.5216118693351746, + "step": 3376 + }, + { + "epoch": 1.189955947136564, + "grad_norm": 1.8022868379388808, + "learning_rate": 7.713163591446318e-06, + "loss": 0.5951248407363892, + "step": 3377 + }, + { + "epoch": 1.1903083700440529, + "grad_norm": 1.7460515067285554, + "learning_rate": 7.707491978926157e-06, + "loss": 0.4975050687789917, + "step": 3378 + }, + { + "epoch": 1.1906607929515418, + "grad_norm": 1.770763460120106, + "learning_rate": 7.701821144641127e-06, + "loss": 0.6019243001937866, + "step": 3379 + }, + { + "epoch": 1.1910132158590307, + 
"grad_norm": 1.7832166509700509, + "learning_rate": 7.696151090516292e-06, + "loss": 0.6395450830459595, + "step": 3380 + }, + { + "epoch": 1.19136563876652, + "grad_norm": 1.6347986183513594, + "learning_rate": 7.690481818476468e-06, + "loss": 0.579787015914917, + "step": 3381 + }, + { + "epoch": 1.1917180616740088, + "grad_norm": 1.4744637046036069, + "learning_rate": 7.684813330446191e-06, + "loss": 0.5136005878448486, + "step": 3382 + }, + { + "epoch": 1.1920704845814978, + "grad_norm": 1.7266158280823927, + "learning_rate": 7.679145628349734e-06, + "loss": 0.6639782190322876, + "step": 3383 + }, + { + "epoch": 1.1924229074889867, + "grad_norm": 1.8900727159770023, + "learning_rate": 7.673478714111111e-06, + "loss": 0.5575984716415405, + "step": 3384 + }, + { + "epoch": 1.1927753303964757, + "grad_norm": 2.0885094289190658, + "learning_rate": 7.667812589654062e-06, + "loss": 0.6456045508384705, + "step": 3385 + }, + { + "epoch": 1.1931277533039648, + "grad_norm": 1.9286041654650978, + "learning_rate": 7.662147256902055e-06, + "loss": 0.6936196088790894, + "step": 3386 + }, + { + "epoch": 1.1934801762114537, + "grad_norm": 1.758654368664718, + "learning_rate": 7.656482717778299e-06, + "loss": 0.5490384697914124, + "step": 3387 + }, + { + "epoch": 1.1938325991189427, + "grad_norm": 1.9621511017976598, + "learning_rate": 7.650818974205727e-06, + "loss": 0.6973621845245361, + "step": 3388 + }, + { + "epoch": 1.1941850220264318, + "grad_norm": 1.835769632858156, + "learning_rate": 7.645156028107005e-06, + "loss": 0.7471047639846802, + "step": 3389 + }, + { + "epoch": 1.1945374449339208, + "grad_norm": 1.7902415027725214, + "learning_rate": 7.639493881404526e-06, + "loss": 0.6205108165740967, + "step": 3390 + }, + { + "epoch": 1.1948898678414097, + "grad_norm": 1.6920866725907067, + "learning_rate": 7.63383253602041e-06, + "loss": 0.747038722038269, + "step": 3391 + }, + { + "epoch": 1.1952422907488987, + "grad_norm": 1.5771320255200836, + "learning_rate": 
7.628171993876514e-06, + "loss": 0.5185794830322266, + "step": 3392 + }, + { + "epoch": 1.1955947136563876, + "grad_norm": 1.6878325344643712, + "learning_rate": 7.6225122568944124e-06, + "loss": 0.6059385538101196, + "step": 3393 + }, + { + "epoch": 1.1959471365638767, + "grad_norm": 1.6275144870635614, + "learning_rate": 7.6168533269954045e-06, + "loss": 0.5154507160186768, + "step": 3394 + }, + { + "epoch": 1.1962995594713657, + "grad_norm": 1.8584269669132367, + "learning_rate": 7.611195206100529e-06, + "loss": 0.684306263923645, + "step": 3395 + }, + { + "epoch": 1.1966519823788546, + "grad_norm": 1.60676147024925, + "learning_rate": 7.605537896130537e-06, + "loss": 0.5637205839157104, + "step": 3396 + }, + { + "epoch": 1.1970044052863436, + "grad_norm": 2.099988274984523, + "learning_rate": 7.599881399005913e-06, + "loss": 0.700809121131897, + "step": 3397 + }, + { + "epoch": 1.1973568281938327, + "grad_norm": 1.8285381374549698, + "learning_rate": 7.594225716646859e-06, + "loss": 0.45139041543006897, + "step": 3398 + }, + { + "epoch": 1.1977092511013216, + "grad_norm": 1.9616153744225684, + "learning_rate": 7.588570850973301e-06, + "loss": 0.6623016595840454, + "step": 3399 + }, + { + "epoch": 1.1980616740088106, + "grad_norm": 1.5510325285611402, + "learning_rate": 7.582916803904899e-06, + "loss": 0.47430598735809326, + "step": 3400 + }, + { + "epoch": 1.1984140969162995, + "grad_norm": 1.7180906175268718, + "learning_rate": 7.57726357736101e-06, + "loss": 0.7190637588500977, + "step": 3401 + }, + { + "epoch": 1.1987665198237885, + "grad_norm": 1.4703339836450204, + "learning_rate": 7.571611173260747e-06, + "loss": 0.552079439163208, + "step": 3402 + }, + { + "epoch": 1.1991189427312776, + "grad_norm": 1.665813020849203, + "learning_rate": 7.565959593522914e-06, + "loss": 0.5499744415283203, + "step": 3403 + }, + { + "epoch": 1.1994713656387666, + "grad_norm": 1.6507149154277247, + "learning_rate": 7.560308840066046e-06, + "loss": 0.6013774871826172, + 
"step": 3404 + }, + { + "epoch": 1.1998237885462555, + "grad_norm": 1.5847999964914972, + "learning_rate": 7.554658914808404e-06, + "loss": 0.5489538908004761, + "step": 3405 + }, + { + "epoch": 1.2001762114537444, + "grad_norm": 1.72263968265959, + "learning_rate": 7.549009819667956e-06, + "loss": 0.6124382615089417, + "step": 3406 + }, + { + "epoch": 1.2005286343612336, + "grad_norm": 2.1073738195754594, + "learning_rate": 7.543361556562397e-06, + "loss": 0.6895862817764282, + "step": 3407 + }, + { + "epoch": 1.2008810572687225, + "grad_norm": 2.063900978481081, + "learning_rate": 7.537714127409139e-06, + "loss": 0.6632197499275208, + "step": 3408 + }, + { + "epoch": 1.2012334801762115, + "grad_norm": 1.6352648722318401, + "learning_rate": 7.5320675341253e-06, + "loss": 0.5940145254135132, + "step": 3409 + }, + { + "epoch": 1.2015859030837004, + "grad_norm": 1.884013328310988, + "learning_rate": 7.526421778627735e-06, + "loss": 0.646323561668396, + "step": 3410 + }, + { + "epoch": 1.2019383259911893, + "grad_norm": 1.7070941231545174, + "learning_rate": 7.520776862832993e-06, + "loss": 0.6173659563064575, + "step": 3411 + }, + { + "epoch": 1.2022907488986785, + "grad_norm": 1.8582208465763577, + "learning_rate": 7.515132788657347e-06, + "loss": 0.574191689491272, + "step": 3412 + }, + { + "epoch": 1.2026431718061674, + "grad_norm": 1.9220370982111243, + "learning_rate": 7.50948955801679e-06, + "loss": 0.6243089437484741, + "step": 3413 + }, + { + "epoch": 1.2029955947136564, + "grad_norm": 1.7949632694678572, + "learning_rate": 7.503847172827022e-06, + "loss": 0.692270040512085, + "step": 3414 + }, + { + "epoch": 1.2033480176211453, + "grad_norm": 1.6803082040464332, + "learning_rate": 7.498205635003451e-06, + "loss": 0.5929970145225525, + "step": 3415 + }, + { + "epoch": 1.2037004405286345, + "grad_norm": 1.6077232593078599, + "learning_rate": 7.4925649464612126e-06, + "loss": 0.5479272603988647, + "step": 3416 + }, + { + "epoch": 1.2040528634361234, + 
"grad_norm": 1.5415384890909907, + "learning_rate": 7.486925109115135e-06, + "loss": 0.5923635363578796, + "step": 3417 + }, + { + "epoch": 1.2044052863436123, + "grad_norm": 1.7506756122488851, + "learning_rate": 7.48128612487978e-06, + "loss": 0.6530192494392395, + "step": 3418 + }, + { + "epoch": 1.2047577092511013, + "grad_norm": 1.533550542452438, + "learning_rate": 7.475647995669397e-06, + "loss": 0.5104716420173645, + "step": 3419 + }, + { + "epoch": 1.2051101321585902, + "grad_norm": 1.8415327152950194, + "learning_rate": 7.470010723397958e-06, + "loss": 0.6526790261268616, + "step": 3420 + }, + { + "epoch": 1.2054625550660794, + "grad_norm": 1.746747219195987, + "learning_rate": 7.464374309979143e-06, + "loss": 0.5985254645347595, + "step": 3421 + }, + { + "epoch": 1.2058149779735683, + "grad_norm": 1.9679342498420438, + "learning_rate": 7.458738757326336e-06, + "loss": 0.6575271487236023, + "step": 3422 + }, + { + "epoch": 1.2061674008810572, + "grad_norm": 1.7353179250025277, + "learning_rate": 7.453104067352637e-06, + "loss": 0.5906708836555481, + "step": 3423 + }, + { + "epoch": 1.2065198237885462, + "grad_norm": 1.7518769855954601, + "learning_rate": 7.4474702419708465e-06, + "loss": 0.7992517352104187, + "step": 3424 + }, + { + "epoch": 1.2068722466960353, + "grad_norm": 1.7067520122752557, + "learning_rate": 7.4418372830934645e-06, + "loss": 0.5935543179512024, + "step": 3425 + }, + { + "epoch": 1.2072246696035243, + "grad_norm": 1.877304862966978, + "learning_rate": 7.436205192632719e-06, + "loss": 0.7166613340377808, + "step": 3426 + }, + { + "epoch": 1.2075770925110132, + "grad_norm": 1.7575954983917004, + "learning_rate": 7.430573972500519e-06, + "loss": 0.5254578590393066, + "step": 3427 + }, + { + "epoch": 1.2079295154185021, + "grad_norm": 1.7449214411247376, + "learning_rate": 7.42494362460849e-06, + "loss": 0.6586379408836365, + "step": 3428 + }, + { + "epoch": 1.208281938325991, + "grad_norm": 1.7864206478373184, + "learning_rate": 
7.419314150867964e-06, + "loss": 0.6960606575012207, + "step": 3429 + }, + { + "epoch": 1.2086343612334802, + "grad_norm": 1.7557785377406303, + "learning_rate": 7.413685553189969e-06, + "loss": 0.6107728481292725, + "step": 3430 + }, + { + "epoch": 1.2089867841409692, + "grad_norm": 1.624755754090177, + "learning_rate": 7.408057833485241e-06, + "loss": 0.6446499824523926, + "step": 3431 + }, + { + "epoch": 1.209339207048458, + "grad_norm": 1.9153166988080477, + "learning_rate": 7.402430993664216e-06, + "loss": 0.7070472240447998, + "step": 3432 + }, + { + "epoch": 1.2096916299559473, + "grad_norm": 2.004011228140917, + "learning_rate": 7.396805035637023e-06, + "loss": 0.5919365882873535, + "step": 3433 + }, + { + "epoch": 1.2100440528634362, + "grad_norm": 1.7861550041093852, + "learning_rate": 7.391179961313512e-06, + "loss": 0.5975243449211121, + "step": 3434 + }, + { + "epoch": 1.2103964757709251, + "grad_norm": 1.6863010997131964, + "learning_rate": 7.385555772603212e-06, + "loss": 0.5772840976715088, + "step": 3435 + }, + { + "epoch": 1.210748898678414, + "grad_norm": 1.8451401620227157, + "learning_rate": 7.379932471415362e-06, + "loss": 0.7335072755813599, + "step": 3436 + }, + { + "epoch": 1.211101321585903, + "grad_norm": 2.0255796426124877, + "learning_rate": 7.3743100596589e-06, + "loss": 0.6214553713798523, + "step": 3437 + }, + { + "epoch": 1.2114537444933922, + "grad_norm": 1.8204785128516552, + "learning_rate": 7.368688539242457e-06, + "loss": 0.6515316963195801, + "step": 3438 + }, + { + "epoch": 1.211806167400881, + "grad_norm": 1.778475729690813, + "learning_rate": 7.3630679120743665e-06, + "loss": 0.6479551196098328, + "step": 3439 + }, + { + "epoch": 1.21215859030837, + "grad_norm": 1.8992442060407408, + "learning_rate": 7.357448180062657e-06, + "loss": 0.6195069551467896, + "step": 3440 + }, + { + "epoch": 1.212511013215859, + "grad_norm": 1.8044588174946172, + "learning_rate": 7.351829345115047e-06, + "loss": 0.5939193964004517, + "step": 
3441 + }, + { + "epoch": 1.2128634361233481, + "grad_norm": 1.7404213735338998, + "learning_rate": 7.346211409138964e-06, + "loss": 0.6346434354782104, + "step": 3442 + }, + { + "epoch": 1.213215859030837, + "grad_norm": 1.7854241859310716, + "learning_rate": 7.340594374041516e-06, + "loss": 0.5924171209335327, + "step": 3443 + }, + { + "epoch": 1.213568281938326, + "grad_norm": 1.4550427635518266, + "learning_rate": 7.334978241729514e-06, + "loss": 0.48560285568237305, + "step": 3444 + }, + { + "epoch": 1.213920704845815, + "grad_norm": 2.0456790867838865, + "learning_rate": 7.329363014109463e-06, + "loss": 0.643998384475708, + "step": 3445 + }, + { + "epoch": 1.2142731277533039, + "grad_norm": 1.9340204732587762, + "learning_rate": 7.323748693087551e-06, + "loss": 0.6041159629821777, + "step": 3446 + }, + { + "epoch": 1.214625550660793, + "grad_norm": 1.991943883280592, + "learning_rate": 7.318135280569674e-06, + "loss": 0.7143498659133911, + "step": 3447 + }, + { + "epoch": 1.214977973568282, + "grad_norm": 1.910490525820005, + "learning_rate": 7.312522778461409e-06, + "loss": 0.5821564197540283, + "step": 3448 + }, + { + "epoch": 1.215330396475771, + "grad_norm": 1.9609409525419488, + "learning_rate": 7.3069111886680166e-06, + "loss": 0.5786745548248291, + "step": 3449 + }, + { + "epoch": 1.2156828193832598, + "grad_norm": 1.7004659993753848, + "learning_rate": 7.3013005130944666e-06, + "loss": 0.6740534901618958, + "step": 3450 + }, + { + "epoch": 1.216035242290749, + "grad_norm": 1.9264837774532027, + "learning_rate": 7.2956907536454045e-06, + "loss": 0.6353983879089355, + "step": 3451 + }, + { + "epoch": 1.216387665198238, + "grad_norm": 1.6467978200520468, + "learning_rate": 7.290081912225172e-06, + "loss": 0.6890027523040771, + "step": 3452 + }, + { + "epoch": 1.2167400881057269, + "grad_norm": 2.194089687314607, + "learning_rate": 7.284473990737795e-06, + "loss": 0.6485118269920349, + "step": 3453 + }, + { + "epoch": 1.2170925110132158, + "grad_norm": 
1.8020323615419078, + "learning_rate": 7.2788669910869845e-06, + "loss": 0.5364162921905518, + "step": 3454 + }, + { + "epoch": 1.2174449339207047, + "grad_norm": 1.8770204171846867, + "learning_rate": 7.27326091517615e-06, + "loss": 0.6625754833221436, + "step": 3455 + }, + { + "epoch": 1.217797356828194, + "grad_norm": 1.9138778572255513, + "learning_rate": 7.267655764908374e-06, + "loss": 0.7090050578117371, + "step": 3456 + }, + { + "epoch": 1.2181497797356828, + "grad_norm": 1.7151154871040917, + "learning_rate": 7.26205154218643e-06, + "loss": 0.6556301116943359, + "step": 3457 + }, + { + "epoch": 1.2185022026431718, + "grad_norm": 2.12213118759585, + "learning_rate": 7.2564482489127815e-06, + "loss": 0.7998625636100769, + "step": 3458 + }, + { + "epoch": 1.2188546255506607, + "grad_norm": 1.8721449700246833, + "learning_rate": 7.250845886989568e-06, + "loss": 0.6336952447891235, + "step": 3459 + }, + { + "epoch": 1.2192070484581499, + "grad_norm": 1.7786932342182031, + "learning_rate": 7.245244458318621e-06, + "loss": 0.5072300434112549, + "step": 3460 + }, + { + "epoch": 1.2195594713656388, + "grad_norm": 1.9350920817100896, + "learning_rate": 7.23964396480145e-06, + "loss": 0.6297830939292908, + "step": 3461 + }, + { + "epoch": 1.2199118942731277, + "grad_norm": 1.7384183002767206, + "learning_rate": 7.234044408339243e-06, + "loss": 0.5560386180877686, + "step": 3462 + }, + { + "epoch": 1.2202643171806167, + "grad_norm": 1.7834281461054429, + "learning_rate": 7.228445790832885e-06, + "loss": 0.5180274844169617, + "step": 3463 + }, + { + "epoch": 1.2206167400881056, + "grad_norm": 1.5903839847735544, + "learning_rate": 7.222848114182926e-06, + "loss": 0.4870688319206238, + "step": 3464 + }, + { + "epoch": 1.2209691629955948, + "grad_norm": 1.5913924611315027, + "learning_rate": 7.217251380289602e-06, + "loss": 0.46914681792259216, + "step": 3465 + }, + { + "epoch": 1.2213215859030837, + "grad_norm": 1.6510218664086935, + "learning_rate": 
7.211655591052833e-06, + "loss": 0.5980997085571289, + "step": 3466 + }, + { + "epoch": 1.2216740088105726, + "grad_norm": 2.0761228855668468, + "learning_rate": 7.206060748372212e-06, + "loss": 0.5982732772827148, + "step": 3467 + }, + { + "epoch": 1.2220264317180616, + "grad_norm": 1.5384750193393883, + "learning_rate": 7.200466854147019e-06, + "loss": 0.612629771232605, + "step": 3468 + }, + { + "epoch": 1.2223788546255507, + "grad_norm": 1.6776022561511, + "learning_rate": 7.194873910276205e-06, + "loss": 0.606558084487915, + "step": 3469 + }, + { + "epoch": 1.2227312775330397, + "grad_norm": 2.093853594654106, + "learning_rate": 7.189281918658396e-06, + "loss": 0.7133803367614746, + "step": 3470 + }, + { + "epoch": 1.2230837004405286, + "grad_norm": 1.737492396211302, + "learning_rate": 7.183690881191908e-06, + "loss": 0.5640908479690552, + "step": 3471 + }, + { + "epoch": 1.2234361233480175, + "grad_norm": 1.9131350962270206, + "learning_rate": 7.178100799774717e-06, + "loss": 0.6376210451126099, + "step": 3472 + }, + { + "epoch": 1.2237885462555067, + "grad_norm": 1.7418892302924867, + "learning_rate": 7.172511676304481e-06, + "loss": 0.6207184791564941, + "step": 3473 + }, + { + "epoch": 1.2241409691629956, + "grad_norm": 2.0136397077316133, + "learning_rate": 7.166923512678538e-06, + "loss": 0.47848421335220337, + "step": 3474 + }, + { + "epoch": 1.2244933920704846, + "grad_norm": 1.89946756738985, + "learning_rate": 7.161336310793894e-06, + "loss": 0.6052829027175903, + "step": 3475 + }, + { + "epoch": 1.2248458149779735, + "grad_norm": 1.968672987503914, + "learning_rate": 7.155750072547229e-06, + "loss": 0.6050940155982971, + "step": 3476 + }, + { + "epoch": 1.2251982378854627, + "grad_norm": 2.566995671782078, + "learning_rate": 7.150164799834902e-06, + "loss": 0.6121659278869629, + "step": 3477 + }, + { + "epoch": 1.2255506607929516, + "grad_norm": 1.9679344001124786, + "learning_rate": 7.144580494552929e-06, + "loss": 0.6886739730834961, + "step": 
3478 + }, + { + "epoch": 1.2259030837004405, + "grad_norm": 1.5760234299307694, + "learning_rate": 7.13899715859702e-06, + "loss": 0.5001103281974792, + "step": 3479 + }, + { + "epoch": 1.2262555066079295, + "grad_norm": 2.1260048612910216, + "learning_rate": 7.133414793862532e-06, + "loss": 0.5948734283447266, + "step": 3480 + }, + { + "epoch": 1.2266079295154184, + "grad_norm": 2.593831579740968, + "learning_rate": 7.127833402244515e-06, + "loss": 0.6179298162460327, + "step": 3481 + }, + { + "epoch": 1.2269603524229076, + "grad_norm": 1.6926296837265904, + "learning_rate": 7.122252985637672e-06, + "loss": 0.5543676614761353, + "step": 3482 + }, + { + "epoch": 1.2273127753303965, + "grad_norm": 1.6008632106545562, + "learning_rate": 7.116673545936379e-06, + "loss": 0.6279658079147339, + "step": 3483 + }, + { + "epoch": 1.2276651982378854, + "grad_norm": 1.5383086530060461, + "learning_rate": 7.111095085034687e-06, + "loss": 0.6692230701446533, + "step": 3484 + }, + { + "epoch": 1.2280176211453744, + "grad_norm": 1.7218507243355061, + "learning_rate": 7.1055176048263085e-06, + "loss": 0.6124502420425415, + "step": 3485 + }, + { + "epoch": 1.2283700440528635, + "grad_norm": 2.0325469007846007, + "learning_rate": 7.09994110720462e-06, + "loss": 0.6241810321807861, + "step": 3486 + }, + { + "epoch": 1.2287224669603525, + "grad_norm": 1.7620353767255947, + "learning_rate": 7.094365594062675e-06, + "loss": 0.6556589603424072, + "step": 3487 + }, + { + "epoch": 1.2290748898678414, + "grad_norm": 1.660185756567605, + "learning_rate": 7.0887910672931815e-06, + "loss": 0.480433851480484, + "step": 3488 + }, + { + "epoch": 1.2294273127753303, + "grad_norm": 1.7666817554476708, + "learning_rate": 7.083217528788524e-06, + "loss": 0.6198803782463074, + "step": 3489 + }, + { + "epoch": 1.2297797356828193, + "grad_norm": 1.7945939958355666, + "learning_rate": 7.077644980440741e-06, + "loss": 0.6368751525878906, + "step": 3490 + }, + { + "epoch": 1.2301321585903084, + 
"grad_norm": 1.904999974616483, + "learning_rate": 7.072073424141538e-06, + "loss": 0.5992522239685059, + "step": 3491 + }, + { + "epoch": 1.2304845814977974, + "grad_norm": 1.6441410368294835, + "learning_rate": 7.066502861782289e-06, + "loss": 0.5917885303497314, + "step": 3492 + }, + { + "epoch": 1.2308370044052863, + "grad_norm": 1.9090985571817867, + "learning_rate": 7.060933295254027e-06, + "loss": 0.615925669670105, + "step": 3493 + }, + { + "epoch": 1.2311894273127753, + "grad_norm": 1.5510149338562214, + "learning_rate": 7.055364726447437e-06, + "loss": 0.4408820867538452, + "step": 3494 + }, + { + "epoch": 1.2315418502202644, + "grad_norm": 1.706805010144051, + "learning_rate": 7.049797157252889e-06, + "loss": 0.4918386936187744, + "step": 3495 + }, + { + "epoch": 1.2318942731277533, + "grad_norm": 2.0047166519470965, + "learning_rate": 7.0442305895603844e-06, + "loss": 0.6964970827102661, + "step": 3496 + }, + { + "epoch": 1.2322466960352423, + "grad_norm": 1.993882373770559, + "learning_rate": 7.038665025259615e-06, + "loss": 0.5269606113433838, + "step": 3497 + }, + { + "epoch": 1.2325991189427312, + "grad_norm": 1.7338430673292662, + "learning_rate": 7.033100466239908e-06, + "loss": 0.6146842241287231, + "step": 3498 + }, + { + "epoch": 1.2329515418502202, + "grad_norm": 1.8958783101408965, + "learning_rate": 7.027536914390257e-06, + "loss": 0.7163739800453186, + "step": 3499 + }, + { + "epoch": 1.2333039647577093, + "grad_norm": 1.5575657818438158, + "learning_rate": 7.021974371599318e-06, + "loss": 0.5851477980613708, + "step": 3500 + }, + { + "epoch": 1.2336563876651983, + "grad_norm": 1.3831914970718109, + "learning_rate": 7.0164128397554e-06, + "loss": 0.585768461227417, + "step": 3501 + }, + { + "epoch": 1.2340088105726872, + "grad_norm": 1.651121323438745, + "learning_rate": 7.0108523207464706e-06, + "loss": 0.5467718839645386, + "step": 3502 + }, + { + "epoch": 1.2343612334801761, + "grad_norm": 1.8179588757324485, + "learning_rate": 
7.0052928164601564e-06, + "loss": 0.638299822807312, + "step": 3503 + }, + { + "epoch": 1.2347136563876653, + "grad_norm": 1.8158584952636452, + "learning_rate": 6.9997343287837275e-06, + "loss": 0.6737650036811829, + "step": 3504 + }, + { + "epoch": 1.2350660792951542, + "grad_norm": 1.7619528960945736, + "learning_rate": 6.9941768596041224e-06, + "loss": 0.6659837961196899, + "step": 3505 + }, + { + "epoch": 1.2354185022026432, + "grad_norm": 1.9059656133131788, + "learning_rate": 6.988620410807932e-06, + "loss": 0.6731020212173462, + "step": 3506 + }, + { + "epoch": 1.235770925110132, + "grad_norm": 1.8111638058637756, + "learning_rate": 6.983064984281389e-06, + "loss": 0.6236598491668701, + "step": 3507 + }, + { + "epoch": 1.236123348017621, + "grad_norm": 1.8485171900570894, + "learning_rate": 6.9775105819103985e-06, + "loss": 0.6233193874359131, + "step": 3508 + }, + { + "epoch": 1.2364757709251102, + "grad_norm": 1.7456936175280036, + "learning_rate": 6.971957205580497e-06, + "loss": 0.5914918184280396, + "step": 3509 + }, + { + "epoch": 1.2368281938325991, + "grad_norm": 2.069060854376664, + "learning_rate": 6.966404857176893e-06, + "loss": 0.6576484441757202, + "step": 3510 + }, + { + "epoch": 1.237180616740088, + "grad_norm": 1.6371442891988068, + "learning_rate": 6.960853538584431e-06, + "loss": 0.5609208941459656, + "step": 3511 + }, + { + "epoch": 1.2375330396475772, + "grad_norm": 1.8336206343046235, + "learning_rate": 6.955303251687609e-06, + "loss": 0.6405455470085144, + "step": 3512 + }, + { + "epoch": 1.2378854625550662, + "grad_norm": 1.6981959386126726, + "learning_rate": 6.949753998370579e-06, + "loss": 0.5621844530105591, + "step": 3513 + }, + { + "epoch": 1.238237885462555, + "grad_norm": 1.6040361718583698, + "learning_rate": 6.944205780517138e-06, + "loss": 0.5674207210540771, + "step": 3514 + }, + { + "epoch": 1.238590308370044, + "grad_norm": 1.8089615708578142, + "learning_rate": 6.938658600010734e-06, + "loss": 0.6744752526283264, + 
"step": 3515 + }, + { + "epoch": 1.238942731277533, + "grad_norm": 1.851260674535246, + "learning_rate": 6.9331124587344655e-06, + "loss": 0.537495493888855, + "step": 3516 + }, + { + "epoch": 1.2392951541850221, + "grad_norm": 1.7599394880527937, + "learning_rate": 6.92756735857107e-06, + "loss": 0.8405104875564575, + "step": 3517 + }, + { + "epoch": 1.239647577092511, + "grad_norm": 1.7838209985249966, + "learning_rate": 6.92202330140294e-06, + "loss": 0.6751723885536194, + "step": 3518 + }, + { + "epoch": 1.24, + "grad_norm": 1.8012761946666955, + "learning_rate": 6.9164802891121105e-06, + "loss": 0.5763178467750549, + "step": 3519 + }, + { + "epoch": 1.240352422907489, + "grad_norm": 1.7859481797599979, + "learning_rate": 6.910938323580256e-06, + "loss": 0.7713793516159058, + "step": 3520 + }, + { + "epoch": 1.240704845814978, + "grad_norm": 2.0598557028652356, + "learning_rate": 6.90539740668871e-06, + "loss": 0.6354435682296753, + "step": 3521 + }, + { + "epoch": 1.241057268722467, + "grad_norm": 1.6780280463346202, + "learning_rate": 6.899857540318434e-06, + "loss": 0.5121721625328064, + "step": 3522 + }, + { + "epoch": 1.241409691629956, + "grad_norm": 1.8470903920827393, + "learning_rate": 6.894318726350042e-06, + "loss": 0.586428165435791, + "step": 3523 + }, + { + "epoch": 1.241762114537445, + "grad_norm": 1.690234288859414, + "learning_rate": 6.888780966663792e-06, + "loss": 0.4868311285972595, + "step": 3524 + }, + { + "epoch": 1.2421145374449338, + "grad_norm": 1.7688170320163026, + "learning_rate": 6.883244263139578e-06, + "loss": 0.7057775259017944, + "step": 3525 + }, + { + "epoch": 1.242466960352423, + "grad_norm": 1.630207980484645, + "learning_rate": 6.877708617656942e-06, + "loss": 0.4993360638618469, + "step": 3526 + }, + { + "epoch": 1.242819383259912, + "grad_norm": 1.7093781024880734, + "learning_rate": 6.872174032095061e-06, + "loss": 0.6096793413162231, + "step": 3527 + }, + { + "epoch": 1.2431718061674009, + "grad_norm": 
1.7005141830755592, + "learning_rate": 6.866640508332751e-06, + "loss": 0.584385871887207, + "step": 3528 + }, + { + "epoch": 1.2435242290748898, + "grad_norm": 1.6033098221924098, + "learning_rate": 6.861108048248477e-06, + "loss": 0.5857449173927307, + "step": 3529 + }, + { + "epoch": 1.243876651982379, + "grad_norm": 1.6447411339873705, + "learning_rate": 6.855576653720333e-06, + "loss": 0.4337875247001648, + "step": 3530 + }, + { + "epoch": 1.244229074889868, + "grad_norm": 1.924557656954366, + "learning_rate": 6.850046326626058e-06, + "loss": 0.6949163675308228, + "step": 3531 + }, + { + "epoch": 1.2445814977973568, + "grad_norm": 2.029468434582643, + "learning_rate": 6.844517068843025e-06, + "loss": 0.5876098871231079, + "step": 3532 + }, + { + "epoch": 1.2449339207048458, + "grad_norm": 2.0143379278356153, + "learning_rate": 6.838988882248243e-06, + "loss": 0.5460488796234131, + "step": 3533 + }, + { + "epoch": 1.2452863436123347, + "grad_norm": 2.284896657447092, + "learning_rate": 6.833461768718365e-06, + "loss": 0.6500875949859619, + "step": 3534 + }, + { + "epoch": 1.2456387665198239, + "grad_norm": 1.9702281980181484, + "learning_rate": 6.82793573012967e-06, + "loss": 0.6504626274108887, + "step": 3535 + }, + { + "epoch": 1.2459911894273128, + "grad_norm": 1.8635901517060365, + "learning_rate": 6.822410768358072e-06, + "loss": 0.6881722211837769, + "step": 3536 + }, + { + "epoch": 1.2463436123348017, + "grad_norm": 1.7111090644899583, + "learning_rate": 6.816886885279132e-06, + "loss": 0.6747599840164185, + "step": 3537 + }, + { + "epoch": 1.2466960352422907, + "grad_norm": 2.61809094535544, + "learning_rate": 6.811364082768028e-06, + "loss": 0.5987570285797119, + "step": 3538 + }, + { + "epoch": 1.2470484581497798, + "grad_norm": 1.8641726073707956, + "learning_rate": 6.8058423626995885e-06, + "loss": 0.6614603996276855, + "step": 3539 + }, + { + "epoch": 1.2474008810572688, + "grad_norm": 1.5529990518062367, + "learning_rate": 6.80032172694826e-06, + 
"loss": 0.542367696762085, + "step": 3540 + }, + { + "epoch": 1.2477533039647577, + "grad_norm": 1.7771584963866378, + "learning_rate": 6.7948021773881235e-06, + "loss": 0.6200593709945679, + "step": 3541 + }, + { + "epoch": 1.2481057268722466, + "grad_norm": 1.896811225090905, + "learning_rate": 6.789283715892905e-06, + "loss": 0.6425306797027588, + "step": 3542 + }, + { + "epoch": 1.2484581497797356, + "grad_norm": 1.4798584901842344, + "learning_rate": 6.78376634433594e-06, + "loss": 0.5277592539787292, + "step": 3543 + }, + { + "epoch": 1.2488105726872247, + "grad_norm": 1.8357663435279958, + "learning_rate": 6.778250064590206e-06, + "loss": 0.6120523810386658, + "step": 3544 + }, + { + "epoch": 1.2491629955947137, + "grad_norm": 2.0042129559914654, + "learning_rate": 6.772734878528313e-06, + "loss": 0.538428544998169, + "step": 3545 + }, + { + "epoch": 1.2495154185022026, + "grad_norm": 1.7456851140249008, + "learning_rate": 6.76722078802249e-06, + "loss": 0.6439732909202576, + "step": 3546 + }, + { + "epoch": 1.2498678414096915, + "grad_norm": 1.5580174742798336, + "learning_rate": 6.761707794944605e-06, + "loss": 0.5951697826385498, + "step": 3547 + }, + { + "epoch": 1.2502202643171807, + "grad_norm": 1.5461650468928614, + "learning_rate": 6.7561959011661456e-06, + "loss": 0.5548606514930725, + "step": 3548 + }, + { + "epoch": 1.2505726872246696, + "grad_norm": 1.936721806656616, + "learning_rate": 6.750685108558221e-06, + "loss": 0.4768974781036377, + "step": 3549 + }, + { + "epoch": 1.2509251101321586, + "grad_norm": 1.6130866640641843, + "learning_rate": 6.745175418991585e-06, + "loss": 0.6629552245140076, + "step": 3550 + }, + { + "epoch": 1.2512775330396475, + "grad_norm": 1.8826604922139925, + "learning_rate": 6.739666834336599e-06, + "loss": 0.6550329923629761, + "step": 3551 + }, + { + "epoch": 1.2516299559471364, + "grad_norm": 1.7091222991512534, + "learning_rate": 6.734159356463254e-06, + "loss": 0.37340015172958374, + "step": 3552 + }, + { + 
"epoch": 1.2519823788546256, + "grad_norm": 2.0454082069330424, + "learning_rate": 6.728652987241175e-06, + "loss": 0.6343201398849487, + "step": 3553 + }, + { + "epoch": 1.2523348017621145, + "grad_norm": 1.8938201811077042, + "learning_rate": 6.723147728539596e-06, + "loss": 0.7555221319198608, + "step": 3554 + }, + { + "epoch": 1.2526872246696035, + "grad_norm": 1.7356069524639768, + "learning_rate": 6.717643582227384e-06, + "loss": 0.5944523215293884, + "step": 3555 + }, + { + "epoch": 1.2530396475770926, + "grad_norm": 1.627279375354834, + "learning_rate": 6.71214055017303e-06, + "loss": 0.5686212778091431, + "step": 3556 + }, + { + "epoch": 1.2533920704845816, + "grad_norm": 1.697482530075543, + "learning_rate": 6.706638634244629e-06, + "loss": 0.6401857137680054, + "step": 3557 + }, + { + "epoch": 1.2537444933920705, + "grad_norm": 1.5933991655989903, + "learning_rate": 6.701137836309926e-06, + "loss": 0.4571516513824463, + "step": 3558 + }, + { + "epoch": 1.2540969162995594, + "grad_norm": 1.7606001647916119, + "learning_rate": 6.695638158236255e-06, + "loss": 0.5857570171356201, + "step": 3559 + }, + { + "epoch": 1.2544493392070484, + "grad_norm": 1.7187772621235449, + "learning_rate": 6.690139601890601e-06, + "loss": 0.6981472969055176, + "step": 3560 + }, + { + "epoch": 1.2548017621145373, + "grad_norm": 1.685629147285753, + "learning_rate": 6.684642169139544e-06, + "loss": 0.5120254755020142, + "step": 3561 + }, + { + "epoch": 1.2551541850220265, + "grad_norm": 2.043587366608814, + "learning_rate": 6.67914586184929e-06, + "loss": 0.6975923776626587, + "step": 3562 + }, + { + "epoch": 1.2555066079295154, + "grad_norm": 2.1694224742588233, + "learning_rate": 6.673650681885668e-06, + "loss": 0.5825072526931763, + "step": 3563 + }, + { + "epoch": 1.2558590308370043, + "grad_norm": 1.9388578444875513, + "learning_rate": 6.668156631114124e-06, + "loss": 0.5701749324798584, + "step": 3564 + }, + { + "epoch": 1.2562114537444935, + "grad_norm": 
1.6715281124187895, + "learning_rate": 6.662663711399705e-06, + "loss": 0.5230482220649719, + "step": 3565 + }, + { + "epoch": 1.2565638766519824, + "grad_norm": 1.7540798103539514, + "learning_rate": 6.657171924607102e-06, + "loss": 0.6680361032485962, + "step": 3566 + }, + { + "epoch": 1.2569162995594714, + "grad_norm": 1.7792330481880054, + "learning_rate": 6.651681272600592e-06, + "loss": 0.6745159029960632, + "step": 3567 + }, + { + "epoch": 1.2572687224669603, + "grad_norm": 1.5777367956881352, + "learning_rate": 6.646191757244089e-06, + "loss": 0.587162971496582, + "step": 3568 + }, + { + "epoch": 1.2576211453744492, + "grad_norm": 2.0091715660610183, + "learning_rate": 6.640703380401111e-06, + "loss": 0.6170785427093506, + "step": 3569 + }, + { + "epoch": 1.2579735682819384, + "grad_norm": 1.8496931248102404, + "learning_rate": 6.6352161439347875e-06, + "loss": 0.4955494999885559, + "step": 3570 + }, + { + "epoch": 1.2583259911894273, + "grad_norm": 1.8039519732213443, + "learning_rate": 6.62973004970787e-06, + "loss": 0.7183424234390259, + "step": 3571 + }, + { + "epoch": 1.2586784140969163, + "grad_norm": 1.6920151696252388, + "learning_rate": 6.624245099582713e-06, + "loss": 0.6266030669212341, + "step": 3572 + }, + { + "epoch": 1.2590308370044052, + "grad_norm": 1.8260182971737482, + "learning_rate": 6.6187612954212845e-06, + "loss": 0.5234469175338745, + "step": 3573 + }, + { + "epoch": 1.2593832599118944, + "grad_norm": 2.0762206956902234, + "learning_rate": 6.6132786390851725e-06, + "loss": 0.7066231966018677, + "step": 3574 + }, + { + "epoch": 1.2597356828193833, + "grad_norm": 1.8486791061565373, + "learning_rate": 6.60779713243556e-06, + "loss": 0.622086226940155, + "step": 3575 + }, + { + "epoch": 1.2600881057268722, + "grad_norm": 2.003110770323092, + "learning_rate": 6.6023167773332554e-06, + "loss": 0.6607370376586914, + "step": 3576 + }, + { + "epoch": 1.2604405286343612, + "grad_norm": 1.9512971078148649, + "learning_rate": 
6.596837575638663e-06, + "loss": 0.6846165657043457, + "step": 3577 + }, + { + "epoch": 1.2607929515418501, + "grad_norm": 2.1137757907106574, + "learning_rate": 6.5913595292118024e-06, + "loss": 0.6329103708267212, + "step": 3578 + }, + { + "epoch": 1.2611453744493393, + "grad_norm": 1.7067433363159659, + "learning_rate": 6.585882639912302e-06, + "loss": 0.7942261695861816, + "step": 3579 + }, + { + "epoch": 1.2614977973568282, + "grad_norm": 1.923592126322299, + "learning_rate": 6.580406909599393e-06, + "loss": 0.5446548461914062, + "step": 3580 + }, + { + "epoch": 1.2618502202643171, + "grad_norm": 2.584270827853736, + "learning_rate": 6.574932340131917e-06, + "loss": 0.581193208694458, + "step": 3581 + }, + { + "epoch": 1.2622026431718063, + "grad_norm": 1.789761494779322, + "learning_rate": 6.569458933368323e-06, + "loss": 0.6099729537963867, + "step": 3582 + }, + { + "epoch": 1.2625550660792952, + "grad_norm": 1.7689292642576144, + "learning_rate": 6.563986691166655e-06, + "loss": 0.45215970277786255, + "step": 3583 + }, + { + "epoch": 1.2629074889867842, + "grad_norm": 1.9037008934232844, + "learning_rate": 6.558515615384573e-06, + "loss": 0.6674731969833374, + "step": 3584 + }, + { + "epoch": 1.2632599118942731, + "grad_norm": 1.4782940862298068, + "learning_rate": 6.553045707879338e-06, + "loss": 0.4951098561286926, + "step": 3585 + }, + { + "epoch": 1.263612334801762, + "grad_norm": 1.7852149202748289, + "learning_rate": 6.54757697050781e-06, + "loss": 0.5853816270828247, + "step": 3586 + }, + { + "epoch": 1.263964757709251, + "grad_norm": 1.5907197274079232, + "learning_rate": 6.5421094051264575e-06, + "loss": 0.5236951112747192, + "step": 3587 + }, + { + "epoch": 1.2643171806167401, + "grad_norm": 1.733068587169355, + "learning_rate": 6.536643013591347e-06, + "loss": 0.5717612504959106, + "step": 3588 + }, + { + "epoch": 1.264669603524229, + "grad_norm": 2.033496211612474, + "learning_rate": 6.531177797758155e-06, + "loss": 0.6144098043441772, + "step": 
3589 + }, + { + "epoch": 1.265022026431718, + "grad_norm": 1.6355266077439052, + "learning_rate": 6.525713759482144e-06, + "loss": 0.5634705424308777, + "step": 3590 + }, + { + "epoch": 1.2653744493392072, + "grad_norm": 1.7147225194337798, + "learning_rate": 6.520250900618186e-06, + "loss": 0.582956075668335, + "step": 3591 + }, + { + "epoch": 1.265726872246696, + "grad_norm": 1.843768096592032, + "learning_rate": 6.514789223020754e-06, + "loss": 0.7649297714233398, + "step": 3592 + }, + { + "epoch": 1.266079295154185, + "grad_norm": 1.6261733555902604, + "learning_rate": 6.509328728543918e-06, + "loss": 0.6035098433494568, + "step": 3593 + }, + { + "epoch": 1.266431718061674, + "grad_norm": 1.8493319579504743, + "learning_rate": 6.503869419041344e-06, + "loss": 0.6405705809593201, + "step": 3594 + }, + { + "epoch": 1.266784140969163, + "grad_norm": 2.26304309310324, + "learning_rate": 6.498411296366299e-06, + "loss": 0.674353301525116, + "step": 3595 + }, + { + "epoch": 1.2671365638766519, + "grad_norm": 1.7621656180677492, + "learning_rate": 6.492954362371644e-06, + "loss": 0.6018465757369995, + "step": 3596 + }, + { + "epoch": 1.267488986784141, + "grad_norm": 2.127137234030612, + "learning_rate": 6.487498618909845e-06, + "loss": 0.6491270065307617, + "step": 3597 + }, + { + "epoch": 1.26784140969163, + "grad_norm": 1.6636292273445474, + "learning_rate": 6.4820440678329474e-06, + "loss": 0.5126988887786865, + "step": 3598 + }, + { + "epoch": 1.2681938325991189, + "grad_norm": 1.7884980833676332, + "learning_rate": 6.476590710992605e-06, + "loss": 0.5931694507598877, + "step": 3599 + }, + { + "epoch": 1.268546255506608, + "grad_norm": 1.9386898901162777, + "learning_rate": 6.471138550240066e-06, + "loss": 0.5455423593521118, + "step": 3600 + }, + { + "epoch": 1.268898678414097, + "grad_norm": 1.6361281925349132, + "learning_rate": 6.465687587426166e-06, + "loss": 0.4870053231716156, + "step": 3601 + }, + { + "epoch": 1.269251101321586, + "grad_norm": 
1.9069149245463006, + "learning_rate": 6.460237824401337e-06, + "loss": 0.6434903144836426, + "step": 3602 + }, + { + "epoch": 1.2696035242290749, + "grad_norm": 1.676899060774639, + "learning_rate": 6.454789263015609e-06, + "loss": 0.6256476640701294, + "step": 3603 + }, + { + "epoch": 1.2699559471365638, + "grad_norm": 1.8004511475353204, + "learning_rate": 6.449341905118589e-06, + "loss": 0.6304135322570801, + "step": 3604 + }, + { + "epoch": 1.2703083700440527, + "grad_norm": 1.9009929525157667, + "learning_rate": 6.443895752559498e-06, + "loss": 0.5315194725990295, + "step": 3605 + }, + { + "epoch": 1.2706607929515419, + "grad_norm": 1.4321615697348329, + "learning_rate": 6.438450807187127e-06, + "loss": 0.5232852697372437, + "step": 3606 + }, + { + "epoch": 1.2710132158590308, + "grad_norm": 1.6584356511216338, + "learning_rate": 6.433007070849863e-06, + "loss": 0.4462543725967407, + "step": 3607 + }, + { + "epoch": 1.2713656387665198, + "grad_norm": 1.6730765460300174, + "learning_rate": 6.4275645453956945e-06, + "loss": 0.6347709894180298, + "step": 3608 + }, + { + "epoch": 1.271718061674009, + "grad_norm": 1.625329738549371, + "learning_rate": 6.422123232672182e-06, + "loss": 0.5277259349822998, + "step": 3609 + }, + { + "epoch": 1.2720704845814979, + "grad_norm": 1.7954090025098361, + "learning_rate": 6.416683134526486e-06, + "loss": 0.6297650933265686, + "step": 3610 + }, + { + "epoch": 1.2724229074889868, + "grad_norm": 1.7743916636003476, + "learning_rate": 6.411244252805351e-06, + "loss": 0.503609836101532, + "step": 3611 + }, + { + "epoch": 1.2727753303964757, + "grad_norm": 1.7300375262211753, + "learning_rate": 6.405806589355099e-06, + "loss": 0.6026735305786133, + "step": 3612 + }, + { + "epoch": 1.2731277533039647, + "grad_norm": 1.543883502597784, + "learning_rate": 6.400370146021662e-06, + "loss": 0.4918368458747864, + "step": 3613 + }, + { + "epoch": 1.2734801762114538, + "grad_norm": 2.125830682883153, + "learning_rate": 
6.394934924650532e-06, + "loss": 0.6215550899505615, + "step": 3614 + }, + { + "epoch": 1.2738325991189428, + "grad_norm": 2.1843858701221563, + "learning_rate": 6.389500927086801e-06, + "loss": 0.6979820728302002, + "step": 3615 + }, + { + "epoch": 1.2741850220264317, + "grad_norm": 1.9168565956279218, + "learning_rate": 6.384068155175143e-06, + "loss": 0.5661836266517639, + "step": 3616 + }, + { + "epoch": 1.2745374449339206, + "grad_norm": 2.2497484972303896, + "learning_rate": 6.378636610759812e-06, + "loss": 0.699792742729187, + "step": 3617 + }, + { + "epoch": 1.2748898678414098, + "grad_norm": 2.1298001613626765, + "learning_rate": 6.373206295684653e-06, + "loss": 0.6418631076812744, + "step": 3618 + }, + { + "epoch": 1.2752422907488987, + "grad_norm": 1.639324838954067, + "learning_rate": 6.3677772117930895e-06, + "loss": 0.4975489675998688, + "step": 3619 + }, + { + "epoch": 1.2755947136563877, + "grad_norm": 1.6787243090627195, + "learning_rate": 6.362349360928117e-06, + "loss": 0.5621567964553833, + "step": 3620 + }, + { + "epoch": 1.2759471365638766, + "grad_norm": 1.9441609125211634, + "learning_rate": 6.356922744932335e-06, + "loss": 0.538573682308197, + "step": 3621 + }, + { + "epoch": 1.2762995594713655, + "grad_norm": 1.8099521315485383, + "learning_rate": 6.351497365647903e-06, + "loss": 0.5726763010025024, + "step": 3622 + }, + { + "epoch": 1.2766519823788547, + "grad_norm": 1.509968688666824, + "learning_rate": 6.346073224916565e-06, + "loss": 0.5911343097686768, + "step": 3623 + }, + { + "epoch": 1.2770044052863436, + "grad_norm": 1.8960352229890238, + "learning_rate": 6.340650324579658e-06, + "loss": 0.6181383728981018, + "step": 3624 + }, + { + "epoch": 1.2773568281938326, + "grad_norm": 1.8065087463718459, + "learning_rate": 6.3352286664780785e-06, + "loss": 0.5941140651702881, + "step": 3625 + }, + { + "epoch": 1.2777092511013217, + "grad_norm": 1.980034412220703, + "learning_rate": 6.329808252452316e-06, + "loss": 0.7604472637176514, + 
"step": 3626 + }, + { + "epoch": 1.2780616740088107, + "grad_norm": 1.7265138262893938, + "learning_rate": 6.324389084342435e-06, + "loss": 0.6063867211341858, + "step": 3627 + }, + { + "epoch": 1.2784140969162996, + "grad_norm": 1.8844241099487, + "learning_rate": 6.3189711639880644e-06, + "loss": 0.7202302813529968, + "step": 3628 + }, + { + "epoch": 1.2787665198237885, + "grad_norm": 1.7295127580755116, + "learning_rate": 6.313554493228431e-06, + "loss": 0.5934856534004211, + "step": 3629 + }, + { + "epoch": 1.2791189427312775, + "grad_norm": 1.7905829637835577, + "learning_rate": 6.3081390739023175e-06, + "loss": 0.6403088569641113, + "step": 3630 + }, + { + "epoch": 1.2794713656387664, + "grad_norm": 1.9400757232043577, + "learning_rate": 6.302724907848096e-06, + "loss": 0.6679831743240356, + "step": 3631 + }, + { + "epoch": 1.2798237885462556, + "grad_norm": 1.9107919043768602, + "learning_rate": 6.297311996903703e-06, + "loss": 0.6914902329444885, + "step": 3632 + }, + { + "epoch": 1.2801762114537445, + "grad_norm": 1.4865016000129294, + "learning_rate": 6.2919003429066535e-06, + "loss": 0.5391600131988525, + "step": 3633 + }, + { + "epoch": 1.2805286343612334, + "grad_norm": 1.7774288854868727, + "learning_rate": 6.286489947694041e-06, + "loss": 0.5740962028503418, + "step": 3634 + }, + { + "epoch": 1.2808810572687226, + "grad_norm": 1.9144175178404335, + "learning_rate": 6.281080813102523e-06, + "loss": 0.6497045159339905, + "step": 3635 + }, + { + "epoch": 1.2812334801762115, + "grad_norm": 1.6649274023798961, + "learning_rate": 6.275672940968326e-06, + "loss": 0.5481048226356506, + "step": 3636 + }, + { + "epoch": 1.2815859030837005, + "grad_norm": 1.6547388155087517, + "learning_rate": 6.270266333127266e-06, + "loss": 0.5412508249282837, + "step": 3637 + }, + { + "epoch": 1.2819383259911894, + "grad_norm": 1.8289845737684471, + "learning_rate": 6.264860991414709e-06, + "loss": 0.5055446624755859, + "step": 3638 + }, + { + "epoch": 1.2822907488986783, + 
"grad_norm": 1.9772143213144648, + "learning_rate": 6.259456917665605e-06, + "loss": 0.6073929071426392, + "step": 3639 + }, + { + "epoch": 1.2826431718061673, + "grad_norm": 1.6297327309789957, + "learning_rate": 6.254054113714467e-06, + "loss": 0.5277928113937378, + "step": 3640 + }, + { + "epoch": 1.2829955947136564, + "grad_norm": 1.7440990717646376, + "learning_rate": 6.248652581395378e-06, + "loss": 0.5106299519538879, + "step": 3641 + }, + { + "epoch": 1.2833480176211454, + "grad_norm": 1.612143250274434, + "learning_rate": 6.243252322541993e-06, + "loss": 0.485049843788147, + "step": 3642 + }, + { + "epoch": 1.2837004405286343, + "grad_norm": 2.0115453178937894, + "learning_rate": 6.237853338987532e-06, + "loss": 0.5899066925048828, + "step": 3643 + }, + { + "epoch": 1.2840528634361235, + "grad_norm": 1.6956228425038977, + "learning_rate": 6.2324556325647745e-06, + "loss": 0.5761981010437012, + "step": 3644 + }, + { + "epoch": 1.2844052863436124, + "grad_norm": 1.732932337254408, + "learning_rate": 6.227059205106085e-06, + "loss": 0.6288208961486816, + "step": 3645 + }, + { + "epoch": 1.2847577092511013, + "grad_norm": 1.7671756166643349, + "learning_rate": 6.2216640584433726e-06, + "loss": 0.6122645139694214, + "step": 3646 + }, + { + "epoch": 1.2851101321585903, + "grad_norm": 1.8312838317562172, + "learning_rate": 6.2162701944081295e-06, + "loss": 0.5838489532470703, + "step": 3647 + }, + { + "epoch": 1.2854625550660792, + "grad_norm": 1.5533740438356287, + "learning_rate": 6.2108776148314005e-06, + "loss": 0.6020689606666565, + "step": 3648 + }, + { + "epoch": 1.2858149779735684, + "grad_norm": 1.9453055966993607, + "learning_rate": 6.205486321543798e-06, + "loss": 0.5852698683738708, + "step": 3649 + }, + { + "epoch": 1.2861674008810573, + "grad_norm": 1.7649785944212673, + "learning_rate": 6.2000963163755015e-06, + "loss": 0.560903012752533, + "step": 3650 + }, + { + "epoch": 1.2865198237885462, + "grad_norm": 2.053972717306982, + "learning_rate": 
6.194707601156249e-06, + "loss": 0.7750356197357178, + "step": 3651 + }, + { + "epoch": 1.2868722466960352, + "grad_norm": 1.7842589241914402, + "learning_rate": 6.189320177715338e-06, + "loss": 0.5503605604171753, + "step": 3652 + }, + { + "epoch": 1.2872246696035243, + "grad_norm": 1.8162609150425584, + "learning_rate": 6.183934047881636e-06, + "loss": 0.6910672187805176, + "step": 3653 + }, + { + "epoch": 1.2875770925110133, + "grad_norm": 1.6952370527492193, + "learning_rate": 6.1785492134835626e-06, + "loss": 0.7773069739341736, + "step": 3654 + }, + { + "epoch": 1.2879295154185022, + "grad_norm": 1.7765631560225321, + "learning_rate": 6.173165676349103e-06, + "loss": 0.6777454018592834, + "step": 3655 + }, + { + "epoch": 1.2882819383259911, + "grad_norm": 1.6097825614884171, + "learning_rate": 6.167783438305803e-06, + "loss": 0.6103118658065796, + "step": 3656 + }, + { + "epoch": 1.28863436123348, + "grad_norm": 2.4016366240266454, + "learning_rate": 6.1624025011807595e-06, + "loss": 0.593717634677887, + "step": 3657 + }, + { + "epoch": 1.2889867841409692, + "grad_norm": 1.700445284940488, + "learning_rate": 6.1570228668006395e-06, + "loss": 0.5822824835777283, + "step": 3658 + }, + { + "epoch": 1.2893392070484582, + "grad_norm": 1.7095957018221146, + "learning_rate": 6.151644536991656e-06, + "loss": 0.5180603861808777, + "step": 3659 + }, + { + "epoch": 1.289691629955947, + "grad_norm": 1.799926440179644, + "learning_rate": 6.14626751357959e-06, + "loss": 0.6283069849014282, + "step": 3660 + }, + { + "epoch": 1.290044052863436, + "grad_norm": 2.2706339647511613, + "learning_rate": 6.14089179838977e-06, + "loss": 0.7590633630752563, + "step": 3661 + }, + { + "epoch": 1.2903964757709252, + "grad_norm": 1.4238309589699358, + "learning_rate": 6.135517393247081e-06, + "loss": 0.6044079661369324, + "step": 3662 + }, + { + "epoch": 1.2907488986784141, + "grad_norm": 2.078820338247561, + "learning_rate": 6.130144299975973e-06, + "loss": 0.603421688079834, + "step": 
3663 + }, + { + "epoch": 1.291101321585903, + "grad_norm": 1.9398452395479244, + "learning_rate": 6.1247725204004395e-06, + "loss": 0.577094554901123, + "step": 3664 + }, + { + "epoch": 1.291453744493392, + "grad_norm": 1.7780187513951604, + "learning_rate": 6.119402056344033e-06, + "loss": 0.5752004981040955, + "step": 3665 + }, + { + "epoch": 1.291806167400881, + "grad_norm": 1.6979532493457608, + "learning_rate": 6.114032909629863e-06, + "loss": 0.730962872505188, + "step": 3666 + }, + { + "epoch": 1.29215859030837, + "grad_norm": 2.0386068832784465, + "learning_rate": 6.108665082080578e-06, + "loss": 0.5361749529838562, + "step": 3667 + }, + { + "epoch": 1.292511013215859, + "grad_norm": 1.470729033877409, + "learning_rate": 6.103298575518401e-06, + "loss": 0.4841603636741638, + "step": 3668 + }, + { + "epoch": 1.292863436123348, + "grad_norm": 1.706501413292354, + "learning_rate": 6.097933391765087e-06, + "loss": 0.6614999771118164, + "step": 3669 + }, + { + "epoch": 1.2932158590308371, + "grad_norm": 1.6930402108862321, + "learning_rate": 6.092569532641947e-06, + "loss": 0.6088405847549438, + "step": 3670 + }, + { + "epoch": 1.293568281938326, + "grad_norm": 1.9173247230823398, + "learning_rate": 6.087206999969848e-06, + "loss": 0.601859986782074, + "step": 3671 + }, + { + "epoch": 1.293920704845815, + "grad_norm": 1.8019332247534052, + "learning_rate": 6.081845795569204e-06, + "loss": 0.5724194049835205, + "step": 3672 + }, + { + "epoch": 1.294273127753304, + "grad_norm": 1.7101141845528827, + "learning_rate": 6.07648592125997e-06, + "loss": 0.7899144887924194, + "step": 3673 + }, + { + "epoch": 1.2946255506607929, + "grad_norm": 1.8438581079047975, + "learning_rate": 6.071127378861667e-06, + "loss": 0.5778594017028809, + "step": 3674 + }, + { + "epoch": 1.2949779735682818, + "grad_norm": 1.6768623613769682, + "learning_rate": 6.065770170193342e-06, + "loss": 0.6357566118240356, + "step": 3675 + }, + { + "epoch": 1.295330396475771, + "grad_norm": 
1.5951400768860937, + "learning_rate": 6.0604142970736115e-06, + "loss": 0.511436939239502, + "step": 3676 + }, + { + "epoch": 1.29568281938326, + "grad_norm": 1.883542435313207, + "learning_rate": 6.0550597613206205e-06, + "loss": 0.6469998955726624, + "step": 3677 + }, + { + "epoch": 1.2960352422907488, + "grad_norm": 1.5730405198836903, + "learning_rate": 6.049706564752069e-06, + "loss": 0.5724819898605347, + "step": 3678 + }, + { + "epoch": 1.296387665198238, + "grad_norm": 1.5360587172523898, + "learning_rate": 6.044354709185203e-06, + "loss": 0.6567148566246033, + "step": 3679 + }, + { + "epoch": 1.296740088105727, + "grad_norm": 1.8931575903206552, + "learning_rate": 6.039004196436807e-06, + "loss": 0.6694033145904541, + "step": 3680 + }, + { + "epoch": 1.2970925110132159, + "grad_norm": 1.8190573258877898, + "learning_rate": 6.033655028323215e-06, + "loss": 0.5147275924682617, + "step": 3681 + }, + { + "epoch": 1.2974449339207048, + "grad_norm": 2.0405860057138256, + "learning_rate": 6.0283072066603075e-06, + "loss": 0.5881609320640564, + "step": 3682 + }, + { + "epoch": 1.2977973568281937, + "grad_norm": 1.7248898652229567, + "learning_rate": 6.022960733263493e-06, + "loss": 0.625927209854126, + "step": 3683 + }, + { + "epoch": 1.2981497797356827, + "grad_norm": 1.8738096752650604, + "learning_rate": 6.017615609947747e-06, + "loss": 0.693459153175354, + "step": 3684 + }, + { + "epoch": 1.2985022026431718, + "grad_norm": 1.6745028766810846, + "learning_rate": 6.0122718385275615e-06, + "loss": 0.5185744762420654, + "step": 3685 + }, + { + "epoch": 1.2988546255506608, + "grad_norm": 1.7625922291600025, + "learning_rate": 6.006929420816982e-06, + "loss": 0.5153995752334595, + "step": 3686 + }, + { + "epoch": 1.2992070484581497, + "grad_norm": 1.9617946738772851, + "learning_rate": 6.001588358629598e-06, + "loss": 0.5844067931175232, + "step": 3687 + }, + { + "epoch": 1.2995594713656389, + "grad_norm": 1.7999387557140187, + "learning_rate": 
5.996248653778529e-06, + "loss": 0.6021767854690552, + "step": 3688 + }, + { + "epoch": 1.2999118942731278, + "grad_norm": 1.650868828635221, + "learning_rate": 5.990910308076443e-06, + "loss": 0.573150098323822, + "step": 3689 + }, + { + "epoch": 1.3002643171806167, + "grad_norm": 1.8809065032795727, + "learning_rate": 5.985573323335541e-06, + "loss": 0.5125507116317749, + "step": 3690 + }, + { + "epoch": 1.3006167400881057, + "grad_norm": 1.5884199689542184, + "learning_rate": 5.980237701367556e-06, + "loss": 0.541732668876648, + "step": 3691 + }, + { + "epoch": 1.3009691629955946, + "grad_norm": 2.0151748973563577, + "learning_rate": 5.974903443983778e-06, + "loss": 0.66359543800354, + "step": 3692 + }, + { + "epoch": 1.3013215859030838, + "grad_norm": 1.8831727632454829, + "learning_rate": 5.969570552995014e-06, + "loss": 0.6986300349235535, + "step": 3693 + }, + { + "epoch": 1.3016740088105727, + "grad_norm": 2.0800644206104195, + "learning_rate": 5.9642390302116125e-06, + "loss": 0.6829022169113159, + "step": 3694 + }, + { + "epoch": 1.3020264317180616, + "grad_norm": 1.9073088749861613, + "learning_rate": 5.9589088774434655e-06, + "loss": 0.5710464715957642, + "step": 3695 + }, + { + "epoch": 1.3023788546255506, + "grad_norm": 1.8154393300824316, + "learning_rate": 5.953580096499989e-06, + "loss": 0.5604938268661499, + "step": 3696 + }, + { + "epoch": 1.3027312775330397, + "grad_norm": 1.755426899711885, + "learning_rate": 5.948252689190141e-06, + "loss": 0.678723931312561, + "step": 3697 + }, + { + "epoch": 1.3030837004405287, + "grad_norm": 1.8845664461665383, + "learning_rate": 5.9429266573224145e-06, + "loss": 0.6652591228485107, + "step": 3698 + }, + { + "epoch": 1.3034361233480176, + "grad_norm": 1.8800654237619134, + "learning_rate": 5.937602002704819e-06, + "loss": 0.6141147017478943, + "step": 3699 + }, + { + "epoch": 1.3037885462555066, + "grad_norm": 1.937561336880738, + "learning_rate": 5.932278727144924e-06, + "loss": 0.5260860919952393, + 
"step": 3700 + }, + { + "epoch": 1.3041409691629955, + "grad_norm": 1.6945627397292862, + "learning_rate": 5.926956832449806e-06, + "loss": 0.464357852935791, + "step": 3701 + }, + { + "epoch": 1.3044933920704846, + "grad_norm": 1.8301641414278105, + "learning_rate": 5.921636320426085e-06, + "loss": 0.6513686180114746, + "step": 3702 + }, + { + "epoch": 1.3048458149779736, + "grad_norm": 1.7297134138158161, + "learning_rate": 5.91631719287991e-06, + "loss": 0.44547855854034424, + "step": 3703 + }, + { + "epoch": 1.3051982378854625, + "grad_norm": 1.8572950621020996, + "learning_rate": 5.910999451616959e-06, + "loss": 0.714026153087616, + "step": 3704 + }, + { + "epoch": 1.3055506607929517, + "grad_norm": 1.5164059156260825, + "learning_rate": 5.90568309844244e-06, + "loss": 0.48294252157211304, + "step": 3705 + }, + { + "epoch": 1.3059030837004406, + "grad_norm": 2.0148835282111275, + "learning_rate": 5.900368135161093e-06, + "loss": 0.587759256362915, + "step": 3706 + }, + { + "epoch": 1.3062555066079296, + "grad_norm": 1.7833437474608147, + "learning_rate": 5.895054563577172e-06, + "loss": 0.6251810789108276, + "step": 3707 + }, + { + "epoch": 1.3066079295154185, + "grad_norm": 1.98023378159902, + "learning_rate": 5.889742385494481e-06, + "loss": 0.6488438844680786, + "step": 3708 + }, + { + "epoch": 1.3069603524229074, + "grad_norm": 2.3062951128393325, + "learning_rate": 5.8844316027163315e-06, + "loss": 0.6682882308959961, + "step": 3709 + }, + { + "epoch": 1.3073127753303964, + "grad_norm": 1.9459894886811675, + "learning_rate": 5.879122217045573e-06, + "loss": 0.6537875533103943, + "step": 3710 + }, + { + "epoch": 1.3076651982378855, + "grad_norm": 1.994395753049965, + "learning_rate": 5.873814230284576e-06, + "loss": 0.6813541650772095, + "step": 3711 + }, + { + "epoch": 1.3080176211453745, + "grad_norm": 2.002875607232805, + "learning_rate": 5.868507644235233e-06, + "loss": 0.6962395906448364, + "step": 3712 + }, + { + "epoch": 1.3083700440528634, + 
"grad_norm": 1.8811127927416966, + "learning_rate": 5.863202460698972e-06, + "loss": 0.6872841119766235, + "step": 3713 + }, + { + "epoch": 1.3087224669603525, + "grad_norm": 2.007681646131619, + "learning_rate": 5.857898681476732e-06, + "loss": 0.7200508117675781, + "step": 3714 + }, + { + "epoch": 1.3090748898678415, + "grad_norm": 1.7850989505478374, + "learning_rate": 5.852596308368982e-06, + "loss": 0.6100003719329834, + "step": 3715 + }, + { + "epoch": 1.3094273127753304, + "grad_norm": 1.962305695853223, + "learning_rate": 5.847295343175714e-06, + "loss": 0.7347345352172852, + "step": 3716 + }, + { + "epoch": 1.3097797356828194, + "grad_norm": 1.8094012131106647, + "learning_rate": 5.841995787696438e-06, + "loss": 0.6955733895301819, + "step": 3717 + }, + { + "epoch": 1.3101321585903083, + "grad_norm": 1.6497459626323396, + "learning_rate": 5.836697643730193e-06, + "loss": 0.5266987085342407, + "step": 3718 + }, + { + "epoch": 1.3104845814977972, + "grad_norm": 1.7072540878561502, + "learning_rate": 5.83140091307553e-06, + "loss": 0.5978814363479614, + "step": 3719 + }, + { + "epoch": 1.3108370044052864, + "grad_norm": 1.9008641546548906, + "learning_rate": 5.826105597530526e-06, + "loss": 0.608231782913208, + "step": 3720 + }, + { + "epoch": 1.3111894273127753, + "grad_norm": 1.660571967924875, + "learning_rate": 5.820811698892775e-06, + "loss": 0.5834963321685791, + "step": 3721 + }, + { + "epoch": 1.3115418502202643, + "grad_norm": 1.7715871926900555, + "learning_rate": 5.8155192189593915e-06, + "loss": 0.6675208806991577, + "step": 3722 + }, + { + "epoch": 1.3118942731277534, + "grad_norm": 2.0125396897962156, + "learning_rate": 5.810228159527003e-06, + "loss": 0.655093789100647, + "step": 3723 + }, + { + "epoch": 1.3122466960352424, + "grad_norm": 1.832975656309839, + "learning_rate": 5.804938522391768e-06, + "loss": 0.5658842921257019, + "step": 3724 + }, + { + "epoch": 1.3125991189427313, + "grad_norm": 1.7484570770381627, + "learning_rate": 
5.799650309349348e-06, + "loss": 0.4502618610858917, + "step": 3725 + }, + { + "epoch": 1.3129515418502202, + "grad_norm": 1.6150871905896036, + "learning_rate": 5.79436352219493e-06, + "loss": 0.6165845394134521, + "step": 3726 + }, + { + "epoch": 1.3133039647577092, + "grad_norm": 1.6734001609648903, + "learning_rate": 5.7890781627232115e-06, + "loss": 0.6315968036651611, + "step": 3727 + }, + { + "epoch": 1.313656387665198, + "grad_norm": 1.5048326218576167, + "learning_rate": 5.783794232728408e-06, + "loss": 0.58831787109375, + "step": 3728 + }, + { + "epoch": 1.3140088105726873, + "grad_norm": 1.7597864288310854, + "learning_rate": 5.778511734004248e-06, + "loss": 0.5056396722793579, + "step": 3729 + }, + { + "epoch": 1.3143612334801762, + "grad_norm": 2.3417954571274753, + "learning_rate": 5.773230668343978e-06, + "loss": 0.5469251871109009, + "step": 3730 + }, + { + "epoch": 1.3147136563876651, + "grad_norm": 1.768855633328091, + "learning_rate": 5.76795103754035e-06, + "loss": 0.7011934518814087, + "step": 3731 + }, + { + "epoch": 1.3150660792951543, + "grad_norm": 1.574817644372446, + "learning_rate": 5.762672843385643e-06, + "loss": 0.7080543041229248, + "step": 3732 + }, + { + "epoch": 1.3154185022026432, + "grad_norm": 1.7812689751161113, + "learning_rate": 5.757396087671634e-06, + "loss": 0.5180330276489258, + "step": 3733 + }, + { + "epoch": 1.3157709251101322, + "grad_norm": 1.6465709022018649, + "learning_rate": 5.75212077218962e-06, + "loss": 0.5282220840454102, + "step": 3734 + }, + { + "epoch": 1.316123348017621, + "grad_norm": 1.9100789844293367, + "learning_rate": 5.746846898730403e-06, + "loss": 0.7174440026283264, + "step": 3735 + }, + { + "epoch": 1.31647577092511, + "grad_norm": 1.7156784573652895, + "learning_rate": 5.7415744690843025e-06, + "loss": 0.537194013595581, + "step": 3736 + }, + { + "epoch": 1.3168281938325992, + "grad_norm": 1.714186482517803, + "learning_rate": 5.7363034850411415e-06, + "loss": 0.7514588832855225, + "step": 
3737 + }, + { + "epoch": 1.3171806167400881, + "grad_norm": 1.6138774970176952, + "learning_rate": 5.731033948390252e-06, + "loss": 0.601151704788208, + "step": 3738 + }, + { + "epoch": 1.317533039647577, + "grad_norm": 1.9652638368208295, + "learning_rate": 5.7257658609204865e-06, + "loss": 0.6046192646026611, + "step": 3739 + }, + { + "epoch": 1.317885462555066, + "grad_norm": 1.9909773544544114, + "learning_rate": 5.720499224420196e-06, + "loss": 0.5003835558891296, + "step": 3740 + }, + { + "epoch": 1.3182378854625552, + "grad_norm": 2.7143275056165237, + "learning_rate": 5.715234040677229e-06, + "loss": 0.6251966953277588, + "step": 3741 + }, + { + "epoch": 1.318590308370044, + "grad_norm": 1.9483642954012013, + "learning_rate": 5.709970311478961e-06, + "loss": 0.6681240797042847, + "step": 3742 + }, + { + "epoch": 1.318942731277533, + "grad_norm": 1.6278748497204938, + "learning_rate": 5.704708038612261e-06, + "loss": 0.582561194896698, + "step": 3743 + }, + { + "epoch": 1.319295154185022, + "grad_norm": 1.8550137845260724, + "learning_rate": 5.699447223863508e-06, + "loss": 0.5616302490234375, + "step": 3744 + }, + { + "epoch": 1.319647577092511, + "grad_norm": 1.7452561285826282, + "learning_rate": 5.6941878690185835e-06, + "loss": 0.6131408214569092, + "step": 3745 + }, + { + "epoch": 1.32, + "grad_norm": 1.8334584062109562, + "learning_rate": 5.688929975862873e-06, + "loss": 0.5772547721862793, + "step": 3746 + }, + { + "epoch": 1.320352422907489, + "grad_norm": 1.7519534139582256, + "learning_rate": 5.683673546181274e-06, + "loss": 0.5927203893661499, + "step": 3747 + }, + { + "epoch": 1.320704845814978, + "grad_norm": 1.9849489030223588, + "learning_rate": 5.67841858175818e-06, + "loss": 0.6001334190368652, + "step": 3748 + }, + { + "epoch": 1.321057268722467, + "grad_norm": 1.584893703676267, + "learning_rate": 5.673165084377479e-06, + "loss": 0.4598100781440735, + "step": 3749 + }, + { + "epoch": 1.321409691629956, + "grad_norm": 1.9316178856088813, + 
"learning_rate": 5.667913055822578e-06, + "loss": 0.6455222368240356, + "step": 3750 + }, + { + "epoch": 1.321762114537445, + "grad_norm": 1.9234057001448424, + "learning_rate": 5.662662497876375e-06, + "loss": 0.6327164173126221, + "step": 3751 + }, + { + "epoch": 1.322114537444934, + "grad_norm": 1.7096288638222439, + "learning_rate": 5.657413412321271e-06, + "loss": 0.6699539422988892, + "step": 3752 + }, + { + "epoch": 1.3224669603524228, + "grad_norm": 2.0694083676949107, + "learning_rate": 5.6521658009391676e-06, + "loss": 0.7507830858230591, + "step": 3753 + }, + { + "epoch": 1.3228193832599118, + "grad_norm": 1.7615687866950613, + "learning_rate": 5.646919665511461e-06, + "loss": 0.5164662003517151, + "step": 3754 + }, + { + "epoch": 1.323171806167401, + "grad_norm": 2.267697288539615, + "learning_rate": 5.641675007819058e-06, + "loss": 0.7059702277183533, + "step": 3755 + }, + { + "epoch": 1.3235242290748899, + "grad_norm": 2.1165471311290243, + "learning_rate": 5.636431829642359e-06, + "loss": 0.6535515189170837, + "step": 3756 + }, + { + "epoch": 1.3238766519823788, + "grad_norm": 1.782117402624855, + "learning_rate": 5.631190132761247e-06, + "loss": 0.5912176370620728, + "step": 3757 + }, + { + "epoch": 1.324229074889868, + "grad_norm": 1.6111457739999588, + "learning_rate": 5.625949918955126e-06, + "loss": 0.6527940034866333, + "step": 3758 + }, + { + "epoch": 1.324581497797357, + "grad_norm": 1.9751426120017839, + "learning_rate": 5.620711190002879e-06, + "loss": 0.7236875295639038, + "step": 3759 + }, + { + "epoch": 1.3249339207048458, + "grad_norm": 2.042390900324052, + "learning_rate": 5.6154739476829e-06, + "loss": 0.6823146343231201, + "step": 3760 + }, + { + "epoch": 1.3252863436123348, + "grad_norm": 2.058457581887865, + "learning_rate": 5.610238193773061e-06, + "loss": 0.5795537233352661, + "step": 3761 + }, + { + "epoch": 1.3256387665198237, + "grad_norm": 1.90461931046175, + "learning_rate": 5.605003930050738e-06, + "loss": 
0.5530939102172852, + "step": 3762 + }, + { + "epoch": 1.3259911894273126, + "grad_norm": 1.6978922894801083, + "learning_rate": 5.599771158292806e-06, + "loss": 0.5362278819084167, + "step": 3763 + }, + { + "epoch": 1.3263436123348018, + "grad_norm": 1.9521190182519916, + "learning_rate": 5.5945398802756315e-06, + "loss": 0.6136768460273743, + "step": 3764 + }, + { + "epoch": 1.3266960352422907, + "grad_norm": 1.7782753118174626, + "learning_rate": 5.589310097775055e-06, + "loss": 0.5979033708572388, + "step": 3765 + }, + { + "epoch": 1.3270484581497797, + "grad_norm": 1.810593191069574, + "learning_rate": 5.584081812566439e-06, + "loss": 0.6750006675720215, + "step": 3766 + }, + { + "epoch": 1.3274008810572688, + "grad_norm": 1.6815578779160076, + "learning_rate": 5.578855026424619e-06, + "loss": 0.6004951000213623, + "step": 3767 + }, + { + "epoch": 1.3277533039647578, + "grad_norm": 1.522422246822047, + "learning_rate": 5.573629741123926e-06, + "loss": 0.570702075958252, + "step": 3768 + }, + { + "epoch": 1.3281057268722467, + "grad_norm": 1.5435622334320813, + "learning_rate": 5.5684059584381826e-06, + "loss": 0.506945788860321, + "step": 3769 + }, + { + "epoch": 1.3284581497797356, + "grad_norm": 1.647967795112189, + "learning_rate": 5.563183680140696e-06, + "loss": 0.5935436487197876, + "step": 3770 + }, + { + "epoch": 1.3288105726872246, + "grad_norm": 2.7715355389110043, + "learning_rate": 5.5579629080042755e-06, + "loss": 0.641446590423584, + "step": 3771 + }, + { + "epoch": 1.3291629955947137, + "grad_norm": 1.7489195207611605, + "learning_rate": 5.552743643801209e-06, + "loss": 0.5816437005996704, + "step": 3772 + }, + { + "epoch": 1.3295154185022027, + "grad_norm": 1.7699530777692443, + "learning_rate": 5.547525889303265e-06, + "loss": 0.666487991809845, + "step": 3773 + }, + { + "epoch": 1.3298678414096916, + "grad_norm": 2.100750588167558, + "learning_rate": 5.542309646281718e-06, + "loss": 0.7961397767066956, + "step": 3774 + }, + { + "epoch": 
1.3302202643171805, + "grad_norm": 1.5292695888779975, + "learning_rate": 5.53709491650732e-06, + "loss": 0.4736033082008362, + "step": 3775 + }, + { + "epoch": 1.3305726872246697, + "grad_norm": 1.8004482810288622, + "learning_rate": 5.531881701750304e-06, + "loss": 0.542208194732666, + "step": 3776 + }, + { + "epoch": 1.3309251101321586, + "grad_norm": 1.8151751535940353, + "learning_rate": 5.526670003780399e-06, + "loss": 0.6306429505348206, + "step": 3777 + }, + { + "epoch": 1.3312775330396476, + "grad_norm": 1.7520809852323194, + "learning_rate": 5.521459824366808e-06, + "loss": 0.531991720199585, + "step": 3778 + }, + { + "epoch": 1.3316299559471365, + "grad_norm": 1.9852873895231067, + "learning_rate": 5.516251165278235e-06, + "loss": 0.688262939453125, + "step": 3779 + }, + { + "epoch": 1.3319823788546254, + "grad_norm": 2.0026356133489416, + "learning_rate": 5.511044028282853e-06, + "loss": 0.7555293440818787, + "step": 3780 + }, + { + "epoch": 1.3323348017621146, + "grad_norm": 1.9387490035628434, + "learning_rate": 5.505838415148317e-06, + "loss": 0.7518796324729919, + "step": 3781 + }, + { + "epoch": 1.3326872246696035, + "grad_norm": 1.859399241253671, + "learning_rate": 5.500634327641777e-06, + "loss": 0.5161253809928894, + "step": 3782 + }, + { + "epoch": 1.3330396475770925, + "grad_norm": 1.5897606830745852, + "learning_rate": 5.4954317675298586e-06, + "loss": 0.5617681741714478, + "step": 3783 + }, + { + "epoch": 1.3333920704845814, + "grad_norm": 1.6894758792140483, + "learning_rate": 5.4902307365786676e-06, + "loss": 0.5707885026931763, + "step": 3784 + }, + { + "epoch": 1.3337444933920706, + "grad_norm": 1.9016603426520955, + "learning_rate": 5.485031236553792e-06, + "loss": 0.5842025876045227, + "step": 3785 + }, + { + "epoch": 1.3340969162995595, + "grad_norm": 2.278549510271659, + "learning_rate": 5.479833269220296e-06, + "loss": 0.7103949785232544, + "step": 3786 + }, + { + "epoch": 1.3344493392070484, + "grad_norm": 1.8432428404869632, + 
"learning_rate": 5.474636836342737e-06, + "loss": 0.7704740762710571, + "step": 3787 + }, + { + "epoch": 1.3348017621145374, + "grad_norm": 1.808727631247744, + "learning_rate": 5.469441939685137e-06, + "loss": 0.6402652263641357, + "step": 3788 + }, + { + "epoch": 1.3351541850220263, + "grad_norm": 1.892219877227891, + "learning_rate": 5.464248581011002e-06, + "loss": 0.8214348554611206, + "step": 3789 + }, + { + "epoch": 1.3355066079295155, + "grad_norm": 1.9758909531924576, + "learning_rate": 5.459056762083318e-06, + "loss": 0.6372429132461548, + "step": 3790 + }, + { + "epoch": 1.3358590308370044, + "grad_norm": 1.849044346394621, + "learning_rate": 5.453866484664543e-06, + "loss": 0.5418422222137451, + "step": 3791 + }, + { + "epoch": 1.3362114537444933, + "grad_norm": 1.7395663492002502, + "learning_rate": 5.448677750516613e-06, + "loss": 0.6574567556381226, + "step": 3792 + }, + { + "epoch": 1.3365638766519825, + "grad_norm": 1.9976311809706857, + "learning_rate": 5.443490561400948e-06, + "loss": 0.5174030661582947, + "step": 3793 + }, + { + "epoch": 1.3369162995594714, + "grad_norm": 1.5627335899600845, + "learning_rate": 5.4383049190784275e-06, + "loss": 0.595477819442749, + "step": 3794 + }, + { + "epoch": 1.3372687224669604, + "grad_norm": 1.845680624563864, + "learning_rate": 5.4331208253094255e-06, + "loss": 0.6177364587783813, + "step": 3795 + }, + { + "epoch": 1.3376211453744493, + "grad_norm": 1.6348460055259042, + "learning_rate": 5.4279382818537774e-06, + "loss": 0.6106897592544556, + "step": 3796 + }, + { + "epoch": 1.3379735682819383, + "grad_norm": 1.8500671496295353, + "learning_rate": 5.422757290470795e-06, + "loss": 0.46700483560562134, + "step": 3797 + }, + { + "epoch": 1.3383259911894272, + "grad_norm": 1.952200717602712, + "learning_rate": 5.417577852919262e-06, + "loss": 0.5408231019973755, + "step": 3798 + }, + { + "epoch": 1.3386784140969163, + "grad_norm": 1.8733329229880296, + "learning_rate": 5.412399970957439e-06, + "loss": 
0.6430809497833252, + "step": 3799 + }, + { + "epoch": 1.3390308370044053, + "grad_norm": 1.9515663922431925, + "learning_rate": 5.4072236463430535e-06, + "loss": 0.6817858219146729, + "step": 3800 + }, + { + "epoch": 1.3393832599118942, + "grad_norm": 1.7386331074635664, + "learning_rate": 5.402048880833308e-06, + "loss": 0.5492604970932007, + "step": 3801 + }, + { + "epoch": 1.3397356828193834, + "grad_norm": 1.9883458715986422, + "learning_rate": 5.39687567618487e-06, + "loss": 0.6148543357849121, + "step": 3802 + }, + { + "epoch": 1.3400881057268723, + "grad_norm": 1.7245960691315507, + "learning_rate": 5.391704034153894e-06, + "loss": 0.5921820402145386, + "step": 3803 + }, + { + "epoch": 1.3404405286343613, + "grad_norm": 1.8759210914719033, + "learning_rate": 5.386533956495974e-06, + "loss": 0.49728113412857056, + "step": 3804 + }, + { + "epoch": 1.3407929515418502, + "grad_norm": 1.7899218455267007, + "learning_rate": 5.381365444966205e-06, + "loss": 0.5944808125495911, + "step": 3805 + }, + { + "epoch": 1.3411453744493391, + "grad_norm": 1.6022996204023598, + "learning_rate": 5.376198501319128e-06, + "loss": 0.5197580456733704, + "step": 3806 + }, + { + "epoch": 1.341497797356828, + "grad_norm": 1.5953524266203611, + "learning_rate": 5.3710331273087625e-06, + "loss": 0.6229256391525269, + "step": 3807 + }, + { + "epoch": 1.3418502202643172, + "grad_norm": 2.0736813734241073, + "learning_rate": 5.365869324688591e-06, + "loss": 0.5305753946304321, + "step": 3808 + }, + { + "epoch": 1.3422026431718062, + "grad_norm": 1.4520191291543518, + "learning_rate": 5.360707095211566e-06, + "loss": 0.4002259373664856, + "step": 3809 + }, + { + "epoch": 1.342555066079295, + "grad_norm": 1.8821320745162777, + "learning_rate": 5.3555464406300965e-06, + "loss": 0.5211426615715027, + "step": 3810 + }, + { + "epoch": 1.3429074889867842, + "grad_norm": 1.7112007743194535, + "learning_rate": 5.350387362696077e-06, + "loss": 0.5998013019561768, + "step": 3811 + }, + { + "epoch": 
1.3432599118942732, + "grad_norm": 1.6128635046491597, + "learning_rate": 5.345229863160839e-06, + "loss": 0.5330953598022461, + "step": 3812 + }, + { + "epoch": 1.3436123348017621, + "grad_norm": 1.6570398271033384, + "learning_rate": 5.340073943775206e-06, + "loss": 0.6999118328094482, + "step": 3813 + }, + { + "epoch": 1.343964757709251, + "grad_norm": 2.060346240780723, + "learning_rate": 5.334919606289446e-06, + "loss": 0.6286367177963257, + "step": 3814 + }, + { + "epoch": 1.34431718061674, + "grad_norm": 1.4130805934733843, + "learning_rate": 5.329766852453296e-06, + "loss": 0.5793008804321289, + "step": 3815 + }, + { + "epoch": 1.3446696035242292, + "grad_norm": 1.7815340287164039, + "learning_rate": 5.324615684015957e-06, + "loss": 0.5811383128166199, + "step": 3816 + }, + { + "epoch": 1.345022026431718, + "grad_norm": 1.8888368809882845, + "learning_rate": 5.319466102726087e-06, + "loss": 0.7389675378799438, + "step": 3817 + }, + { + "epoch": 1.345374449339207, + "grad_norm": 1.9482215135863048, + "learning_rate": 5.314318110331815e-06, + "loss": 0.6105868220329285, + "step": 3818 + }, + { + "epoch": 1.345726872246696, + "grad_norm": 1.648111237588601, + "learning_rate": 5.3091717085807235e-06, + "loss": 0.5979465842247009, + "step": 3819 + }, + { + "epoch": 1.3460792951541851, + "grad_norm": 2.100772248921902, + "learning_rate": 5.304026899219846e-06, + "loss": 0.6722681522369385, + "step": 3820 + }, + { + "epoch": 1.346431718061674, + "grad_norm": 1.5469717835195365, + "learning_rate": 5.298883683995697e-06, + "loss": 0.4687497913837433, + "step": 3821 + }, + { + "epoch": 1.346784140969163, + "grad_norm": 1.6982574361909266, + "learning_rate": 5.29374206465423e-06, + "loss": 0.563692569732666, + "step": 3822 + }, + { + "epoch": 1.347136563876652, + "grad_norm": 1.7298606992172854, + "learning_rate": 5.2886020429408716e-06, + "loss": 0.604897141456604, + "step": 3823 + }, + { + "epoch": 1.3474889867841409, + "grad_norm": 2.111770720101543, + 
"learning_rate": 5.283463620600493e-06, + "loss": 0.6270164251327515, + "step": 3824 + }, + { + "epoch": 1.34784140969163, + "grad_norm": 2.1238324371472954, + "learning_rate": 5.278326799377428e-06, + "loss": 0.6487830877304077, + "step": 3825 + }, + { + "epoch": 1.348193832599119, + "grad_norm": 1.58718768900561, + "learning_rate": 5.273191581015474e-06, + "loss": 0.5816935896873474, + "step": 3826 + }, + { + "epoch": 1.348546255506608, + "grad_norm": 1.72099904065486, + "learning_rate": 5.26805796725788e-06, + "loss": 0.6281115412712097, + "step": 3827 + }, + { + "epoch": 1.348898678414097, + "grad_norm": 2.0975447662151288, + "learning_rate": 5.2629259598473335e-06, + "loss": 0.5031973123550415, + "step": 3828 + }, + { + "epoch": 1.349251101321586, + "grad_norm": 1.6391975654545219, + "learning_rate": 5.257795560526005e-06, + "loss": 0.6220165491104126, + "step": 3829 + }, + { + "epoch": 1.349603524229075, + "grad_norm": 1.8177506583957952, + "learning_rate": 5.2526667710354995e-06, + "loss": 0.6451058387756348, + "step": 3830 + }, + { + "epoch": 1.3499559471365639, + "grad_norm": 2.000132155225934, + "learning_rate": 5.247539593116884e-06, + "loss": 0.7524863481521606, + "step": 3831 + }, + { + "epoch": 1.3503083700440528, + "grad_norm": 1.7855711080776688, + "learning_rate": 5.242414028510674e-06, + "loss": 0.6270921230316162, + "step": 3832 + }, + { + "epoch": 1.3506607929515417, + "grad_norm": 1.8779302666662292, + "learning_rate": 5.237290078956836e-06, + "loss": 0.6196550130844116, + "step": 3833 + }, + { + "epoch": 1.351013215859031, + "grad_norm": 1.932517845360487, + "learning_rate": 5.232167746194798e-06, + "loss": 0.8512230515480042, + "step": 3834 + }, + { + "epoch": 1.3513656387665198, + "grad_norm": 1.672868645098828, + "learning_rate": 5.227047031963435e-06, + "loss": 0.5196807980537415, + "step": 3835 + }, + { + "epoch": 1.3517180616740088, + "grad_norm": 1.890472281368116, + "learning_rate": 5.2219279380010565e-06, + "loss": 0.6713111400604248, 
+ "step": 3836 + }, + { + "epoch": 1.352070484581498, + "grad_norm": 1.8891048300322977, + "learning_rate": 5.216810466045448e-06, + "loss": 0.7150874137878418, + "step": 3837 + }, + { + "epoch": 1.3524229074889869, + "grad_norm": 1.9379344809365882, + "learning_rate": 5.211694617833827e-06, + "loss": 0.5812375545501709, + "step": 3838 + }, + { + "epoch": 1.3527753303964758, + "grad_norm": 1.6232111313971074, + "learning_rate": 5.2065803951028675e-06, + "loss": 0.5842182040214539, + "step": 3839 + }, + { + "epoch": 1.3531277533039647, + "grad_norm": 1.7655789614212678, + "learning_rate": 5.201467799588685e-06, + "loss": 0.5432665348052979, + "step": 3840 + }, + { + "epoch": 1.3534801762114537, + "grad_norm": 1.610757257105171, + "learning_rate": 5.196356833026845e-06, + "loss": 0.551771879196167, + "step": 3841 + }, + { + "epoch": 1.3538325991189426, + "grad_norm": 2.0105503681662076, + "learning_rate": 5.19124749715237e-06, + "loss": 0.6961710453033447, + "step": 3842 + }, + { + "epoch": 1.3541850220264318, + "grad_norm": 1.9510922019810755, + "learning_rate": 5.18613979369972e-06, + "loss": 0.7105714678764343, + "step": 3843 + }, + { + "epoch": 1.3545374449339207, + "grad_norm": 1.9369232024679732, + "learning_rate": 5.181033724402789e-06, + "loss": 0.7100229263305664, + "step": 3844 + }, + { + "epoch": 1.3548898678414096, + "grad_norm": 1.6852711649451124, + "learning_rate": 5.175929290994941e-06, + "loss": 0.651812732219696, + "step": 3845 + }, + { + "epoch": 1.3552422907488988, + "grad_norm": 2.308449923325572, + "learning_rate": 5.170826495208967e-06, + "loss": 0.5194147825241089, + "step": 3846 + }, + { + "epoch": 1.3555947136563877, + "grad_norm": 1.6095794520986102, + "learning_rate": 5.16572533877711e-06, + "loss": 0.5939956307411194, + "step": 3847 + }, + { + "epoch": 1.3559471365638767, + "grad_norm": 1.7731843322868706, + "learning_rate": 5.160625823431051e-06, + "loss": 0.6434104442596436, + "step": 3848 + }, + { + "epoch": 1.3562995594713656, + 
"grad_norm": 1.9584483919337772, + "learning_rate": 5.155527950901914e-06, + "loss": 0.5256108045578003, + "step": 3849 + }, + { + "epoch": 1.3566519823788545, + "grad_norm": 1.5746637659323357, + "learning_rate": 5.150431722920277e-06, + "loss": 0.5632717609405518, + "step": 3850 + }, + { + "epoch": 1.3570044052863435, + "grad_norm": 1.8450205582439452, + "learning_rate": 5.145337141216149e-06, + "loss": 0.5964382886886597, + "step": 3851 + }, + { + "epoch": 1.3573568281938326, + "grad_norm": 1.9383063853676261, + "learning_rate": 5.140244207518971e-06, + "loss": 0.7268366813659668, + "step": 3852 + }, + { + "epoch": 1.3577092511013216, + "grad_norm": 2.357958765027834, + "learning_rate": 5.135152923557647e-06, + "loss": 0.7376477122306824, + "step": 3853 + }, + { + "epoch": 1.3580616740088105, + "grad_norm": 1.9573550951394243, + "learning_rate": 5.130063291060505e-06, + "loss": 0.50569748878479, + "step": 3854 + }, + { + "epoch": 1.3584140969162997, + "grad_norm": 1.684535591269265, + "learning_rate": 5.12497531175532e-06, + "loss": 0.5639374256134033, + "step": 3855 + }, + { + "epoch": 1.3587665198237886, + "grad_norm": 2.0009335012534146, + "learning_rate": 5.1198889873692994e-06, + "loss": 0.5051915645599365, + "step": 3856 + }, + { + "epoch": 1.3591189427312775, + "grad_norm": 1.979939818228197, + "learning_rate": 5.114804319629088e-06, + "loss": 0.4718795120716095, + "step": 3857 + }, + { + "epoch": 1.3594713656387665, + "grad_norm": 1.7040447839749338, + "learning_rate": 5.109721310260781e-06, + "loss": 0.5684067606925964, + "step": 3858 + }, + { + "epoch": 1.3598237885462554, + "grad_norm": 1.687205926430453, + "learning_rate": 5.104639960989903e-06, + "loss": 0.5757609605789185, + "step": 3859 + }, + { + "epoch": 1.3601762114537446, + "grad_norm": 1.637859976815221, + "learning_rate": 5.099560273541401e-06, + "loss": 0.5971167087554932, + "step": 3860 + }, + { + "epoch": 1.3605286343612335, + "grad_norm": 1.9766573766085018, + "learning_rate": 
5.094482249639683e-06, + "loss": 0.6959896683692932, + "step": 3861 + }, + { + "epoch": 1.3608810572687224, + "grad_norm": 1.8397057454745067, + "learning_rate": 5.089405891008574e-06, + "loss": 0.6954548358917236, + "step": 3862 + }, + { + "epoch": 1.3612334801762114, + "grad_norm": 1.5747472561310782, + "learning_rate": 5.084331199371343e-06, + "loss": 0.5659986138343811, + "step": 3863 + }, + { + "epoch": 1.3615859030837005, + "grad_norm": 1.9340659365358734, + "learning_rate": 5.079258176450687e-06, + "loss": 0.5582559108734131, + "step": 3864 + }, + { + "epoch": 1.3619383259911895, + "grad_norm": 1.5684621947501252, + "learning_rate": 5.0741868239687395e-06, + "loss": 0.5337075591087341, + "step": 3865 + }, + { + "epoch": 1.3622907488986784, + "grad_norm": 1.8617666338346237, + "learning_rate": 5.069117143647075e-06, + "loss": 0.621441125869751, + "step": 3866 + }, + { + "epoch": 1.3626431718061673, + "grad_norm": 1.7285404952370873, + "learning_rate": 5.064049137206677e-06, + "loss": 0.5476670861244202, + "step": 3867 + }, + { + "epoch": 1.3629955947136563, + "grad_norm": 1.9444577342582248, + "learning_rate": 5.058982806367989e-06, + "loss": 0.5357356071472168, + "step": 3868 + }, + { + "epoch": 1.3633480176211454, + "grad_norm": 2.032867685216442, + "learning_rate": 5.053918152850868e-06, + "loss": 0.5722761750221252, + "step": 3869 + }, + { + "epoch": 1.3637004405286344, + "grad_norm": 1.8019521015311857, + "learning_rate": 5.048855178374606e-06, + "loss": 0.7271207571029663, + "step": 3870 + }, + { + "epoch": 1.3640528634361233, + "grad_norm": 2.149716528128109, + "learning_rate": 5.043793884657926e-06, + "loss": 0.6213557720184326, + "step": 3871 + }, + { + "epoch": 1.3644052863436125, + "grad_norm": 1.9750542918701046, + "learning_rate": 5.03873427341898e-06, + "loss": 0.6509476900100708, + "step": 3872 + }, + { + "epoch": 1.3647577092511014, + "grad_norm": 1.8266690493980986, + "learning_rate": 5.0336763463753425e-06, + "loss": 0.5321642756462097, + 
"step": 3873 + }, + { + "epoch": 1.3651101321585903, + "grad_norm": 1.8114804761469812, + "learning_rate": 5.028620105244035e-06, + "loss": 0.7237476110458374, + "step": 3874 + }, + { + "epoch": 1.3654625550660793, + "grad_norm": 2.014453779183698, + "learning_rate": 5.0235655517414805e-06, + "loss": 0.6653447151184082, + "step": 3875 + }, + { + "epoch": 1.3658149779735682, + "grad_norm": 1.843622237552059, + "learning_rate": 5.018512687583552e-06, + "loss": 0.6188938617706299, + "step": 3876 + }, + { + "epoch": 1.3661674008810571, + "grad_norm": 1.8211870806299153, + "learning_rate": 5.013461514485536e-06, + "loss": 0.6341606378555298, + "step": 3877 + }, + { + "epoch": 1.3665198237885463, + "grad_norm": 1.6224290182707664, + "learning_rate": 5.00841203416215e-06, + "loss": 0.6148994565010071, + "step": 3878 + }, + { + "epoch": 1.3668722466960352, + "grad_norm": 1.8692541577175399, + "learning_rate": 5.003364248327533e-06, + "loss": 0.6292222142219543, + "step": 3879 + }, + { + "epoch": 1.3672246696035242, + "grad_norm": 1.618170468267519, + "learning_rate": 4.998318158695255e-06, + "loss": 0.6648836135864258, + "step": 3880 + }, + { + "epoch": 1.3675770925110133, + "grad_norm": 6.866040476375875, + "learning_rate": 4.993273766978297e-06, + "loss": 0.5175273418426514, + "step": 3881 + }, + { + "epoch": 1.3679295154185023, + "grad_norm": 1.5661461645683938, + "learning_rate": 4.98823107488909e-06, + "loss": 0.5686253309249878, + "step": 3882 + }, + { + "epoch": 1.3682819383259912, + "grad_norm": 1.9697672783538545, + "learning_rate": 4.983190084139452e-06, + "loss": 0.6128156185150146, + "step": 3883 + }, + { + "epoch": 1.3686343612334801, + "grad_norm": 1.9331016188284555, + "learning_rate": 4.978150796440656e-06, + "loss": 0.6849625110626221, + "step": 3884 + }, + { + "epoch": 1.368986784140969, + "grad_norm": 1.5986771035358114, + "learning_rate": 4.973113213503379e-06, + "loss": 0.5735955238342285, + "step": 3885 + }, + { + "epoch": 1.369339207048458, + 
"grad_norm": 1.6049593584012303, + "learning_rate": 4.968077337037724e-06, + "loss": 0.4584425091743469, + "step": 3886 + }, + { + "epoch": 1.3696916299559472, + "grad_norm": 1.9525312670752564, + "learning_rate": 4.963043168753212e-06, + "loss": 0.547109067440033, + "step": 3887 + }, + { + "epoch": 1.3700440528634361, + "grad_norm": 2.113357180829694, + "learning_rate": 4.9580107103587895e-06, + "loss": 0.6966128349304199, + "step": 3888 + }, + { + "epoch": 1.370396475770925, + "grad_norm": 1.7817002019358994, + "learning_rate": 4.952979963562814e-06, + "loss": 0.6275819540023804, + "step": 3889 + }, + { + "epoch": 1.3707488986784142, + "grad_norm": 1.6096829752005641, + "learning_rate": 4.94795093007308e-06, + "loss": 0.5678467750549316, + "step": 3890 + }, + { + "epoch": 1.3711013215859031, + "grad_norm": 1.8874234747665013, + "learning_rate": 4.942923611596772e-06, + "loss": 0.6516115665435791, + "step": 3891 + }, + { + "epoch": 1.371453744493392, + "grad_norm": 1.8638529672264463, + "learning_rate": 4.937898009840518e-06, + "loss": 0.6279621124267578, + "step": 3892 + }, + { + "epoch": 1.371806167400881, + "grad_norm": 1.6187117518672614, + "learning_rate": 4.932874126510353e-06, + "loss": 0.6123322248458862, + "step": 3893 + }, + { + "epoch": 1.37215859030837, + "grad_norm": 1.6259761787603553, + "learning_rate": 4.927851963311726e-06, + "loss": 0.43412432074546814, + "step": 3894 + }, + { + "epoch": 1.372511013215859, + "grad_norm": 1.859998329311036, + "learning_rate": 4.922831521949507e-06, + "loss": 0.6582022905349731, + "step": 3895 + }, + { + "epoch": 1.372863436123348, + "grad_norm": 1.8966645456702385, + "learning_rate": 4.917812804127976e-06, + "loss": 0.6219466328620911, + "step": 3896 + }, + { + "epoch": 1.373215859030837, + "grad_norm": 2.056798959647299, + "learning_rate": 4.9127958115508365e-06, + "loss": 0.5352981090545654, + "step": 3897 + }, + { + "epoch": 1.373568281938326, + "grad_norm": 1.5240218181276974, + "learning_rate": 
4.907780545921205e-06, + "loss": 0.47646182775497437, + "step": 3898 + }, + { + "epoch": 1.373920704845815, + "grad_norm": 1.6949945802187276, + "learning_rate": 4.902767008941594e-06, + "loss": 0.5335453748703003, + "step": 3899 + }, + { + "epoch": 1.374273127753304, + "grad_norm": 1.7931951401372748, + "learning_rate": 4.897755202313954e-06, + "loss": 0.576435923576355, + "step": 3900 + }, + { + "epoch": 1.374625550660793, + "grad_norm": 1.6675338707159029, + "learning_rate": 4.8927451277396365e-06, + "loss": 0.533431887626648, + "step": 3901 + }, + { + "epoch": 1.3749779735682819, + "grad_norm": 1.7439550653197133, + "learning_rate": 4.8877367869194035e-06, + "loss": 0.6892110109329224, + "step": 3902 + }, + { + "epoch": 1.3753303964757708, + "grad_norm": 1.9209875137364842, + "learning_rate": 4.8827301815534335e-06, + "loss": 0.7028052806854248, + "step": 3903 + }, + { + "epoch": 1.37568281938326, + "grad_norm": 1.8413166797931897, + "learning_rate": 4.877725313341306e-06, + "loss": 0.6883414387702942, + "step": 3904 + }, + { + "epoch": 1.376035242290749, + "grad_norm": 2.145518516472349, + "learning_rate": 4.8727221839820285e-06, + "loss": 0.6712944507598877, + "step": 3905 + }, + { + "epoch": 1.3763876651982379, + "grad_norm": 1.6297297090329885, + "learning_rate": 4.867720795174006e-06, + "loss": 0.6139085292816162, + "step": 3906 + }, + { + "epoch": 1.3767400881057268, + "grad_norm": 1.8425831405666082, + "learning_rate": 4.862721148615043e-06, + "loss": 0.6463953256607056, + "step": 3907 + }, + { + "epoch": 1.377092511013216, + "grad_norm": 1.768461759599311, + "learning_rate": 4.857723246002376e-06, + "loss": 0.6790587306022644, + "step": 3908 + }, + { + "epoch": 1.3774449339207049, + "grad_norm": 1.7177146369820009, + "learning_rate": 4.852727089032634e-06, + "loss": 0.4996854066848755, + "step": 3909 + }, + { + "epoch": 1.3777973568281938, + "grad_norm": 1.8098347886488457, + "learning_rate": 4.847732679401855e-06, + "loss": 0.5826590061187744, + 
"step": 3910 + }, + { + "epoch": 1.3781497797356828, + "grad_norm": 1.8997892974208295, + "learning_rate": 4.842740018805489e-06, + "loss": 0.5044558048248291, + "step": 3911 + }, + { + "epoch": 1.3785022026431717, + "grad_norm": 1.873679943847948, + "learning_rate": 4.837749108938381e-06, + "loss": 0.49022918939590454, + "step": 3912 + }, + { + "epoch": 1.3788546255506609, + "grad_norm": 1.9497488299017371, + "learning_rate": 4.832759951494798e-06, + "loss": 0.7034850120544434, + "step": 3913 + }, + { + "epoch": 1.3792070484581498, + "grad_norm": 1.8582811393472771, + "learning_rate": 4.827772548168408e-06, + "loss": 0.5835636854171753, + "step": 3914 + }, + { + "epoch": 1.3795594713656387, + "grad_norm": 1.8615896532434415, + "learning_rate": 4.822786900652262e-06, + "loss": 0.6000608205795288, + "step": 3915 + }, + { + "epoch": 1.3799118942731279, + "grad_norm": 2.003742345218382, + "learning_rate": 4.817803010638847e-06, + "loss": 0.6121091842651367, + "step": 3916 + }, + { + "epoch": 1.3802643171806168, + "grad_norm": 1.80308866184307, + "learning_rate": 4.812820879820034e-06, + "loss": 0.457197904586792, + "step": 3917 + }, + { + "epoch": 1.3806167400881058, + "grad_norm": 1.8962611537179284, + "learning_rate": 4.807840509887102e-06, + "loss": 0.6495843529701233, + "step": 3918 + }, + { + "epoch": 1.3809691629955947, + "grad_norm": 1.9212587769996015, + "learning_rate": 4.80286190253073e-06, + "loss": 0.6245059967041016, + "step": 3919 + }, + { + "epoch": 1.3813215859030836, + "grad_norm": 2.020688644956673, + "learning_rate": 4.797885059440998e-06, + "loss": 0.5648606419563293, + "step": 3920 + }, + { + "epoch": 1.3816740088105726, + "grad_norm": 1.93208096226899, + "learning_rate": 4.7929099823073945e-06, + "loss": 0.6593670845031738, + "step": 3921 + }, + { + "epoch": 1.3820264317180617, + "grad_norm": 1.8973564890389945, + "learning_rate": 4.787936672818807e-06, + "loss": 0.6400346159934998, + "step": 3922 + }, + { + "epoch": 1.3823788546255507, + 
"grad_norm": 1.8684904083901948, + "learning_rate": 4.782965132663505e-06, + "loss": 0.6042170524597168, + "step": 3923 + }, + { + "epoch": 1.3827312775330396, + "grad_norm": 1.8230700495851246, + "learning_rate": 4.777995363529184e-06, + "loss": 0.6224586963653564, + "step": 3924 + }, + { + "epoch": 1.3830837004405288, + "grad_norm": 2.09797321253942, + "learning_rate": 4.7730273671029235e-06, + "loss": 0.6944444179534912, + "step": 3925 + }, + { + "epoch": 1.3834361233480177, + "grad_norm": 1.976613089140818, + "learning_rate": 4.768061145071201e-06, + "loss": 0.5871950387954712, + "step": 3926 + }, + { + "epoch": 1.3837885462555066, + "grad_norm": 1.7713632438369786, + "learning_rate": 4.763096699119897e-06, + "loss": 0.6438909769058228, + "step": 3927 + }, + { + "epoch": 1.3841409691629956, + "grad_norm": 1.6141008005869943, + "learning_rate": 4.75813403093428e-06, + "loss": 0.6338443756103516, + "step": 3928 + }, + { + "epoch": 1.3844933920704845, + "grad_norm": 2.2680544531424753, + "learning_rate": 4.753173142199036e-06, + "loss": 0.6343874931335449, + "step": 3929 + }, + { + "epoch": 1.3848458149779734, + "grad_norm": 1.7233771229601555, + "learning_rate": 4.7482140345982174e-06, + "loss": 0.5383629202842712, + "step": 3930 + }, + { + "epoch": 1.3851982378854626, + "grad_norm": 1.8699549247596075, + "learning_rate": 4.743256709815289e-06, + "loss": 0.5365063548088074, + "step": 3931 + }, + { + "epoch": 1.3855506607929515, + "grad_norm": 2.2583515376147694, + "learning_rate": 4.738301169533116e-06, + "loss": 0.6310757398605347, + "step": 3932 + }, + { + "epoch": 1.3859030837004405, + "grad_norm": 2.1022070754037476, + "learning_rate": 4.733347415433946e-06, + "loss": 0.7609038949012756, + "step": 3933 + }, + { + "epoch": 1.3862555066079296, + "grad_norm": 2.174490642392946, + "learning_rate": 4.728395449199423e-06, + "loss": 0.5837516784667969, + "step": 3934 + }, + { + "epoch": 1.3866079295154186, + "grad_norm": 1.719340289699717, + "learning_rate": 
4.7234452725105875e-06, + "loss": 0.6075407862663269, + "step": 3935 + }, + { + "epoch": 1.3869603524229075, + "grad_norm": 1.7651152509667416, + "learning_rate": 4.718496887047864e-06, + "loss": 0.5246843099594116, + "step": 3936 + }, + { + "epoch": 1.3873127753303964, + "grad_norm": 1.6874306455639787, + "learning_rate": 4.713550294491091e-06, + "loss": 0.6256884336471558, + "step": 3937 + }, + { + "epoch": 1.3876651982378854, + "grad_norm": 1.632156841956259, + "learning_rate": 4.708605496519467e-06, + "loss": 0.5039727687835693, + "step": 3938 + }, + { + "epoch": 1.3880176211453745, + "grad_norm": 2.0143508196146196, + "learning_rate": 4.703662494811599e-06, + "loss": 0.5302769541740417, + "step": 3939 + }, + { + "epoch": 1.3883700440528635, + "grad_norm": 1.6358403288542849, + "learning_rate": 4.698721291045491e-06, + "loss": 0.654889702796936, + "step": 3940 + }, + { + "epoch": 1.3887224669603524, + "grad_norm": 1.8724260838054423, + "learning_rate": 4.693781886898521e-06, + "loss": 0.5571156740188599, + "step": 3941 + }, + { + "epoch": 1.3890748898678413, + "grad_norm": 1.8352093678478665, + "learning_rate": 4.688844284047466e-06, + "loss": 0.489155113697052, + "step": 3942 + }, + { + "epoch": 1.3894273127753305, + "grad_norm": 2.3056906716340793, + "learning_rate": 4.683908484168487e-06, + "loss": 0.6422649621963501, + "step": 3943 + }, + { + "epoch": 1.3897797356828194, + "grad_norm": 2.1056674936107345, + "learning_rate": 4.67897448893713e-06, + "loss": 0.6800041794776917, + "step": 3944 + }, + { + "epoch": 1.3901321585903084, + "grad_norm": 1.9512416893069657, + "learning_rate": 4.674042300028345e-06, + "loss": 0.6091655492782593, + "step": 3945 + }, + { + "epoch": 1.3904845814977973, + "grad_norm": 1.5832960247380383, + "learning_rate": 4.669111919116442e-06, + "loss": 0.6217864751815796, + "step": 3946 + }, + { + "epoch": 1.3908370044052862, + "grad_norm": 1.9328669999328483, + "learning_rate": 4.664183347875144e-06, + "loss": 0.6140862703323364, + 
"step": 3947 + }, + { + "epoch": 1.3911894273127754, + "grad_norm": 1.5467868836495022, + "learning_rate": 4.659256587977542e-06, + "loss": 0.5485835075378418, + "step": 3948 + }, + { + "epoch": 1.3915418502202643, + "grad_norm": 1.9704789330010746, + "learning_rate": 4.654331641096118e-06, + "loss": 0.642849862575531, + "step": 3949 + }, + { + "epoch": 1.3918942731277533, + "grad_norm": 3.421035640959237, + "learning_rate": 4.649408508902739e-06, + "loss": 0.7084407806396484, + "step": 3950 + }, + { + "epoch": 1.3922466960352424, + "grad_norm": 1.780782004302536, + "learning_rate": 4.644487193068653e-06, + "loss": 0.4798510670661926, + "step": 3951 + }, + { + "epoch": 1.3925991189427314, + "grad_norm": 2.0571809281532056, + "learning_rate": 4.639567695264493e-06, + "loss": 0.6350974440574646, + "step": 3952 + }, + { + "epoch": 1.3929515418502203, + "grad_norm": 1.6636780012798107, + "learning_rate": 4.634650017160285e-06, + "loss": 0.6046940684318542, + "step": 3953 + }, + { + "epoch": 1.3933039647577092, + "grad_norm": 1.8656342511774384, + "learning_rate": 4.629734160425412e-06, + "loss": 0.5262438058853149, + "step": 3954 + }, + { + "epoch": 1.3936563876651982, + "grad_norm": 1.6602375526420536, + "learning_rate": 4.6248201267286666e-06, + "loss": 0.4836997985839844, + "step": 3955 + }, + { + "epoch": 1.394008810572687, + "grad_norm": 1.8387545975251456, + "learning_rate": 4.619907917738206e-06, + "loss": 0.5491573810577393, + "step": 3956 + }, + { + "epoch": 1.3943612334801763, + "grad_norm": 1.7103638500009937, + "learning_rate": 4.614997535121574e-06, + "loss": 0.5778772830963135, + "step": 3957 + }, + { + "epoch": 1.3947136563876652, + "grad_norm": 1.886204345973439, + "learning_rate": 4.61008898054569e-06, + "loss": 0.6235651969909668, + "step": 3958 + }, + { + "epoch": 1.3950660792951541, + "grad_norm": 1.533461324415723, + "learning_rate": 4.605182255676857e-06, + "loss": 0.5192956924438477, + "step": 3959 + }, + { + "epoch": 1.3954185022026433, + 
"grad_norm": 1.6490801359766816, + "learning_rate": 4.600277362180753e-06, + "loss": 0.5652563571929932, + "step": 3960 + }, + { + "epoch": 1.3957709251101322, + "grad_norm": 2.0491508628562594, + "learning_rate": 4.595374301722445e-06, + "loss": 0.6451884508132935, + "step": 3961 + }, + { + "epoch": 1.3961233480176212, + "grad_norm": 1.6267669051180629, + "learning_rate": 4.5904730759663555e-06, + "loss": 0.6358006000518799, + "step": 3962 + }, + { + "epoch": 1.39647577092511, + "grad_norm": 1.9868299068304147, + "learning_rate": 4.5855736865763104e-06, + "loss": 0.6122751832008362, + "step": 3963 + }, + { + "epoch": 1.396828193832599, + "grad_norm": 1.6563994945684704, + "learning_rate": 4.580676135215495e-06, + "loss": 0.5563797354698181, + "step": 3964 + }, + { + "epoch": 1.397180616740088, + "grad_norm": 1.7043306637307543, + "learning_rate": 4.575780423546476e-06, + "loss": 0.5915960669517517, + "step": 3965 + }, + { + "epoch": 1.3975330396475771, + "grad_norm": 2.2793683384994363, + "learning_rate": 4.570886553231196e-06, + "loss": 0.5755159854888916, + "step": 3966 + }, + { + "epoch": 1.397885462555066, + "grad_norm": 1.713166792254198, + "learning_rate": 4.565994525930967e-06, + "loss": 0.7017625570297241, + "step": 3967 + }, + { + "epoch": 1.398237885462555, + "grad_norm": 1.901331269180062, + "learning_rate": 4.5611043433064875e-06, + "loss": 0.7623441815376282, + "step": 3968 + }, + { + "epoch": 1.3985903083700442, + "grad_norm": 1.772343766995311, + "learning_rate": 4.556216007017822e-06, + "loss": 0.5561864376068115, + "step": 3969 + }, + { + "epoch": 1.398942731277533, + "grad_norm": 1.7107369517825557, + "learning_rate": 4.5513295187243975e-06, + "loss": 0.516582727432251, + "step": 3970 + }, + { + "epoch": 1.399295154185022, + "grad_norm": 1.6087287767761917, + "learning_rate": 4.5464448800850366e-06, + "loss": 0.6324976682662964, + "step": 3971 + }, + { + "epoch": 1.399647577092511, + "grad_norm": 1.660721417089598, + "learning_rate": 
4.541562092757918e-06, + "loss": 0.5926251411437988, + "step": 3972 + }, + { + "epoch": 1.4, + "grad_norm": 1.7443423550845751, + "learning_rate": 4.536681158400598e-06, + "loss": 0.5677082538604736, + "step": 3973 + }, + { + "epoch": 1.400352422907489, + "grad_norm": 1.791823926745788, + "learning_rate": 4.531802078669997e-06, + "loss": 0.5267887115478516, + "step": 3974 + }, + { + "epoch": 1.400704845814978, + "grad_norm": 2.3840846637544617, + "learning_rate": 4.526924855222411e-06, + "loss": 0.6361796855926514, + "step": 3975 + }, + { + "epoch": 1.401057268722467, + "grad_norm": 1.9992656380929168, + "learning_rate": 4.522049489713513e-06, + "loss": 0.5906916856765747, + "step": 3976 + }, + { + "epoch": 1.4014096916299559, + "grad_norm": 1.932616358578933, + "learning_rate": 4.517175983798334e-06, + "loss": 0.647320568561554, + "step": 3977 + }, + { + "epoch": 1.401762114537445, + "grad_norm": 1.7297380971513312, + "learning_rate": 4.512304339131271e-06, + "loss": 0.6129240989685059, + "step": 3978 + }, + { + "epoch": 1.402114537444934, + "grad_norm": 1.8820056515419912, + "learning_rate": 4.507434557366106e-06, + "loss": 0.5550417900085449, + "step": 3979 + }, + { + "epoch": 1.402466960352423, + "grad_norm": 3.410101687197828, + "learning_rate": 4.502566640155972e-06, + "loss": 0.5677829384803772, + "step": 3980 + }, + { + "epoch": 1.4028193832599118, + "grad_norm": 2.037826582552855, + "learning_rate": 4.497700589153379e-06, + "loss": 0.6627114415168762, + "step": 3981 + }, + { + "epoch": 1.4031718061674008, + "grad_norm": 2.0278559165710197, + "learning_rate": 4.492836406010197e-06, + "loss": 0.7225712537765503, + "step": 3982 + }, + { + "epoch": 1.40352422907489, + "grad_norm": 1.6877243893704514, + "learning_rate": 4.487974092377661e-06, + "loss": 0.5259708762168884, + "step": 3983 + }, + { + "epoch": 1.4038766519823789, + "grad_norm": 1.930838228409862, + "learning_rate": 4.4831136499063856e-06, + "loss": 0.5509500503540039, + "step": 3984 + }, + { + 
"epoch": 1.4042290748898678, + "grad_norm": 1.862328702111506, + "learning_rate": 4.478255080246338e-06, + "loss": 0.5436242818832397, + "step": 3985 + }, + { + "epoch": 1.4045814977973567, + "grad_norm": 1.9252586062101578, + "learning_rate": 4.473398385046839e-06, + "loss": 0.591008186340332, + "step": 3986 + }, + { + "epoch": 1.404933920704846, + "grad_norm": 1.8551590253300663, + "learning_rate": 4.4685435659565975e-06, + "loss": 0.7463438510894775, + "step": 3987 + }, + { + "epoch": 1.4052863436123348, + "grad_norm": 2.7212267236094445, + "learning_rate": 4.46369062462367e-06, + "loss": 0.5672414898872375, + "step": 3988 + }, + { + "epoch": 1.4056387665198238, + "grad_norm": 1.9023461618951703, + "learning_rate": 4.458839562695481e-06, + "loss": 0.6022762060165405, + "step": 3989 + }, + { + "epoch": 1.4059911894273127, + "grad_norm": 2.975414442801074, + "learning_rate": 4.453990381818811e-06, + "loss": 0.8312792181968689, + "step": 3990 + }, + { + "epoch": 1.4063436123348017, + "grad_norm": 1.5291152049255947, + "learning_rate": 4.4491430836398055e-06, + "loss": 0.475655198097229, + "step": 3991 + }, + { + "epoch": 1.4066960352422908, + "grad_norm": 2.205738960261052, + "learning_rate": 4.444297669803981e-06, + "loss": 0.6317172050476074, + "step": 3992 + }, + { + "epoch": 1.4070484581497797, + "grad_norm": 1.7590033801874774, + "learning_rate": 4.439454141956194e-06, + "loss": 0.5412036180496216, + "step": 3993 + }, + { + "epoch": 1.4074008810572687, + "grad_norm": 1.8534848369039538, + "learning_rate": 4.434612501740671e-06, + "loss": 0.6401170492172241, + "step": 3994 + }, + { + "epoch": 1.4077533039647578, + "grad_norm": 1.6819739888663638, + "learning_rate": 4.429772750801007e-06, + "loss": 0.6175628900527954, + "step": 3995 + }, + { + "epoch": 1.4081057268722468, + "grad_norm": 1.9863542351176011, + "learning_rate": 4.424934890780142e-06, + "loss": 0.6875946521759033, + "step": 3996 + }, + { + "epoch": 1.4084581497797357, + "grad_norm": 
1.6357928529424866, + "learning_rate": 4.420098923320378e-06, + "loss": 0.6404017210006714, + "step": 3997 + }, + { + "epoch": 1.4088105726872246, + "grad_norm": 2.096371594852834, + "learning_rate": 4.415264850063378e-06, + "loss": 0.7569783329963684, + "step": 3998 + }, + { + "epoch": 1.4091629955947136, + "grad_norm": 1.9373448832520324, + "learning_rate": 4.410432672650154e-06, + "loss": 0.6125228404998779, + "step": 3999 + }, + { + "epoch": 1.4095154185022025, + "grad_norm": 1.8206271046178746, + "learning_rate": 4.405602392721091e-06, + "loss": 0.6187582015991211, + "step": 4000 + }, + { + "epoch": 1.4098678414096917, + "grad_norm": 1.6622405329305723, + "learning_rate": 4.400774011915907e-06, + "loss": 0.6086148023605347, + "step": 4001 + }, + { + "epoch": 1.4102202643171806, + "grad_norm": 1.4174012456939833, + "learning_rate": 4.3959475318736885e-06, + "loss": 0.4140232801437378, + "step": 4002 + }, + { + "epoch": 1.4105726872246696, + "grad_norm": 1.836512159334361, + "learning_rate": 4.391122954232883e-06, + "loss": 0.5065237879753113, + "step": 4003 + }, + { + "epoch": 1.4109251101321587, + "grad_norm": 1.458932644295331, + "learning_rate": 4.386300280631279e-06, + "loss": 0.4817734658718109, + "step": 4004 + }, + { + "epoch": 1.4112775330396476, + "grad_norm": 1.6662288245729417, + "learning_rate": 4.381479512706025e-06, + "loss": 0.6339706778526306, + "step": 4005 + }, + { + "epoch": 1.4116299559471366, + "grad_norm": 2.1459595089971653, + "learning_rate": 4.376660652093621e-06, + "loss": 0.6581720113754272, + "step": 4006 + }, + { + "epoch": 1.4119823788546255, + "grad_norm": 2.1052256395432503, + "learning_rate": 4.3718437004299174e-06, + "loss": 0.722156286239624, + "step": 4007 + }, + { + "epoch": 1.4123348017621145, + "grad_norm": 2.007137048045836, + "learning_rate": 4.36702865935013e-06, + "loss": 0.5262913703918457, + "step": 4008 + }, + { + "epoch": 1.4126872246696034, + "grad_norm": 1.6239575731802327, + "learning_rate": 
4.362215530488805e-06, + "loss": 0.6242132186889648, + "step": 4009 + }, + { + "epoch": 1.4130396475770926, + "grad_norm": 1.6412038783326008, + "learning_rate": 4.35740431547985e-06, + "loss": 0.48776593804359436, + "step": 4010 + }, + { + "epoch": 1.4133920704845815, + "grad_norm": 1.4539922592281447, + "learning_rate": 4.352595015956528e-06, + "loss": 0.5528746843338013, + "step": 4011 + }, + { + "epoch": 1.4137444933920704, + "grad_norm": 1.881555645901769, + "learning_rate": 4.347787633551444e-06, + "loss": 0.6282942295074463, + "step": 4012 + }, + { + "epoch": 1.4140969162995596, + "grad_norm": 1.997464157113011, + "learning_rate": 4.342982169896555e-06, + "loss": 0.6113284826278687, + "step": 4013 + }, + { + "epoch": 1.4144493392070485, + "grad_norm": 1.696170493669202, + "learning_rate": 4.3381786266231685e-06, + "loss": 0.5756875872612, + "step": 4014 + }, + { + "epoch": 1.4148017621145375, + "grad_norm": 1.8012350757266906, + "learning_rate": 4.333377005361931e-06, + "loss": 0.6180154085159302, + "step": 4015 + }, + { + "epoch": 1.4151541850220264, + "grad_norm": 2.2454634074572146, + "learning_rate": 4.328577307742855e-06, + "loss": 0.5728827118873596, + "step": 4016 + }, + { + "epoch": 1.4155066079295153, + "grad_norm": 1.7928891595746113, + "learning_rate": 4.323779535395278e-06, + "loss": 0.5248062014579773, + "step": 4017 + }, + { + "epoch": 1.4158590308370045, + "grad_norm": 1.7454680737255013, + "learning_rate": 4.318983689947895e-06, + "loss": 0.5938228368759155, + "step": 4018 + }, + { + "epoch": 1.4162114537444934, + "grad_norm": 1.8931460456480809, + "learning_rate": 4.3141897730287544e-06, + "loss": 0.7085045576095581, + "step": 4019 + }, + { + "epoch": 1.4165638766519824, + "grad_norm": 2.566425134177144, + "learning_rate": 4.309397786265235e-06, + "loss": 0.599969744682312, + "step": 4020 + }, + { + "epoch": 1.4169162995594713, + "grad_norm": 2.186511304730039, + "learning_rate": 4.30460773128407e-06, + "loss": 0.5784738063812256, + "step": 
4021 + }, + { + "epoch": 1.4172687224669605, + "grad_norm": 1.8802349185240168, + "learning_rate": 4.299819609711332e-06, + "loss": 0.6492793560028076, + "step": 4022 + }, + { + "epoch": 1.4176211453744494, + "grad_norm": 1.6886854891683005, + "learning_rate": 4.2950334231724375e-06, + "loss": 0.6690749526023865, + "step": 4023 + }, + { + "epoch": 1.4179735682819383, + "grad_norm": 1.8482135160791267, + "learning_rate": 4.290249173292158e-06, + "loss": 0.5919139981269836, + "step": 4024 + }, + { + "epoch": 1.4183259911894273, + "grad_norm": 1.6202611135629348, + "learning_rate": 4.285466861694583e-06, + "loss": 0.5661630630493164, + "step": 4025 + }, + { + "epoch": 1.4186784140969162, + "grad_norm": 1.7328062744712673, + "learning_rate": 4.280686490003169e-06, + "loss": 0.547730565071106, + "step": 4026 + }, + { + "epoch": 1.4190308370044054, + "grad_norm": 1.7270546788274348, + "learning_rate": 4.2759080598406985e-06, + "loss": 0.6150445938110352, + "step": 4027 + }, + { + "epoch": 1.4193832599118943, + "grad_norm": 2.048539568947664, + "learning_rate": 4.271131572829303e-06, + "loss": 0.6522917747497559, + "step": 4028 + }, + { + "epoch": 1.4197356828193832, + "grad_norm": 1.952118534937186, + "learning_rate": 4.266357030590449e-06, + "loss": 0.8456230163574219, + "step": 4029 + }, + { + "epoch": 1.4200881057268724, + "grad_norm": 1.810792149813479, + "learning_rate": 4.261584434744945e-06, + "loss": 0.6059526801109314, + "step": 4030 + }, + { + "epoch": 1.4204405286343613, + "grad_norm": 1.8213808222910857, + "learning_rate": 4.256813786912937e-06, + "loss": 0.6289907693862915, + "step": 4031 + }, + { + "epoch": 1.4207929515418503, + "grad_norm": 1.5510911353998291, + "learning_rate": 4.252045088713919e-06, + "loss": 0.48954638838768005, + "step": 4032 + }, + { + "epoch": 1.4211453744493392, + "grad_norm": 2.020061779490103, + "learning_rate": 4.2472783417667055e-06, + "loss": 0.6999461054801941, + "step": 4033 + }, + { + "epoch": 1.4214977973568281, + 
"grad_norm": 1.9629261898681878, + "learning_rate": 4.242513547689466e-06, + "loss": 0.5610899925231934, + "step": 4034 + }, + { + "epoch": 1.421850220264317, + "grad_norm": 1.8415242379631616, + "learning_rate": 4.237750708099697e-06, + "loss": 0.6240172386169434, + "step": 4035 + }, + { + "epoch": 1.4222026431718062, + "grad_norm": 1.887889822972652, + "learning_rate": 4.2329898246142356e-06, + "loss": 0.6368240118026733, + "step": 4036 + }, + { + "epoch": 1.4225550660792952, + "grad_norm": 2.0839652521207483, + "learning_rate": 4.228230898849253e-06, + "loss": 0.6242600679397583, + "step": 4037 + }, + { + "epoch": 1.422907488986784, + "grad_norm": 1.7622749957844728, + "learning_rate": 4.223473932420255e-06, + "loss": 0.6446138620376587, + "step": 4038 + }, + { + "epoch": 1.4232599118942733, + "grad_norm": 1.8800444061446486, + "learning_rate": 4.218718926942081e-06, + "loss": 0.7108229398727417, + "step": 4039 + }, + { + "epoch": 1.4236123348017622, + "grad_norm": 1.7917659431298882, + "learning_rate": 4.213965884028919e-06, + "loss": 0.5279660820960999, + "step": 4040 + }, + { + "epoch": 1.4239647577092511, + "grad_norm": 1.7747691200912903, + "learning_rate": 4.209214805294264e-06, + "loss": 0.6422853469848633, + "step": 4041 + }, + { + "epoch": 1.42431718061674, + "grad_norm": 1.848339978168105, + "learning_rate": 4.2044656923509704e-06, + "loss": 0.6355602741241455, + "step": 4042 + }, + { + "epoch": 1.424669603524229, + "grad_norm": 1.7787421175687093, + "learning_rate": 4.19971854681121e-06, + "loss": 0.5351370573043823, + "step": 4043 + }, + { + "epoch": 1.425022026431718, + "grad_norm": 2.0300248809256987, + "learning_rate": 4.194973370286492e-06, + "loss": 0.5573978424072266, + "step": 4044 + }, + { + "epoch": 1.425374449339207, + "grad_norm": 1.9433750628346875, + "learning_rate": 4.1902301643876555e-06, + "loss": 0.5865412950515747, + "step": 4045 + }, + { + "epoch": 1.425726872246696, + "grad_norm": 2.102324249123369, + "learning_rate": 
4.185488930724868e-06, + "loss": 0.6231919527053833, + "step": 4046 + }, + { + "epoch": 1.426079295154185, + "grad_norm": 1.7135783989067233, + "learning_rate": 4.180749670907638e-06, + "loss": 0.48964112997055054, + "step": 4047 + }, + { + "epoch": 1.4264317180616741, + "grad_norm": 2.0973459527664686, + "learning_rate": 4.176012386544796e-06, + "loss": 0.6299121975898743, + "step": 4048 + }, + { + "epoch": 1.426784140969163, + "grad_norm": 1.7239115182277114, + "learning_rate": 4.171277079244492e-06, + "loss": 0.5612789392471313, + "step": 4049 + }, + { + "epoch": 1.427136563876652, + "grad_norm": 1.7396324571675468, + "learning_rate": 4.166543750614227e-06, + "loss": 0.4315321147441864, + "step": 4050 + }, + { + "epoch": 1.427488986784141, + "grad_norm": 2.0031203112343094, + "learning_rate": 4.1618124022608146e-06, + "loss": 0.6300851702690125, + "step": 4051 + }, + { + "epoch": 1.4278414096916299, + "grad_norm": 1.7808675185736187, + "learning_rate": 4.1570830357904e-06, + "loss": 0.6258795261383057, + "step": 4052 + }, + { + "epoch": 1.4281938325991188, + "grad_norm": 1.9069027085637078, + "learning_rate": 4.152355652808457e-06, + "loss": 0.7364479303359985, + "step": 4053 + }, + { + "epoch": 1.428546255506608, + "grad_norm": 1.8474323145969993, + "learning_rate": 4.147630254919781e-06, + "loss": 0.44845038652420044, + "step": 4054 + }, + { + "epoch": 1.428898678414097, + "grad_norm": 1.6823890398766386, + "learning_rate": 4.142906843728504e-06, + "loss": 0.516815185546875, + "step": 4055 + }, + { + "epoch": 1.4292511013215858, + "grad_norm": 1.6276914964492604, + "learning_rate": 4.138185420838079e-06, + "loss": 0.6296960711479187, + "step": 4056 + }, + { + "epoch": 1.429603524229075, + "grad_norm": 1.728227730408027, + "learning_rate": 4.133465987851269e-06, + "loss": 0.5709103345870972, + "step": 4057 + }, + { + "epoch": 1.429955947136564, + "grad_norm": 1.7709951139170081, + "learning_rate": 4.128748546370186e-06, + "loss": 0.5672547817230225, + "step": 
4058 + }, + { + "epoch": 1.4303083700440529, + "grad_norm": 1.8161591736426752, + "learning_rate": 4.124033097996252e-06, + "loss": 0.5927014350891113, + "step": 4059 + }, + { + "epoch": 1.4306607929515418, + "grad_norm": 1.75056683772296, + "learning_rate": 4.119319644330214e-06, + "loss": 0.7021238803863525, + "step": 4060 + }, + { + "epoch": 1.4310132158590307, + "grad_norm": 1.7949933259038664, + "learning_rate": 4.114608186972143e-06, + "loss": 0.5940784215927124, + "step": 4061 + }, + { + "epoch": 1.43136563876652, + "grad_norm": 1.7958424742287702, + "learning_rate": 4.109898727521429e-06, + "loss": 0.46511122584342957, + "step": 4062 + }, + { + "epoch": 1.4317180616740088, + "grad_norm": 1.7489789285307085, + "learning_rate": 4.105191267576797e-06, + "loss": 0.4710976481437683, + "step": 4063 + }, + { + "epoch": 1.4320704845814978, + "grad_norm": 1.650142742870973, + "learning_rate": 4.100485808736273e-06, + "loss": 0.5947977900505066, + "step": 4064 + }, + { + "epoch": 1.4324229074889867, + "grad_norm": 1.7620222249444284, + "learning_rate": 4.095782352597214e-06, + "loss": 0.6312115788459778, + "step": 4065 + }, + { + "epoch": 1.4327753303964759, + "grad_norm": 1.7689711305484843, + "learning_rate": 4.091080900756303e-06, + "loss": 0.5709977149963379, + "step": 4066 + }, + { + "epoch": 1.4331277533039648, + "grad_norm": 1.8903042666510779, + "learning_rate": 4.086381454809535e-06, + "loss": 0.6183716058731079, + "step": 4067 + }, + { + "epoch": 1.4334801762114537, + "grad_norm": 1.8677159370638265, + "learning_rate": 4.081684016352223e-06, + "loss": 0.5139745473861694, + "step": 4068 + }, + { + "epoch": 1.4338325991189427, + "grad_norm": 1.9141879794373917, + "learning_rate": 4.076988586979004e-06, + "loss": 0.6611173152923584, + "step": 4069 + }, + { + "epoch": 1.4341850220264316, + "grad_norm": 1.7276457807578136, + "learning_rate": 4.072295168283824e-06, + "loss": 0.616943359375, + "step": 4070 + }, + { + "epoch": 1.4345374449339208, + "grad_norm": 
2.331740237042665, + "learning_rate": 4.067603761859965e-06, + "loss": 0.5388625264167786, + "step": 4071 + }, + { + "epoch": 1.4348898678414097, + "grad_norm": 1.9571975377572324, + "learning_rate": 4.062914369300002e-06, + "loss": 0.5523884892463684, + "step": 4072 + }, + { + "epoch": 1.4352422907488986, + "grad_norm": 1.8860165198416616, + "learning_rate": 4.058226992195838e-06, + "loss": 0.5610285997390747, + "step": 4073 + }, + { + "epoch": 1.4355947136563878, + "grad_norm": 1.8522832262316333, + "learning_rate": 4.0535416321387e-06, + "loss": 0.583917498588562, + "step": 4074 + }, + { + "epoch": 1.4359471365638767, + "grad_norm": 1.677482186323321, + "learning_rate": 4.048858290719115e-06, + "loss": 0.6025276184082031, + "step": 4075 + }, + { + "epoch": 1.4362995594713657, + "grad_norm": 1.8037188167117204, + "learning_rate": 4.044176969526936e-06, + "loss": 0.5643888711929321, + "step": 4076 + }, + { + "epoch": 1.4366519823788546, + "grad_norm": 1.709713655992042, + "learning_rate": 4.0394976701513235e-06, + "loss": 0.550167977809906, + "step": 4077 + }, + { + "epoch": 1.4370044052863435, + "grad_norm": 2.1319034629476747, + "learning_rate": 4.034820394180749e-06, + "loss": 0.6182876825332642, + "step": 4078 + }, + { + "epoch": 1.4373568281938325, + "grad_norm": 2.018408244379198, + "learning_rate": 4.030145143203016e-06, + "loss": 0.5197434425354004, + "step": 4079 + }, + { + "epoch": 1.4377092511013216, + "grad_norm": 2.037308833831004, + "learning_rate": 4.025471918805214e-06, + "loss": 0.5351034998893738, + "step": 4080 + }, + { + "epoch": 1.4380616740088106, + "grad_norm": 1.988655670021041, + "learning_rate": 4.020800722573758e-06, + "loss": 0.5576729774475098, + "step": 4081 + }, + { + "epoch": 1.4384140969162995, + "grad_norm": 2.03830396836609, + "learning_rate": 4.016131556094381e-06, + "loss": 0.5557611584663391, + "step": 4082 + }, + { + "epoch": 1.4387665198237887, + "grad_norm": 1.6841558782049018, + "learning_rate": 4.011464420952115e-06, + 
"loss": 0.5300010442733765, + "step": 4083 + }, + { + "epoch": 1.4391189427312776, + "grad_norm": 2.5196291624747387, + "learning_rate": 4.0067993187313085e-06, + "loss": 0.5254991054534912, + "step": 4084 + }, + { + "epoch": 1.4394713656387665, + "grad_norm": 1.9569129587138865, + "learning_rate": 4.002136251015617e-06, + "loss": 0.5044848322868347, + "step": 4085 + }, + { + "epoch": 1.4398237885462555, + "grad_norm": 1.7587820286029368, + "learning_rate": 3.997475219388005e-06, + "loss": 0.6422302722930908, + "step": 4086 + }, + { + "epoch": 1.4401762114537444, + "grad_norm": 1.7785161433093049, + "learning_rate": 3.992816225430758e-06, + "loss": 0.5502497553825378, + "step": 4087 + }, + { + "epoch": 1.4405286343612334, + "grad_norm": 1.9272648866171629, + "learning_rate": 3.988159270725448e-06, + "loss": 0.7479537129402161, + "step": 4088 + }, + { + "epoch": 1.4408810572687225, + "grad_norm": 1.8882665464741835, + "learning_rate": 3.983504356852967e-06, + "loss": 0.5418091416358948, + "step": 4089 + }, + { + "epoch": 1.4412334801762114, + "grad_norm": 2.1909054908738805, + "learning_rate": 3.978851485393519e-06, + "loss": 0.5262568593025208, + "step": 4090 + }, + { + "epoch": 1.4415859030837004, + "grad_norm": 1.7855475608149034, + "learning_rate": 3.974200657926607e-06, + "loss": 0.5419692397117615, + "step": 4091 + }, + { + "epoch": 1.4419383259911895, + "grad_norm": 1.84908442821801, + "learning_rate": 3.9695518760310425e-06, + "loss": 0.5202164649963379, + "step": 4092 + }, + { + "epoch": 1.4422907488986785, + "grad_norm": 1.6256093479781946, + "learning_rate": 3.96490514128494e-06, + "loss": 0.7232608795166016, + "step": 4093 + }, + { + "epoch": 1.4426431718061674, + "grad_norm": 3.2107784732452473, + "learning_rate": 3.960260455265721e-06, + "loss": 0.5899156332015991, + "step": 4094 + }, + { + "epoch": 1.4429955947136563, + "grad_norm": 1.9995831956411032, + "learning_rate": 3.95561781955012e-06, + "loss": 0.629068911075592, + "step": 4095 + }, + { + 
"epoch": 1.4433480176211453, + "grad_norm": 1.9520751138167456, + "learning_rate": 3.950977235714154e-06, + "loss": 0.5584920644760132, + "step": 4096 + }, + { + "epoch": 1.4437004405286344, + "grad_norm": 1.7280125643736322, + "learning_rate": 3.9463387053331685e-06, + "loss": 0.713936984539032, + "step": 4097 + }, + { + "epoch": 1.4440528634361234, + "grad_norm": 2.7226452019662357, + "learning_rate": 3.9417022299817944e-06, + "loss": 0.7157076001167297, + "step": 4098 + }, + { + "epoch": 1.4444052863436123, + "grad_norm": 1.940369638230087, + "learning_rate": 3.937067811233972e-06, + "loss": 0.6540844440460205, + "step": 4099 + }, + { + "epoch": 1.4447577092511013, + "grad_norm": 1.6342043838390767, + "learning_rate": 3.9324354506629425e-06, + "loss": 0.5350022315979004, + "step": 4100 + }, + { + "epoch": 1.4451101321585904, + "grad_norm": 1.9186113150470587, + "learning_rate": 3.9278051498412475e-06, + "loss": 0.6852695941925049, + "step": 4101 + }, + { + "epoch": 1.4454625550660793, + "grad_norm": 1.8060312138879744, + "learning_rate": 3.923176910340728e-06, + "loss": 0.6059536337852478, + "step": 4102 + }, + { + "epoch": 1.4458149779735683, + "grad_norm": 1.6721278909458728, + "learning_rate": 3.918550733732536e-06, + "loss": 0.5787979364395142, + "step": 4103 + }, + { + "epoch": 1.4461674008810572, + "grad_norm": 1.8059605647431092, + "learning_rate": 3.9139266215871e-06, + "loss": 0.6068835258483887, + "step": 4104 + }, + { + "epoch": 1.4465198237885462, + "grad_norm": 1.7804694224195132, + "learning_rate": 3.909304575474175e-06, + "loss": 0.5123663544654846, + "step": 4105 + }, + { + "epoch": 1.4468722466960353, + "grad_norm": 1.832785857954117, + "learning_rate": 3.9046845969627975e-06, + "loss": 0.6285420656204224, + "step": 4106 + }, + { + "epoch": 1.4472246696035242, + "grad_norm": 1.8029701329975896, + "learning_rate": 3.9000666876213056e-06, + "loss": 0.6186035871505737, + "step": 4107 + }, + { + "epoch": 1.4475770925110132, + "grad_norm": 
2.8121411727628174, + "learning_rate": 3.895450849017336e-06, + "loss": 0.6222661733627319, + "step": 4108 + }, + { + "epoch": 1.4479295154185021, + "grad_norm": 1.7965214044078308, + "learning_rate": 3.890837082717822e-06, + "loss": 0.5889515280723572, + "step": 4109 + }, + { + "epoch": 1.4482819383259913, + "grad_norm": 1.8839124618745182, + "learning_rate": 3.8862253902889925e-06, + "loss": 0.6160309314727783, + "step": 4110 + }, + { + "epoch": 1.4486343612334802, + "grad_norm": 1.7651875927016676, + "learning_rate": 3.881615773296381e-06, + "loss": 0.48093074560165405, + "step": 4111 + }, + { + "epoch": 1.4489867841409692, + "grad_norm": 1.8283039880345147, + "learning_rate": 3.877008233304796e-06, + "loss": 0.5851131677627563, + "step": 4112 + }, + { + "epoch": 1.449339207048458, + "grad_norm": 1.7366010221761805, + "learning_rate": 3.872402771878365e-06, + "loss": 0.5322093963623047, + "step": 4113 + }, + { + "epoch": 1.449691629955947, + "grad_norm": 1.7342840660368584, + "learning_rate": 3.8677993905804956e-06, + "loss": 0.652804970741272, + "step": 4114 + }, + { + "epoch": 1.4500440528634362, + "grad_norm": 1.9583669696557284, + "learning_rate": 3.863198090973891e-06, + "loss": 0.5494996309280396, + "step": 4115 + }, + { + "epoch": 1.4503964757709251, + "grad_norm": 1.910811405312081, + "learning_rate": 3.8585988746205505e-06, + "loss": 0.5641331672668457, + "step": 4116 + }, + { + "epoch": 1.450748898678414, + "grad_norm": 1.7616537450177998, + "learning_rate": 3.854001743081764e-06, + "loss": 0.5415998697280884, + "step": 4117 + }, + { + "epoch": 1.4511013215859032, + "grad_norm": 1.599490372210091, + "learning_rate": 3.849406697918113e-06, + "loss": 0.4399813711643219, + "step": 4118 + }, + { + "epoch": 1.4514537444933922, + "grad_norm": 2.0642862733318115, + "learning_rate": 3.84481374068948e-06, + "loss": 0.6228655576705933, + "step": 4119 + }, + { + "epoch": 1.451806167400881, + "grad_norm": 1.650547077673145, + "learning_rate": 
3.8402228729550195e-06, + "loss": 0.5575108528137207, + "step": 4120 + }, + { + "epoch": 1.45215859030837, + "grad_norm": 2.4780057667058704, + "learning_rate": 3.835634096273197e-06, + "loss": 0.5705434679985046, + "step": 4121 + }, + { + "epoch": 1.452511013215859, + "grad_norm": 2.1620556917486184, + "learning_rate": 3.831047412201758e-06, + "loss": 0.5649456977844238, + "step": 4122 + }, + { + "epoch": 1.452863436123348, + "grad_norm": 1.9734169166383557, + "learning_rate": 3.826462822297736e-06, + "loss": 0.5656554698944092, + "step": 4123 + }, + { + "epoch": 1.453215859030837, + "grad_norm": 1.8883507101257415, + "learning_rate": 3.82188032811746e-06, + "loss": 0.6565591096878052, + "step": 4124 + }, + { + "epoch": 1.453568281938326, + "grad_norm": 1.9823635297408013, + "learning_rate": 3.817299931216537e-06, + "loss": 0.6553423404693604, + "step": 4125 + }, + { + "epoch": 1.453920704845815, + "grad_norm": 1.8362785094722764, + "learning_rate": 3.812721633149883e-06, + "loss": 0.5401671528816223, + "step": 4126 + }, + { + "epoch": 1.454273127753304, + "grad_norm": 2.008049720412482, + "learning_rate": 3.808145435471674e-06, + "loss": 0.7275381088256836, + "step": 4127 + }, + { + "epoch": 1.454625550660793, + "grad_norm": 1.827455905179675, + "learning_rate": 3.80357133973539e-06, + "loss": 0.6384310722351074, + "step": 4128 + }, + { + "epoch": 1.454977973568282, + "grad_norm": 1.986935058055083, + "learning_rate": 3.7989993474937993e-06, + "loss": 0.7783250212669373, + "step": 4129 + }, + { + "epoch": 1.455330396475771, + "grad_norm": 2.1923612655628624, + "learning_rate": 3.7944294602989473e-06, + "loss": 0.752954363822937, + "step": 4130 + }, + { + "epoch": 1.4556828193832598, + "grad_norm": 1.801491937261316, + "learning_rate": 3.789861679702169e-06, + "loss": 0.6099411249160767, + "step": 4131 + }, + { + "epoch": 1.4560352422907488, + "grad_norm": 2.12230143233965, + "learning_rate": 3.7852960072540845e-06, + "loss": 0.6608012914657593, + "step": 4132 + 
}, + { + "epoch": 1.456387665198238, + "grad_norm": 1.836228759881875, + "learning_rate": 3.7807324445045924e-06, + "loss": 0.5119853615760803, + "step": 4133 + }, + { + "epoch": 1.4567400881057269, + "grad_norm": 2.036719543857632, + "learning_rate": 3.7761709930028923e-06, + "loss": 0.6353520750999451, + "step": 4134 + }, + { + "epoch": 1.4570925110132158, + "grad_norm": 1.9234147822597618, + "learning_rate": 3.7716116542974434e-06, + "loss": 0.6427614688873291, + "step": 4135 + }, + { + "epoch": 1.457444933920705, + "grad_norm": 2.34139645382815, + "learning_rate": 3.7670544299360003e-06, + "loss": 0.6205203533172607, + "step": 4136 + }, + { + "epoch": 1.457797356828194, + "grad_norm": 1.940401751978381, + "learning_rate": 3.7624993214656046e-06, + "loss": 0.5957762002944946, + "step": 4137 + }, + { + "epoch": 1.4581497797356828, + "grad_norm": 1.8842452122457418, + "learning_rate": 3.7579463304325714e-06, + "loss": 0.6698043346405029, + "step": 4138 + }, + { + "epoch": 1.4585022026431718, + "grad_norm": 1.825534553754035, + "learning_rate": 3.7533954583824982e-06, + "loss": 0.5947796106338501, + "step": 4139 + }, + { + "epoch": 1.4588546255506607, + "grad_norm": 1.7817965501913557, + "learning_rate": 3.7488467068602664e-06, + "loss": 0.5905463695526123, + "step": 4140 + }, + { + "epoch": 1.4592070484581499, + "grad_norm": 1.8530726474927524, + "learning_rate": 3.74430007741003e-06, + "loss": 0.6218722462654114, + "step": 4141 + }, + { + "epoch": 1.4595594713656388, + "grad_norm": 1.9872212615104103, + "learning_rate": 3.739755571575241e-06, + "loss": 0.6124013066291809, + "step": 4142 + }, + { + "epoch": 1.4599118942731277, + "grad_norm": 1.8897226451904012, + "learning_rate": 3.7352131908986046e-06, + "loss": 0.5816842317581177, + "step": 4143 + }, + { + "epoch": 1.4602643171806167, + "grad_norm": 1.780742815029414, + "learning_rate": 3.7306729369221197e-06, + "loss": 0.5225531458854675, + "step": 4144 + }, + { + "epoch": 1.4606167400881058, + "grad_norm": 
1.5899946748394236, + "learning_rate": 3.7261348111870663e-06, + "loss": 0.4536696672439575, + "step": 4145 + }, + { + "epoch": 1.4609691629955948, + "grad_norm": 1.6530094281559282, + "learning_rate": 3.7215988152339968e-06, + "loss": 0.5777832269668579, + "step": 4146 + }, + { + "epoch": 1.4613215859030837, + "grad_norm": 2.0042576738233993, + "learning_rate": 3.717064950602737e-06, + "loss": 0.5964622497558594, + "step": 4147 + }, + { + "epoch": 1.4616740088105726, + "grad_norm": 1.634683701176406, + "learning_rate": 3.7125332188323937e-06, + "loss": 0.47224390506744385, + "step": 4148 + }, + { + "epoch": 1.4620264317180616, + "grad_norm": 2.0219703130043474, + "learning_rate": 3.708003621461347e-06, + "loss": 0.5989271402359009, + "step": 4149 + }, + { + "epoch": 1.4623788546255507, + "grad_norm": 1.7865027495889427, + "learning_rate": 3.7034761600272627e-06, + "loss": 0.6171919107437134, + "step": 4150 + }, + { + "epoch": 1.4627312775330397, + "grad_norm": 1.742831115289917, + "learning_rate": 3.6989508360670647e-06, + "loss": 0.7064418792724609, + "step": 4151 + }, + { + "epoch": 1.4630837004405286, + "grad_norm": 2.236539087690149, + "learning_rate": 3.6944276511169577e-06, + "loss": 0.6055941581726074, + "step": 4152 + }, + { + "epoch": 1.4634361233480178, + "grad_norm": 1.7433765587507288, + "learning_rate": 3.689906606712429e-06, + "loss": 0.4550645351409912, + "step": 4153 + }, + { + "epoch": 1.4637885462555067, + "grad_norm": 1.929810725161399, + "learning_rate": 3.68538770438823e-06, + "loss": 0.5958502292633057, + "step": 4154 + }, + { + "epoch": 1.4641409691629956, + "grad_norm": 2.057185852502653, + "learning_rate": 3.680870945678389e-06, + "loss": 0.5574297904968262, + "step": 4155 + }, + { + "epoch": 1.4644933920704846, + "grad_norm": 1.443537567568116, + "learning_rate": 3.676356332116202e-06, + "loss": 0.46494683623313904, + "step": 4156 + }, + { + "epoch": 1.4648458149779735, + "grad_norm": 1.7341220293452018, + "learning_rate": 
3.671843865234238e-06, + "loss": 0.5549977421760559, + "step": 4157 + }, + { + "epoch": 1.4651982378854624, + "grad_norm": 1.7585158502615206, + "learning_rate": 3.6673335465643488e-06, + "loss": 0.5620779395103455, + "step": 4158 + }, + { + "epoch": 1.4655506607929516, + "grad_norm": 1.845448976603358, + "learning_rate": 3.662825377637638e-06, + "loss": 0.5945389270782471, + "step": 4159 + }, + { + "epoch": 1.4659030837004405, + "grad_norm": 1.9218401758762256, + "learning_rate": 3.6583193599844867e-06, + "loss": 0.6923668384552002, + "step": 4160 + }, + { + "epoch": 1.4662555066079295, + "grad_norm": 2.16834694145402, + "learning_rate": 3.653815495134557e-06, + "loss": 0.6848515868186951, + "step": 4161 + }, + { + "epoch": 1.4666079295154186, + "grad_norm": 1.922504159473904, + "learning_rate": 3.649313784616765e-06, + "loss": 0.640125036239624, + "step": 4162 + }, + { + "epoch": 1.4669603524229076, + "grad_norm": 1.816415927402479, + "learning_rate": 3.6448142299593026e-06, + "loss": 0.6879653930664062, + "step": 4163 + }, + { + "epoch": 1.4673127753303965, + "grad_norm": 1.9534619637738762, + "learning_rate": 3.6403168326896286e-06, + "loss": 0.6757794618606567, + "step": 4164 + }, + { + "epoch": 1.4676651982378854, + "grad_norm": 1.7476054801499117, + "learning_rate": 3.6358215943344664e-06, + "loss": 0.6405826807022095, + "step": 4165 + }, + { + "epoch": 1.4680176211453744, + "grad_norm": 2.1448885390527064, + "learning_rate": 3.6313285164198187e-06, + "loss": 0.692136287689209, + "step": 4166 + }, + { + "epoch": 1.4683700440528633, + "grad_norm": 1.8449983835752888, + "learning_rate": 3.626837600470935e-06, + "loss": 0.6305568218231201, + "step": 4167 + }, + { + "epoch": 1.4687224669603525, + "grad_norm": 2.1026866185280264, + "learning_rate": 3.6223488480123427e-06, + "loss": 0.7040449380874634, + "step": 4168 + }, + { + "epoch": 1.4690748898678414, + "grad_norm": 1.5463095765444386, + "learning_rate": 3.6178622605678403e-06, + "loss": 0.5064427852630615, + 
"step": 4169 + }, + { + "epoch": 1.4694273127753303, + "grad_norm": 1.6549157120829303, + "learning_rate": 3.6133778396604813e-06, + "loss": 0.46597155928611755, + "step": 4170 + }, + { + "epoch": 1.4697797356828195, + "grad_norm": 1.9774014610728103, + "learning_rate": 3.6088955868125864e-06, + "loss": 0.5764908790588379, + "step": 4171 + }, + { + "epoch": 1.4701321585903084, + "grad_norm": 1.9347279402338318, + "learning_rate": 3.6044155035457405e-06, + "loss": 0.5808656215667725, + "step": 4172 + }, + { + "epoch": 1.4704845814977974, + "grad_norm": 2.0116811142135202, + "learning_rate": 3.599937591380791e-06, + "loss": 0.5439287424087524, + "step": 4173 + }, + { + "epoch": 1.4708370044052863, + "grad_norm": 1.5674669602592264, + "learning_rate": 3.595461851837857e-06, + "loss": 0.5943965911865234, + "step": 4174 + }, + { + "epoch": 1.4711894273127752, + "grad_norm": 1.8847509954427386, + "learning_rate": 3.590988286436302e-06, + "loss": 0.631833016872406, + "step": 4175 + }, + { + "epoch": 1.4715418502202642, + "grad_norm": 1.9232774716266652, + "learning_rate": 3.5865168966947718e-06, + "loss": 0.514176070690155, + "step": 4176 + }, + { + "epoch": 1.4718942731277533, + "grad_norm": 1.7211351925277203, + "learning_rate": 3.582047684131159e-06, + "loss": 0.584772527217865, + "step": 4177 + }, + { + "epoch": 1.4722466960352423, + "grad_norm": 1.7726013207799318, + "learning_rate": 3.5775806502626244e-06, + "loss": 0.5085974931716919, + "step": 4178 + }, + { + "epoch": 1.4725991189427312, + "grad_norm": 2.1422494719737464, + "learning_rate": 3.573115796605584e-06, + "loss": 0.62562495470047, + "step": 4179 + }, + { + "epoch": 1.4729515418502204, + "grad_norm": 1.9507950967896, + "learning_rate": 3.5686531246757206e-06, + "loss": 0.5815824270248413, + "step": 4180 + }, + { + "epoch": 1.4733039647577093, + "grad_norm": 1.8811159721586839, + "learning_rate": 3.5641926359879663e-06, + "loss": 0.6639705300331116, + "step": 4181 + }, + { + "epoch": 1.4736563876651982, + 
"grad_norm": 1.8978737039698366, + "learning_rate": 3.5597343320565293e-06, + "loss": 0.6265684962272644, + "step": 4182 + }, + { + "epoch": 1.4740088105726872, + "grad_norm": 1.877895350809495, + "learning_rate": 3.5552782143948504e-06, + "loss": 0.6113626956939697, + "step": 4183 + }, + { + "epoch": 1.4743612334801761, + "grad_norm": 1.8492974346484832, + "learning_rate": 3.550824284515655e-06, + "loss": 0.5247244834899902, + "step": 4184 + }, + { + "epoch": 1.4747136563876653, + "grad_norm": 1.871370335191458, + "learning_rate": 3.5463725439309083e-06, + "loss": 0.5524521470069885, + "step": 4185 + }, + { + "epoch": 1.4750660792951542, + "grad_norm": 1.9955136913094453, + "learning_rate": 3.5419229941518384e-06, + "loss": 0.5462251305580139, + "step": 4186 + }, + { + "epoch": 1.4754185022026431, + "grad_norm": 1.6609337480864497, + "learning_rate": 3.5374756366889297e-06, + "loss": 0.6500638723373413, + "step": 4187 + }, + { + "epoch": 1.475770925110132, + "grad_norm": 2.0744035602538586, + "learning_rate": 3.5330304730519216e-06, + "loss": 0.4445904791355133, + "step": 4188 + }, + { + "epoch": 1.4761233480176212, + "grad_norm": 1.7788816335434026, + "learning_rate": 3.5285875047498075e-06, + "loss": 0.6068017482757568, + "step": 4189 + }, + { + "epoch": 1.4764757709251102, + "grad_norm": 1.683605461123042, + "learning_rate": 3.5241467332908384e-06, + "loss": 0.5577334761619568, + "step": 4190 + }, + { + "epoch": 1.4768281938325991, + "grad_norm": 1.9605228698426533, + "learning_rate": 3.5197081601825135e-06, + "loss": 0.6596503257751465, + "step": 4191 + }, + { + "epoch": 1.477180616740088, + "grad_norm": 1.9912955738456768, + "learning_rate": 3.5152717869315965e-06, + "loss": 0.6260303258895874, + "step": 4192 + }, + { + "epoch": 1.477533039647577, + "grad_norm": 2.010278739994815, + "learning_rate": 3.510837615044097e-06, + "loss": 0.5467355251312256, + "step": 4193 + }, + { + "epoch": 1.4778854625550661, + "grad_norm": 2.516516212561754, + "learning_rate": 
3.506405646025276e-06, + "loss": 0.5306693911552429, + "step": 4194 + }, + { + "epoch": 1.478237885462555, + "grad_norm": 1.7497505820795882, + "learning_rate": 3.5019758813796513e-06, + "loss": 0.6130725741386414, + "step": 4195 + }, + { + "epoch": 1.478590308370044, + "grad_norm": 2.2199157894914143, + "learning_rate": 3.4975483226109874e-06, + "loss": 0.6656272411346436, + "step": 4196 + }, + { + "epoch": 1.4789427312775332, + "grad_norm": 1.8654097488268417, + "learning_rate": 3.4931229712223047e-06, + "loss": 0.6018439531326294, + "step": 4197 + }, + { + "epoch": 1.479295154185022, + "grad_norm": 2.0982915779378137, + "learning_rate": 3.488699828715871e-06, + "loss": 0.6635257005691528, + "step": 4198 + }, + { + "epoch": 1.479647577092511, + "grad_norm": 1.8412514150393455, + "learning_rate": 3.4842788965932038e-06, + "loss": 0.5760075449943542, + "step": 4199 + }, + { + "epoch": 1.48, + "grad_norm": 1.7009881043074442, + "learning_rate": 3.4798601763550778e-06, + "loss": 0.6951982975006104, + "step": 4200 + }, + { + "epoch": 1.480352422907489, + "grad_norm": 1.880170776358824, + "learning_rate": 3.475443669501508e-06, + "loss": 0.6574405431747437, + "step": 4201 + }, + { + "epoch": 1.4807048458149779, + "grad_norm": 1.8075997179509888, + "learning_rate": 3.4710293775317593e-06, + "loss": 0.5912263989448547, + "step": 4202 + }, + { + "epoch": 1.481057268722467, + "grad_norm": 1.7703606198961421, + "learning_rate": 3.4666173019443485e-06, + "loss": 0.5169661045074463, + "step": 4203 + }, + { + "epoch": 1.481409691629956, + "grad_norm": 1.6923587460137135, + "learning_rate": 3.4622074442370345e-06, + "loss": 0.5707660913467407, + "step": 4204 + }, + { + "epoch": 1.4817621145374449, + "grad_norm": 1.7929036165873167, + "learning_rate": 3.4577998059068354e-06, + "loss": 0.5856584310531616, + "step": 4205 + }, + { + "epoch": 1.482114537444934, + "grad_norm": 2.0144464412272636, + "learning_rate": 3.4533943884499975e-06, + "loss": 0.6306010484695435, + "step": 4206 
+ }, + { + "epoch": 1.482466960352423, + "grad_norm": 1.9708292107625427, + "learning_rate": 3.4489911933620245e-06, + "loss": 0.6177140474319458, + "step": 4207 + }, + { + "epoch": 1.482819383259912, + "grad_norm": 1.6187910458828605, + "learning_rate": 3.4445902221376694e-06, + "loss": 0.5527759790420532, + "step": 4208 + }, + { + "epoch": 1.4831718061674009, + "grad_norm": 1.792049785406371, + "learning_rate": 3.440191476270922e-06, + "loss": 0.6838431358337402, + "step": 4209 + }, + { + "epoch": 1.4835242290748898, + "grad_norm": 2.0640892173970933, + "learning_rate": 3.4357949572550196e-06, + "loss": 0.4876987636089325, + "step": 4210 + }, + { + "epoch": 1.4838766519823787, + "grad_norm": 1.66358265635652, + "learning_rate": 3.4314006665824427e-06, + "loss": 0.5639296770095825, + "step": 4211 + }, + { + "epoch": 1.4842290748898679, + "grad_norm": 1.9264745517709694, + "learning_rate": 3.427008605744915e-06, + "loss": 0.4189109802246094, + "step": 4212 + }, + { + "epoch": 1.4845814977973568, + "grad_norm": 1.7041726412059042, + "learning_rate": 3.422618776233413e-06, + "loss": 0.6602882146835327, + "step": 4213 + }, + { + "epoch": 1.4849339207048458, + "grad_norm": 2.105857994769297, + "learning_rate": 3.4182311795381373e-06, + "loss": 0.7642478942871094, + "step": 4214 + }, + { + "epoch": 1.485286343612335, + "grad_norm": 1.703090383184888, + "learning_rate": 3.41384581714854e-06, + "loss": 0.5550031065940857, + "step": 4215 + }, + { + "epoch": 1.4856387665198239, + "grad_norm": 1.956165427853548, + "learning_rate": 3.4094626905533223e-06, + "loss": 0.7036092281341553, + "step": 4216 + }, + { + "epoch": 1.4859911894273128, + "grad_norm": 1.9055824872661757, + "learning_rate": 3.4050818012404165e-06, + "loss": 0.693780779838562, + "step": 4217 + }, + { + "epoch": 1.4863436123348017, + "grad_norm": 1.750544621689218, + "learning_rate": 3.4007031506969977e-06, + "loss": 0.6315299868583679, + "step": 4218 + }, + { + "epoch": 1.4866960352422907, + "grad_norm": 
2.0036955114247355, + "learning_rate": 3.396326740409481e-06, + "loss": 0.47849225997924805, + "step": 4219 + }, + { + "epoch": 1.4870484581497798, + "grad_norm": 1.9433930854735686, + "learning_rate": 3.3919525718635195e-06, + "loss": 0.6200336217880249, + "step": 4220 + }, + { + "epoch": 1.4874008810572688, + "grad_norm": 1.7540152253976415, + "learning_rate": 3.3875806465440152e-06, + "loss": 0.7594903707504272, + "step": 4221 + }, + { + "epoch": 1.4877533039647577, + "grad_norm": 1.8336468945254887, + "learning_rate": 3.383210965935093e-06, + "loss": 0.47159409523010254, + "step": 4222 + }, + { + "epoch": 1.4881057268722466, + "grad_norm": 1.6169320059495966, + "learning_rate": 3.3788435315201216e-06, + "loss": 0.5272495150566101, + "step": 4223 + }, + { + "epoch": 1.4884581497797358, + "grad_norm": 1.8268891947791475, + "learning_rate": 3.3744783447817177e-06, + "loss": 0.43847334384918213, + "step": 4224 + }, + { + "epoch": 1.4888105726872247, + "grad_norm": 1.7778298915864024, + "learning_rate": 3.370115407201724e-06, + "loss": 0.656914234161377, + "step": 4225 + }, + { + "epoch": 1.4891629955947137, + "grad_norm": 1.9329427480605288, + "learning_rate": 3.3657547202612128e-06, + "loss": 0.6379527449607849, + "step": 4226 + }, + { + "epoch": 1.4895154185022026, + "grad_norm": 2.0864597290427103, + "learning_rate": 3.3613962854405136e-06, + "loss": 0.6254120469093323, + "step": 4227 + }, + { + "epoch": 1.4898678414096915, + "grad_norm": 1.9356514762449182, + "learning_rate": 3.35704010421917e-06, + "loss": 0.6567566990852356, + "step": 4228 + }, + { + "epoch": 1.4902202643171807, + "grad_norm": 1.7537675986626187, + "learning_rate": 3.352686178075981e-06, + "loss": 0.5121499300003052, + "step": 4229 + }, + { + "epoch": 1.4905726872246696, + "grad_norm": 1.5674669685831402, + "learning_rate": 3.3483345084889595e-06, + "loss": 0.5727466344833374, + "step": 4230 + }, + { + "epoch": 1.4909251101321586, + "grad_norm": 2.005473410378335, + "learning_rate": 
3.3439850969353614e-06, + "loss": 0.6013318300247192, + "step": 4231 + }, + { + "epoch": 1.4912775330396475, + "grad_norm": 1.8391832358116647, + "learning_rate": 3.3396379448916836e-06, + "loss": 0.6350653767585754, + "step": 4232 + }, + { + "epoch": 1.4916299559471367, + "grad_norm": 1.8543607360516363, + "learning_rate": 3.335293053833645e-06, + "loss": 0.7072123885154724, + "step": 4233 + }, + { + "epoch": 1.4919823788546256, + "grad_norm": 1.7154205163032374, + "learning_rate": 3.330950425236201e-06, + "loss": 0.6208378076553345, + "step": 4234 + }, + { + "epoch": 1.4923348017621145, + "grad_norm": 2.0942315596519667, + "learning_rate": 3.3266100605735397e-06, + "loss": 0.656146764755249, + "step": 4235 + }, + { + "epoch": 1.4926872246696035, + "grad_norm": 1.953931788636606, + "learning_rate": 3.322271961319076e-06, + "loss": 0.6145347952842712, + "step": 4236 + }, + { + "epoch": 1.4930396475770924, + "grad_norm": 1.7528969029549952, + "learning_rate": 3.3179361289454694e-06, + "loss": 0.5876312255859375, + "step": 4237 + }, + { + "epoch": 1.4933920704845816, + "grad_norm": 1.601290490679199, + "learning_rate": 3.3136025649245897e-06, + "loss": 0.48365384340286255, + "step": 4238 + }, + { + "epoch": 1.4937444933920705, + "grad_norm": 2.189978154300805, + "learning_rate": 3.3092712707275467e-06, + "loss": 0.566576361656189, + "step": 4239 + }, + { + "epoch": 1.4940969162995594, + "grad_norm": 1.9878421762040837, + "learning_rate": 3.3049422478246886e-06, + "loss": 0.6982032060623169, + "step": 4240 + }, + { + "epoch": 1.4944493392070486, + "grad_norm": 1.9039317781349454, + "learning_rate": 3.3006154976855787e-06, + "loss": 0.5802686214447021, + "step": 4241 + }, + { + "epoch": 1.4948017621145375, + "grad_norm": 1.8292762393050834, + "learning_rate": 3.296291021779016e-06, + "loss": 0.6656724214553833, + "step": 4242 + }, + { + "epoch": 1.4951541850220265, + "grad_norm": 1.8194685949700777, + "learning_rate": 3.2919688215730227e-06, + "loss": 
0.5081402063369751, + "step": 4243 + }, + { + "epoch": 1.4955066079295154, + "grad_norm": 1.858251792062496, + "learning_rate": 3.28764889853485e-06, + "loss": 0.6963785290718079, + "step": 4244 + }, + { + "epoch": 1.4958590308370043, + "grad_norm": 1.6076782907375928, + "learning_rate": 3.283331254130987e-06, + "loss": 0.4953869581222534, + "step": 4245 + }, + { + "epoch": 1.4962114537444933, + "grad_norm": 1.7009256372822803, + "learning_rate": 3.2790158898271283e-06, + "loss": 0.5495179295539856, + "step": 4246 + }, + { + "epoch": 1.4965638766519824, + "grad_norm": 1.5282320768079813, + "learning_rate": 3.274702807088208e-06, + "loss": 0.6238610148429871, + "step": 4247 + }, + { + "epoch": 1.4969162995594714, + "grad_norm": 1.7145305152154042, + "learning_rate": 3.270392007378389e-06, + "loss": 0.557083249092102, + "step": 4248 + }, + { + "epoch": 1.4972687224669603, + "grad_norm": 1.8458971793579602, + "learning_rate": 3.2660834921610495e-06, + "loss": 0.6317561864852905, + "step": 4249 + }, + { + "epoch": 1.4976211453744495, + "grad_norm": 2.025909664851984, + "learning_rate": 3.2617772628987974e-06, + "loss": 0.5957529544830322, + "step": 4250 + }, + { + "epoch": 1.4979735682819384, + "grad_norm": 1.8950835934769208, + "learning_rate": 3.2574733210534637e-06, + "loss": 0.7661205530166626, + "step": 4251 + }, + { + "epoch": 1.4983259911894273, + "grad_norm": 1.6920068443310292, + "learning_rate": 3.2531716680861024e-06, + "loss": 0.5616782903671265, + "step": 4252 + }, + { + "epoch": 1.4986784140969163, + "grad_norm": 1.770897830706882, + "learning_rate": 3.2488723054569905e-06, + "loss": 0.5679990649223328, + "step": 4253 + }, + { + "epoch": 1.4990308370044052, + "grad_norm": 1.8387315322882807, + "learning_rate": 3.2445752346256244e-06, + "loss": 0.6355923414230347, + "step": 4254 + }, + { + "epoch": 1.4993832599118941, + "grad_norm": 1.5734033631685667, + "learning_rate": 3.2402804570507316e-06, + "loss": 0.5050745010375977, + "step": 4255 + }, + { + 
"epoch": 1.4997356828193833, + "grad_norm": 1.9535070131295427, + "learning_rate": 3.2359879741902557e-06, + "loss": 0.6585286855697632, + "step": 4256 + }, + { + "epoch": 1.5000881057268722, + "grad_norm": 1.9369843836982625, + "learning_rate": 3.2316977875013567e-06, + "loss": 0.5108245611190796, + "step": 4257 + }, + { + "epoch": 1.5004405286343614, + "grad_norm": 1.7460361732263896, + "learning_rate": 3.2274098984404223e-06, + "loss": 0.5270702838897705, + "step": 4258 + }, + { + "epoch": 1.5007929515418503, + "grad_norm": 2.09582870313145, + "learning_rate": 3.223124308463057e-06, + "loss": 0.6421051025390625, + "step": 4259 + }, + { + "epoch": 1.5011453744493393, + "grad_norm": 2.0173715825527454, + "learning_rate": 3.218841019024084e-06, + "loss": 0.6040945053100586, + "step": 4260 + }, + { + "epoch": 1.5014977973568282, + "grad_norm": 3.5488261180155023, + "learning_rate": 3.214560031577548e-06, + "loss": 0.6389988660812378, + "step": 4261 + }, + { + "epoch": 1.5018502202643171, + "grad_norm": 2.1449229280338096, + "learning_rate": 3.210281347576707e-06, + "loss": 0.6474273800849915, + "step": 4262 + }, + { + "epoch": 1.502202643171806, + "grad_norm": 2.0726789637634666, + "learning_rate": 3.206004968474048e-06, + "loss": 0.7020560503005981, + "step": 4263 + }, + { + "epoch": 1.502555066079295, + "grad_norm": 1.7677587583992656, + "learning_rate": 3.2017308957212644e-06, + "loss": 0.574647068977356, + "step": 4264 + }, + { + "epoch": 1.5029074889867842, + "grad_norm": 1.8152121117445819, + "learning_rate": 3.1974591307692724e-06, + "loss": 0.6912944316864014, + "step": 4265 + }, + { + "epoch": 1.503259911894273, + "grad_norm": 1.7825438750387144, + "learning_rate": 3.1931896750682036e-06, + "loss": 0.7738592028617859, + "step": 4266 + }, + { + "epoch": 1.5036123348017623, + "grad_norm": 1.7835054391965142, + "learning_rate": 3.188922530067402e-06, + "loss": 0.6418012380599976, + "step": 4267 + }, + { + "epoch": 1.5039647577092512, + "grad_norm": 
2.0481798246782628, + "learning_rate": 3.1846576972154343e-06, + "loss": 0.639055609703064, + "step": 4268 + }, + { + "epoch": 1.5043171806167401, + "grad_norm": 1.8365579809471801, + "learning_rate": 3.1803951779600774e-06, + "loss": 0.5512406229972839, + "step": 4269 + }, + { + "epoch": 1.504669603524229, + "grad_norm": 1.7182877192220278, + "learning_rate": 3.1761349737483194e-06, + "loss": 0.5838354229927063, + "step": 4270 + }, + { + "epoch": 1.505022026431718, + "grad_norm": 1.5090233544437164, + "learning_rate": 3.1718770860263747e-06, + "loss": 0.5903568267822266, + "step": 4271 + }, + { + "epoch": 1.505374449339207, + "grad_norm": 1.761348463041135, + "learning_rate": 3.1676215162396604e-06, + "loss": 0.5610073804855347, + "step": 4272 + }, + { + "epoch": 1.5057268722466959, + "grad_norm": 1.9899291186285208, + "learning_rate": 3.163368265832809e-06, + "loss": 0.6543136835098267, + "step": 4273 + }, + { + "epoch": 1.506079295154185, + "grad_norm": 1.9484911821126696, + "learning_rate": 3.1591173362496686e-06, + "loss": 0.6586440801620483, + "step": 4274 + }, + { + "epoch": 1.506431718061674, + "grad_norm": 1.7389367867721892, + "learning_rate": 3.1548687289332958e-06, + "loss": 0.5360713601112366, + "step": 4275 + }, + { + "epoch": 1.5067841409691631, + "grad_norm": 2.1157677007043243, + "learning_rate": 3.1506224453259615e-06, + "loss": 0.6695356369018555, + "step": 4276 + }, + { + "epoch": 1.507136563876652, + "grad_norm": 1.7594436585853632, + "learning_rate": 3.146378486869146e-06, + "loss": 0.5708016753196716, + "step": 4277 + }, + { + "epoch": 1.507488986784141, + "grad_norm": 1.997964983412431, + "learning_rate": 3.142136855003538e-06, + "loss": 0.5412342548370361, + "step": 4278 + }, + { + "epoch": 1.50784140969163, + "grad_norm": 1.645092688511499, + "learning_rate": 3.1378975511690468e-06, + "loss": 0.5392874479293823, + "step": 4279 + }, + { + "epoch": 1.5081938325991189, + "grad_norm": 2.1591157791946256, + "learning_rate": 
3.133660576804781e-06, + "loss": 0.6559237241744995, + "step": 4280 + }, + { + "epoch": 1.5085462555066078, + "grad_norm": 1.6625372644532221, + "learning_rate": 3.1294259333490597e-06, + "loss": 0.49973511695861816, + "step": 4281 + }, + { + "epoch": 1.5088986784140968, + "grad_norm": 1.9292311285357981, + "learning_rate": 3.1251936222394152e-06, + "loss": 0.5458316206932068, + "step": 4282 + }, + { + "epoch": 1.509251101321586, + "grad_norm": 1.7771232071244591, + "learning_rate": 3.120963644912579e-06, + "loss": 0.628986656665802, + "step": 4283 + }, + { + "epoch": 1.5096035242290748, + "grad_norm": 1.956059007614116, + "learning_rate": 3.1167360028045103e-06, + "loss": 0.6234235167503357, + "step": 4284 + }, + { + "epoch": 1.509955947136564, + "grad_norm": 1.832929038299875, + "learning_rate": 3.112510697350348e-06, + "loss": 0.49892476201057434, + "step": 4285 + }, + { + "epoch": 1.510308370044053, + "grad_norm": 1.859590789761001, + "learning_rate": 3.1082877299844562e-06, + "loss": 0.42951709032058716, + "step": 4286 + }, + { + "epoch": 1.5106607929515419, + "grad_norm": 1.9658176092994974, + "learning_rate": 3.1040671021404045e-06, + "loss": 0.6392263770103455, + "step": 4287 + }, + { + "epoch": 1.5110132158590308, + "grad_norm": 1.9240075529588605, + "learning_rate": 3.099848815250964e-06, + "loss": 0.6198933124542236, + "step": 4288 + }, + { + "epoch": 1.5113656387665197, + "grad_norm": 2.6550374581713436, + "learning_rate": 3.0956328707481055e-06, + "loss": 0.7626048922538757, + "step": 4289 + }, + { + "epoch": 1.5117180616740087, + "grad_norm": 2.057470135822257, + "learning_rate": 3.0914192700630175e-06, + "loss": 0.5245747566223145, + "step": 4290 + }, + { + "epoch": 1.5120704845814978, + "grad_norm": 2.016409834872785, + "learning_rate": 3.0872080146260818e-06, + "loss": 0.6788556575775146, + "step": 4291 + }, + { + "epoch": 1.5124229074889868, + "grad_norm": 1.8970717527543317, + "learning_rate": 3.082999105866897e-06, + "loss": 0.6224241852760315, 
+ "step": 4292 + }, + { + "epoch": 1.512775330396476, + "grad_norm": 1.8828342237083628, + "learning_rate": 3.0787925452142477e-06, + "loss": 0.706061840057373, + "step": 4293 + }, + { + "epoch": 1.5131277533039649, + "grad_norm": 1.8530285911040203, + "learning_rate": 3.07458833409613e-06, + "loss": 0.7075262665748596, + "step": 4294 + }, + { + "epoch": 1.5134801762114538, + "grad_norm": 1.8075779914700747, + "learning_rate": 3.0703864739397494e-06, + "loss": 0.4912101626396179, + "step": 4295 + }, + { + "epoch": 1.5138325991189427, + "grad_norm": 1.908543179959353, + "learning_rate": 3.066186966171507e-06, + "loss": 0.6530265808105469, + "step": 4296 + }, + { + "epoch": 1.5141850220264317, + "grad_norm": 2.12821134565194, + "learning_rate": 3.0619898122169946e-06, + "loss": 0.4905887246131897, + "step": 4297 + }, + { + "epoch": 1.5145374449339206, + "grad_norm": 1.626400447189927, + "learning_rate": 3.057795013501025e-06, + "loss": 0.5025225281715393, + "step": 4298 + }, + { + "epoch": 1.5148898678414096, + "grad_norm": 1.945132129374327, + "learning_rate": 3.0536025714475946e-06, + "loss": 0.5769479274749756, + "step": 4299 + }, + { + "epoch": 1.5152422907488987, + "grad_norm": 2.0281621255217526, + "learning_rate": 3.049412487479919e-06, + "loss": 0.6275384426116943, + "step": 4300 + }, + { + "epoch": 1.5155947136563876, + "grad_norm": 1.6860684781531563, + "learning_rate": 3.04522476302039e-06, + "loss": 0.5555096864700317, + "step": 4301 + }, + { + "epoch": 1.5159471365638768, + "grad_norm": 1.9260394424858205, + "learning_rate": 3.0410393994906096e-06, + "loss": 0.5605635643005371, + "step": 4302 + }, + { + "epoch": 1.5162995594713657, + "grad_norm": 2.127824876873509, + "learning_rate": 3.0368563983113864e-06, + "loss": 0.6006621718406677, + "step": 4303 + }, + { + "epoch": 1.5166519823788547, + "grad_norm": 2.012171091410243, + "learning_rate": 3.0326757609027147e-06, + "loss": 0.5288259983062744, + "step": 4304 + }, + { + "epoch": 1.5170044052863436, + 
"grad_norm": 1.7034257525965926, + "learning_rate": 3.0284974886837903e-06, + "loss": 0.5671676993370056, + "step": 4305 + }, + { + "epoch": 1.5173568281938326, + "grad_norm": 2.496889571382279, + "learning_rate": 3.0243215830730075e-06, + "loss": 0.6072134971618652, + "step": 4306 + }, + { + "epoch": 1.5177092511013215, + "grad_norm": 1.726261889224961, + "learning_rate": 3.020148045487953e-06, + "loss": 0.6010481119155884, + "step": 4307 + }, + { + "epoch": 1.5180616740088104, + "grad_norm": 1.6250908189476003, + "learning_rate": 3.0159768773454225e-06, + "loss": 0.6126751899719238, + "step": 4308 + }, + { + "epoch": 1.5184140969162996, + "grad_norm": 1.6123380534859018, + "learning_rate": 3.011808080061387e-06, + "loss": 0.5408819317817688, + "step": 4309 + }, + { + "epoch": 1.5187665198237885, + "grad_norm": 1.6792977324898095, + "learning_rate": 3.0076416550510255e-06, + "loss": 0.6528562307357788, + "step": 4310 + }, + { + "epoch": 1.5191189427312777, + "grad_norm": 1.6431948485087644, + "learning_rate": 3.003477603728715e-06, + "loss": 0.6355241537094116, + "step": 4311 + }, + { + "epoch": 1.5194713656387666, + "grad_norm": 1.7630338655444058, + "learning_rate": 2.9993159275080174e-06, + "loss": 0.5511878728866577, + "step": 4312 + }, + { + "epoch": 1.5198237885462555, + "grad_norm": 1.9093354982688662, + "learning_rate": 2.9951566278016943e-06, + "loss": 0.5066816806793213, + "step": 4313 + }, + { + "epoch": 1.5201762114537445, + "grad_norm": 1.676344611272679, + "learning_rate": 2.9909997060216966e-06, + "loss": 0.5636533498764038, + "step": 4314 + }, + { + "epoch": 1.5205286343612334, + "grad_norm": 1.8885420705538216, + "learning_rate": 2.9868451635791706e-06, + "loss": 0.49742352962493896, + "step": 4315 + }, + { + "epoch": 1.5208810572687224, + "grad_norm": 2.013877525146858, + "learning_rate": 2.9826930018844533e-06, + "loss": 0.7264617681503296, + "step": 4316 + }, + { + "epoch": 1.5212334801762113, + "grad_norm": 1.8792043539230026, + 
"learning_rate": 2.978543222347076e-06, + "loss": 0.5342350006103516, + "step": 4317 + }, + { + "epoch": 1.5215859030837005, + "grad_norm": 1.7569176385310192, + "learning_rate": 2.9743958263757554e-06, + "loss": 0.4324883818626404, + "step": 4318 + }, + { + "epoch": 1.5219383259911894, + "grad_norm": 1.8546496052344164, + "learning_rate": 2.970250815378409e-06, + "loss": 0.5867510437965393, + "step": 4319 + }, + { + "epoch": 1.5222907488986785, + "grad_norm": 1.8415605839915816, + "learning_rate": 2.966108190762138e-06, + "loss": 0.7176594734191895, + "step": 4320 + }, + { + "epoch": 1.5226431718061675, + "grad_norm": 1.967906535494615, + "learning_rate": 2.9619679539332337e-06, + "loss": 0.5810995101928711, + "step": 4321 + }, + { + "epoch": 1.5229955947136564, + "grad_norm": 2.478705006420029, + "learning_rate": 2.957830106297177e-06, + "loss": 0.6262675523757935, + "step": 4322 + }, + { + "epoch": 1.5233480176211454, + "grad_norm": 2.1743670559442245, + "learning_rate": 2.9536946492586383e-06, + "loss": 0.7743325233459473, + "step": 4323 + }, + { + "epoch": 1.5237004405286343, + "grad_norm": 1.7129659102014092, + "learning_rate": 2.9495615842214776e-06, + "loss": 0.7706553936004639, + "step": 4324 + }, + { + "epoch": 1.5240528634361232, + "grad_norm": 1.6835245148440698, + "learning_rate": 2.9454309125887405e-06, + "loss": 0.5982425808906555, + "step": 4325 + }, + { + "epoch": 1.5244052863436124, + "grad_norm": 1.8547174799711497, + "learning_rate": 2.9413026357626596e-06, + "loss": 0.5580830574035645, + "step": 4326 + }, + { + "epoch": 1.5247577092511013, + "grad_norm": 1.8954299514318398, + "learning_rate": 2.937176755144662e-06, + "loss": 0.5316063165664673, + "step": 4327 + }, + { + "epoch": 1.5251101321585903, + "grad_norm": 1.7578719545795178, + "learning_rate": 2.9330532721353523e-06, + "loss": 0.574161171913147, + "step": 4328 + }, + { + "epoch": 1.5254625550660794, + "grad_norm": 1.7055567103896054, + "learning_rate": 2.9289321881345257e-06, + "loss": 
0.5339558720588684, + "step": 4329 + }, + { + "epoch": 1.5258149779735684, + "grad_norm": 1.7071106155323514, + "learning_rate": 2.9248135045411607e-06, + "loss": 0.594109296798706, + "step": 4330 + }, + { + "epoch": 1.5261674008810573, + "grad_norm": 2.0022142230843873, + "learning_rate": 2.9206972227534237e-06, + "loss": 0.5953024625778198, + "step": 4331 + }, + { + "epoch": 1.5265198237885462, + "grad_norm": 1.6369885387081085, + "learning_rate": 2.916583344168663e-06, + "loss": 0.5142296552658081, + "step": 4332 + }, + { + "epoch": 1.5268722466960352, + "grad_norm": 1.7205930689665365, + "learning_rate": 2.912471870183411e-06, + "loss": 0.5796314477920532, + "step": 4333 + }, + { + "epoch": 1.527224669603524, + "grad_norm": 1.984086822092815, + "learning_rate": 2.9083628021933886e-06, + "loss": 0.7202566862106323, + "step": 4334 + }, + { + "epoch": 1.5275770925110133, + "grad_norm": 2.2205082372485756, + "learning_rate": 2.9042561415934956e-06, + "loss": 0.6684188842773438, + "step": 4335 + }, + { + "epoch": 1.5279295154185022, + "grad_norm": 1.7000543577524454, + "learning_rate": 2.9001518897778147e-06, + "loss": 0.5377634763717651, + "step": 4336 + }, + { + "epoch": 1.5282819383259914, + "grad_norm": 1.7985805373418047, + "learning_rate": 2.8960500481396115e-06, + "loss": 0.5780486464500427, + "step": 4337 + }, + { + "epoch": 1.5286343612334803, + "grad_norm": 1.7528900086241466, + "learning_rate": 2.891950618071333e-06, + "loss": 0.6020476818084717, + "step": 4338 + }, + { + "epoch": 1.5289867841409692, + "grad_norm": 2.1939247460310303, + "learning_rate": 2.8878536009646106e-06, + "loss": 0.6076337099075317, + "step": 4339 + }, + { + "epoch": 1.5293392070484582, + "grad_norm": 1.9795227787355654, + "learning_rate": 2.883758998210251e-06, + "loss": 0.6370673179626465, + "step": 4340 + }, + { + "epoch": 1.529691629955947, + "grad_norm": 1.8686242611734982, + "learning_rate": 2.879666811198244e-06, + "loss": 0.41594892740249634, + "step": 4341 + }, + { + 
"epoch": 1.530044052863436, + "grad_norm": 1.9135398095116771, + "learning_rate": 2.8755770413177632e-06, + "loss": 0.4506857693195343, + "step": 4342 + }, + { + "epoch": 1.530396475770925, + "grad_norm": 1.730478313082556, + "learning_rate": 2.8714896899571575e-06, + "loss": 0.5883188247680664, + "step": 4343 + }, + { + "epoch": 1.5307488986784141, + "grad_norm": 1.9672614741507624, + "learning_rate": 2.8674047585039545e-06, + "loss": 0.6327757239341736, + "step": 4344 + }, + { + "epoch": 1.531101321585903, + "grad_norm": 1.958310227558085, + "learning_rate": 2.863322248344862e-06, + "loss": 0.6241307258605957, + "step": 4345 + }, + { + "epoch": 1.5314537444933922, + "grad_norm": 1.9476460174005816, + "learning_rate": 2.859242160865764e-06, + "loss": 0.6982603669166565, + "step": 4346 + }, + { + "epoch": 1.5318061674008812, + "grad_norm": 1.8365437525119523, + "learning_rate": 2.8551644974517236e-06, + "loss": 0.6293624639511108, + "step": 4347 + }, + { + "epoch": 1.53215859030837, + "grad_norm": 1.8744498682554205, + "learning_rate": 2.85108925948698e-06, + "loss": 0.5630898475646973, + "step": 4348 + }, + { + "epoch": 1.532511013215859, + "grad_norm": 1.9156047998547734, + "learning_rate": 2.847016448354948e-06, + "loss": 0.5300726294517517, + "step": 4349 + }, + { + "epoch": 1.532863436123348, + "grad_norm": 1.763805411540431, + "learning_rate": 2.8429460654382257e-06, + "loss": 0.6302311420440674, + "step": 4350 + }, + { + "epoch": 1.533215859030837, + "grad_norm": 2.0169994553834467, + "learning_rate": 2.8388781121185815e-06, + "loss": 0.5063371658325195, + "step": 4351 + }, + { + "epoch": 1.5335682819383258, + "grad_norm": 1.995201540637565, + "learning_rate": 2.8348125897769496e-06, + "loss": 0.6116877198219299, + "step": 4352 + }, + { + "epoch": 1.533920704845815, + "grad_norm": 1.7792408625607217, + "learning_rate": 2.830749499793458e-06, + "loss": 0.5671982169151306, + "step": 4353 + }, + { + "epoch": 1.534273127753304, + "grad_norm": 1.927509688688397, 
+ "learning_rate": 2.826688843547395e-06, + "loss": 0.6537752747535706, + "step": 4354 + }, + { + "epoch": 1.534625550660793, + "grad_norm": 2.0558781996543805, + "learning_rate": 2.8226306224172283e-06, + "loss": 0.6608545780181885, + "step": 4355 + }, + { + "epoch": 1.534977973568282, + "grad_norm": 2.2266474146630655, + "learning_rate": 2.8185748377805977e-06, + "loss": 0.7038587331771851, + "step": 4356 + }, + { + "epoch": 1.535330396475771, + "grad_norm": 2.084521400671401, + "learning_rate": 2.8145214910143128e-06, + "loss": 0.7422336339950562, + "step": 4357 + }, + { + "epoch": 1.53568281938326, + "grad_norm": 1.7545985325343467, + "learning_rate": 2.8104705834943625e-06, + "loss": 0.5739270448684692, + "step": 4358 + }, + { + "epoch": 1.5360352422907488, + "grad_norm": 1.8063052751864486, + "learning_rate": 2.8064221165959073e-06, + "loss": 0.6429908275604248, + "step": 4359 + }, + { + "epoch": 1.5363876651982378, + "grad_norm": 2.071223390835828, + "learning_rate": 2.802376091693264e-06, + "loss": 0.5660578012466431, + "step": 4360 + }, + { + "epoch": 1.5367400881057267, + "grad_norm": 1.7313945668822706, + "learning_rate": 2.798332510159942e-06, + "loss": 0.4507398009300232, + "step": 4361 + }, + { + "epoch": 1.5370925110132159, + "grad_norm": 1.638677595892734, + "learning_rate": 2.7942913733686063e-06, + "loss": 0.5107634663581848, + "step": 4362 + }, + { + "epoch": 1.5374449339207048, + "grad_norm": 1.8351099655043759, + "learning_rate": 2.790252682691106e-06, + "loss": 0.505529522895813, + "step": 4363 + }, + { + "epoch": 1.537797356828194, + "grad_norm": 1.7171378177734038, + "learning_rate": 2.7862164394984405e-06, + "loss": 0.459098219871521, + "step": 4364 + }, + { + "epoch": 1.538149779735683, + "grad_norm": 1.6256824509042396, + "learning_rate": 2.782182645160789e-06, + "loss": 0.5200169086456299, + "step": 4365 + }, + { + "epoch": 1.5385022026431718, + "grad_norm": 2.1593491644665908, + "learning_rate": 2.778151301047506e-06, + "loss": 
0.6723796725273132, + "step": 4366 + }, + { + "epoch": 1.5388546255506608, + "grad_norm": 1.7552269414614434, + "learning_rate": 2.7741224085271067e-06, + "loss": 0.5385584831237793, + "step": 4367 + }, + { + "epoch": 1.5392070484581497, + "grad_norm": 1.6358174263890735, + "learning_rate": 2.770095968967267e-06, + "loss": 0.5766934156417847, + "step": 4368 + }, + { + "epoch": 1.5395594713656386, + "grad_norm": 1.6116842273066272, + "learning_rate": 2.766071983734845e-06, + "loss": 0.6303011178970337, + "step": 4369 + }, + { + "epoch": 1.5399118942731278, + "grad_norm": 2.0294439046284736, + "learning_rate": 2.7620504541958525e-06, + "loss": 0.6192827224731445, + "step": 4370 + }, + { + "epoch": 1.5402643171806167, + "grad_norm": 1.9731545400175885, + "learning_rate": 2.758031381715485e-06, + "loss": 0.543215811252594, + "step": 4371 + }, + { + "epoch": 1.5406167400881057, + "grad_norm": 1.8102023756492311, + "learning_rate": 2.7540147676580808e-06, + "loss": 0.6364312171936035, + "step": 4372 + }, + { + "epoch": 1.5409691629955948, + "grad_norm": 1.7440307883728075, + "learning_rate": 2.750000613387157e-06, + "loss": 0.5625254511833191, + "step": 4373 + }, + { + "epoch": 1.5413215859030838, + "grad_norm": 2.1646055145888377, + "learning_rate": 2.7459889202654e-06, + "loss": 0.7304128408432007, + "step": 4374 + }, + { + "epoch": 1.5416740088105727, + "grad_norm": 2.1627384337401296, + "learning_rate": 2.7419796896546536e-06, + "loss": 0.676097571849823, + "step": 4375 + }, + { + "epoch": 1.5420264317180616, + "grad_norm": 1.9373952441867042, + "learning_rate": 2.7379729229159193e-06, + "loss": 0.7024539709091187, + "step": 4376 + }, + { + "epoch": 1.5423788546255506, + "grad_norm": 1.6778176206961017, + "learning_rate": 2.7339686214093774e-06, + "loss": 0.6357964277267456, + "step": 4377 + }, + { + "epoch": 1.5427312775330395, + "grad_norm": 1.8606991682829432, + "learning_rate": 2.729966786494361e-06, + "loss": 0.5254555940628052, + "step": 4378 + }, + { + 
"epoch": 1.5430837004405287, + "grad_norm": 1.527570009912515, + "learning_rate": 2.7259674195293697e-06, + "loss": 0.4899883270263672, + "step": 4379 + }, + { + "epoch": 1.5434361233480176, + "grad_norm": 1.725531709071361, + "learning_rate": 2.721970521872063e-06, + "loss": 0.5750056505203247, + "step": 4380 + }, + { + "epoch": 1.5437885462555068, + "grad_norm": 1.8900737960638598, + "learning_rate": 2.71797609487926e-06, + "loss": 0.5852059125900269, + "step": 4381 + }, + { + "epoch": 1.5441409691629957, + "grad_norm": 1.8258629839457563, + "learning_rate": 2.71398413990695e-06, + "loss": 0.6360914707183838, + "step": 4382 + }, + { + "epoch": 1.5444933920704846, + "grad_norm": 1.7586915096989222, + "learning_rate": 2.7099946583102764e-06, + "loss": 0.5120062828063965, + "step": 4383 + }, + { + "epoch": 1.5448458149779736, + "grad_norm": 2.068877348919367, + "learning_rate": 2.706007651443543e-06, + "loss": 0.5798901319503784, + "step": 4384 + }, + { + "epoch": 1.5451982378854625, + "grad_norm": 2.014366735127449, + "learning_rate": 2.702023120660213e-06, + "loss": 0.5112065076828003, + "step": 4385 + }, + { + "epoch": 1.5455506607929514, + "grad_norm": 1.9281653354114374, + "learning_rate": 2.6980410673129133e-06, + "loss": 0.6136611700057983, + "step": 4386 + }, + { + "epoch": 1.5459030837004404, + "grad_norm": 1.6841076662412324, + "learning_rate": 2.694061492753426e-06, + "loss": 0.5944457054138184, + "step": 4387 + }, + { + "epoch": 1.5462555066079295, + "grad_norm": 1.9404009079173157, + "learning_rate": 2.690084398332692e-06, + "loss": 0.5931667685508728, + "step": 4388 + }, + { + "epoch": 1.5466079295154185, + "grad_norm": 1.607840859056915, + "learning_rate": 2.686109785400809e-06, + "loss": 0.6112217307090759, + "step": 4389 + }, + { + "epoch": 1.5469603524229076, + "grad_norm": 1.9090904865448288, + "learning_rate": 2.68213765530704e-06, + "loss": 0.549437940120697, + "step": 4390 + }, + { + "epoch": 1.5473127753303966, + "grad_norm": 
1.9826888565576624, + "learning_rate": 2.6781680093997965e-06, + "loss": 0.674758791923523, + "step": 4391 + }, + { + "epoch": 1.5476651982378855, + "grad_norm": 1.874293916028551, + "learning_rate": 2.6742008490266504e-06, + "loss": 0.6015446186065674, + "step": 4392 + }, + { + "epoch": 1.5480176211453744, + "grad_norm": 2.2556212033260223, + "learning_rate": 2.6702361755343278e-06, + "loss": 0.5512514710426331, + "step": 4393 + }, + { + "epoch": 1.5483700440528634, + "grad_norm": 2.607818594949077, + "learning_rate": 2.666273990268713e-06, + "loss": 0.6443158984184265, + "step": 4394 + }, + { + "epoch": 1.5487224669603523, + "grad_norm": 1.840692212890546, + "learning_rate": 2.6623142945748447e-06, + "loss": 0.5682512521743774, + "step": 4395 + }, + { + "epoch": 1.5490748898678413, + "grad_norm": 2.173690182254911, + "learning_rate": 2.658357089796917e-06, + "loss": 0.5544074773788452, + "step": 4396 + }, + { + "epoch": 1.5494273127753304, + "grad_norm": 1.726846155573174, + "learning_rate": 2.6544023772782736e-06, + "loss": 0.5811636447906494, + "step": 4397 + }, + { + "epoch": 1.5497797356828193, + "grad_norm": 1.790573455353959, + "learning_rate": 2.650450158361422e-06, + "loss": 0.4696553647518158, + "step": 4398 + }, + { + "epoch": 1.5501321585903085, + "grad_norm": 1.918353319441468, + "learning_rate": 2.6465004343880153e-06, + "loss": 0.6897521615028381, + "step": 4399 + }, + { + "epoch": 1.5504845814977974, + "grad_norm": 1.9780672696205217, + "learning_rate": 2.6425532066988613e-06, + "loss": 0.6154924631118774, + "step": 4400 + }, + { + "epoch": 1.5508370044052864, + "grad_norm": 2.0803038103367815, + "learning_rate": 2.6386084766339214e-06, + "loss": 0.5333596467971802, + "step": 4401 + }, + { + "epoch": 1.5511894273127753, + "grad_norm": 1.8190921046801005, + "learning_rate": 2.634666245532309e-06, + "loss": 0.6633985042572021, + "step": 4402 + }, + { + "epoch": 1.5515418502202643, + "grad_norm": 1.6722563074159322, + "learning_rate": 
2.630726514732289e-06, + "loss": 0.7913509607315063, + "step": 4403 + }, + { + "epoch": 1.5518942731277532, + "grad_norm": 1.8118598393520884, + "learning_rate": 2.6267892855712763e-06, + "loss": 0.5776455402374268, + "step": 4404 + }, + { + "epoch": 1.5522466960352423, + "grad_norm": 1.68862603841886, + "learning_rate": 2.6228545593858357e-06, + "loss": 0.5912357568740845, + "step": 4405 + }, + { + "epoch": 1.5525991189427313, + "grad_norm": 2.1281645633634274, + "learning_rate": 2.618922337511689e-06, + "loss": 0.49319127202033997, + "step": 4406 + }, + { + "epoch": 1.5529515418502202, + "grad_norm": 1.9165140700223777, + "learning_rate": 2.6149926212837016e-06, + "loss": 0.5805023908615112, + "step": 4407 + }, + { + "epoch": 1.5533039647577094, + "grad_norm": 1.8889646736612442, + "learning_rate": 2.6110654120358902e-06, + "loss": 0.5635806918144226, + "step": 4408 + }, + { + "epoch": 1.5536563876651983, + "grad_norm": 1.6927734818193383, + "learning_rate": 2.6071407111014178e-06, + "loss": 0.5006709694862366, + "step": 4409 + }, + { + "epoch": 1.5540088105726872, + "grad_norm": 2.1152391301202695, + "learning_rate": 2.6032185198126005e-06, + "loss": 0.6035311818122864, + "step": 4410 + }, + { + "epoch": 1.5543612334801762, + "grad_norm": 1.7863466908276826, + "learning_rate": 2.599298839500899e-06, + "loss": 0.5978977680206299, + "step": 4411 + }, + { + "epoch": 1.5547136563876651, + "grad_norm": 1.8741259739913476, + "learning_rate": 2.5953816714969194e-06, + "loss": 0.6330617070198059, + "step": 4412 + }, + { + "epoch": 1.555066079295154, + "grad_norm": 2.090756152665107, + "learning_rate": 2.591467017130426e-06, + "loss": 0.6541750431060791, + "step": 4413 + }, + { + "epoch": 1.5554185022026432, + "grad_norm": 2.229850729984303, + "learning_rate": 2.5875548777303204e-06, + "loss": 0.5503655076026917, + "step": 4414 + }, + { + "epoch": 1.5557709251101322, + "grad_norm": 1.7715926792210983, + "learning_rate": 2.583645254624645e-06, + "loss": 
0.5117509365081787, + "step": 4415 + }, + { + "epoch": 1.5561233480176213, + "grad_norm": 2.2945620883910953, + "learning_rate": 2.5797381491406027e-06, + "loss": 0.6699894070625305, + "step": 4416 + }, + { + "epoch": 1.5564757709251102, + "grad_norm": 2.0695182526571765, + "learning_rate": 2.5758335626045308e-06, + "loss": 0.6870071291923523, + "step": 4417 + }, + { + "epoch": 1.5568281938325992, + "grad_norm": 1.7821456199762375, + "learning_rate": 2.571931496341916e-06, + "loss": 0.7680954933166504, + "step": 4418 + }, + { + "epoch": 1.5571806167400881, + "grad_norm": 2.2345593449058203, + "learning_rate": 2.568031951677389e-06, + "loss": 0.6504727602005005, + "step": 4419 + }, + { + "epoch": 1.557533039647577, + "grad_norm": 1.6796846625470907, + "learning_rate": 2.5641349299347196e-06, + "loss": 0.7101249098777771, + "step": 4420 + }, + { + "epoch": 1.557885462555066, + "grad_norm": 1.6800594114237326, + "learning_rate": 2.560240432436831e-06, + "loss": 0.5734864473342896, + "step": 4421 + }, + { + "epoch": 1.558237885462555, + "grad_norm": 2.065356657851052, + "learning_rate": 2.5563484605057854e-06, + "loss": 0.48660311102867126, + "step": 4422 + }, + { + "epoch": 1.558590308370044, + "grad_norm": 2.0678483817870847, + "learning_rate": 2.552459015462776e-06, + "loss": 0.6442986726760864, + "step": 4423 + }, + { + "epoch": 1.558942731277533, + "grad_norm": 1.721146259770593, + "learning_rate": 2.548572098628158e-06, + "loss": 0.5871995091438293, + "step": 4424 + }, + { + "epoch": 1.5592951541850222, + "grad_norm": 1.8900651182173844, + "learning_rate": 2.544687711321415e-06, + "loss": 0.5899579524993896, + "step": 4425 + }, + { + "epoch": 1.5596475770925111, + "grad_norm": 1.589021195095579, + "learning_rate": 2.540805854861177e-06, + "loss": 0.571341872215271, + "step": 4426 + }, + { + "epoch": 1.56, + "grad_norm": 2.074587814486514, + "learning_rate": 2.5369265305652112e-06, + "loss": 0.6297308206558228, + "step": 4427 + }, + { + "epoch": 1.560352422907489, 
+ "grad_norm": 1.957815284803115, + "learning_rate": 2.5330497397504274e-06, + "loss": 0.6277692317962646, + "step": 4428 + }, + { + "epoch": 1.560704845814978, + "grad_norm": 1.8075270549654299, + "learning_rate": 2.5291754837328787e-06, + "loss": 0.5124595165252686, + "step": 4429 + }, + { + "epoch": 1.5610572687224669, + "grad_norm": 2.027466093132035, + "learning_rate": 2.5253037638277557e-06, + "loss": 0.6777669191360474, + "step": 4430 + }, + { + "epoch": 1.5614096916299558, + "grad_norm": 2.299371691906574, + "learning_rate": 2.521434581349378e-06, + "loss": 0.7380247116088867, + "step": 4431 + }, + { + "epoch": 1.561762114537445, + "grad_norm": 2.0566157739817825, + "learning_rate": 2.5175679376112206e-06, + "loss": 0.6605849266052246, + "step": 4432 + }, + { + "epoch": 1.562114537444934, + "grad_norm": 1.7899790415054606, + "learning_rate": 2.5137038339258837e-06, + "loss": 0.5688329935073853, + "step": 4433 + }, + { + "epoch": 1.562466960352423, + "grad_norm": 2.1227992795896258, + "learning_rate": 2.5098422716051197e-06, + "loss": 0.6731508374214172, + "step": 4434 + }, + { + "epoch": 1.562819383259912, + "grad_norm": 1.766889438914358, + "learning_rate": 2.505983251959798e-06, + "loss": 0.5177330374717712, + "step": 4435 + }, + { + "epoch": 1.563171806167401, + "grad_norm": 1.793841264632356, + "learning_rate": 2.502126776299938e-06, + "loss": 0.5307918787002563, + "step": 4436 + }, + { + "epoch": 1.5635242290748899, + "grad_norm": 1.8402321267228738, + "learning_rate": 2.4982728459346974e-06, + "loss": 0.59647536277771, + "step": 4437 + }, + { + "epoch": 1.5638766519823788, + "grad_norm": 2.049156650890273, + "learning_rate": 2.494421462172365e-06, + "loss": 0.6215553283691406, + "step": 4438 + }, + { + "epoch": 1.5642290748898677, + "grad_norm": 1.7976631043220852, + "learning_rate": 2.490572626320359e-06, + "loss": 0.49461615085601807, + "step": 4439 + }, + { + "epoch": 1.5645814977973567, + "grad_norm": 2.4138380625358757, + "learning_rate": 
2.486726339685247e-06, + "loss": 0.6625338196754456, + "step": 4440 + }, + { + "epoch": 1.5649339207048458, + "grad_norm": 1.5979739892152505, + "learning_rate": 2.4828826035727214e-06, + "loss": 0.4059983193874359, + "step": 4441 + }, + { + "epoch": 1.5652863436123348, + "grad_norm": 1.7298713789472393, + "learning_rate": 2.47904141928761e-06, + "loss": 0.6234895586967468, + "step": 4442 + }, + { + "epoch": 1.565638766519824, + "grad_norm": 1.8282339040044808, + "learning_rate": 2.4752027881338757e-06, + "loss": 0.513421893119812, + "step": 4443 + }, + { + "epoch": 1.5659911894273129, + "grad_norm": 2.0213648562049693, + "learning_rate": 2.4713667114146123e-06, + "loss": 0.6168510913848877, + "step": 4444 + }, + { + "epoch": 1.5663436123348018, + "grad_norm": 1.8904853102151467, + "learning_rate": 2.4675331904320533e-06, + "loss": 0.5474672317504883, + "step": 4445 + }, + { + "epoch": 1.5666960352422907, + "grad_norm": 2.020157324166176, + "learning_rate": 2.46370222648756e-06, + "loss": 0.7464281916618347, + "step": 4446 + }, + { + "epoch": 1.5670484581497797, + "grad_norm": 1.8187430699226648, + "learning_rate": 2.4598738208816155e-06, + "loss": 0.5890274047851562, + "step": 4447 + }, + { + "epoch": 1.5674008810572686, + "grad_norm": 2.0160604417207293, + "learning_rate": 2.4560479749138554e-06, + "loss": 0.7577700018882751, + "step": 4448 + }, + { + "epoch": 1.5677533039647578, + "grad_norm": 1.6711759350664435, + "learning_rate": 2.4522246898830302e-06, + "loss": 0.5374037027359009, + "step": 4449 + }, + { + "epoch": 1.5681057268722467, + "grad_norm": 1.7947512315133625, + "learning_rate": 2.4484039670870286e-06, + "loss": 0.44840407371520996, + "step": 4450 + }, + { + "epoch": 1.5684581497797356, + "grad_norm": 1.8087906354095658, + "learning_rate": 2.4445858078228647e-06, + "loss": 0.5144427418708801, + "step": 4451 + }, + { + "epoch": 1.5688105726872248, + "grad_norm": 1.7889124821216469, + "learning_rate": 2.440770213386684e-06, + "loss": 0.39119952917099, 
+ "step": 4452 + }, + { + "epoch": 1.5691629955947137, + "grad_norm": 1.6376212389282347, + "learning_rate": 2.436957185073766e-06, + "loss": 0.5287434458732605, + "step": 4453 + }, + { + "epoch": 1.5695154185022027, + "grad_norm": 2.2578778571267315, + "learning_rate": 2.4331467241785157e-06, + "loss": 0.568587064743042, + "step": 4454 + }, + { + "epoch": 1.5698678414096916, + "grad_norm": 3.2977149916111608, + "learning_rate": 2.429338831994458e-06, + "loss": 0.5522792339324951, + "step": 4455 + }, + { + "epoch": 1.5702202643171805, + "grad_norm": 1.7594156491061212, + "learning_rate": 2.425533509814262e-06, + "loss": 0.48070845007896423, + "step": 4456 + }, + { + "epoch": 1.5705726872246695, + "grad_norm": 1.631888097687176, + "learning_rate": 2.4217307589297135e-06, + "loss": 0.44293439388275146, + "step": 4457 + }, + { + "epoch": 1.5709251101321586, + "grad_norm": 1.933449446432769, + "learning_rate": 2.4179305806317266e-06, + "loss": 0.5753301382064819, + "step": 4458 + }, + { + "epoch": 1.5712775330396476, + "grad_norm": 1.9958241636570169, + "learning_rate": 2.414132976210346e-06, + "loss": 0.5873000025749207, + "step": 4459 + }, + { + "epoch": 1.5716299559471367, + "grad_norm": 2.2068877987049955, + "learning_rate": 2.410337946954736e-06, + "loss": 0.6084823608398438, + "step": 4460 + }, + { + "epoch": 1.5719823788546257, + "grad_norm": 1.743876311662913, + "learning_rate": 2.4065454941531963e-06, + "loss": 0.541124165058136, + "step": 4461 + }, + { + "epoch": 1.5723348017621146, + "grad_norm": 1.8080812306830252, + "learning_rate": 2.4027556190931446e-06, + "loss": 0.5170080661773682, + "step": 4462 + }, + { + "epoch": 1.5726872246696035, + "grad_norm": 1.817245899938438, + "learning_rate": 2.398968323061125e-06, + "loss": 0.5613514184951782, + "step": 4463 + }, + { + "epoch": 1.5730396475770925, + "grad_norm": 1.7097401781842303, + "learning_rate": 2.395183607342807e-06, + "loss": 0.6645728349685669, + "step": 4464 + }, + { + "epoch": 1.5733920704845814, 
+ "grad_norm": 1.8730205237982336, + "learning_rate": 2.391401473222983e-06, + "loss": 0.7077093124389648, + "step": 4465 + }, + { + "epoch": 1.5737444933920703, + "grad_norm": 1.7460518248753176, + "learning_rate": 2.387621921985571e-06, + "loss": 0.5687523484230042, + "step": 4466 + }, + { + "epoch": 1.5740969162995595, + "grad_norm": 1.9850945169232843, + "learning_rate": 2.38384495491361e-06, + "loss": 0.5837362408638, + "step": 4467 + }, + { + "epoch": 1.5744493392070484, + "grad_norm": 2.051593268912329, + "learning_rate": 2.3800705732892615e-06, + "loss": 0.5552037358283997, + "step": 4468 + }, + { + "epoch": 1.5748017621145376, + "grad_norm": 1.8128967121473578, + "learning_rate": 2.376298778393814e-06, + "loss": 0.5502952337265015, + "step": 4469 + }, + { + "epoch": 1.5751541850220265, + "grad_norm": 1.958629504700592, + "learning_rate": 2.3725295715076734e-06, + "loss": 0.5621509552001953, + "step": 4470 + }, + { + "epoch": 1.5755066079295155, + "grad_norm": 2.20917213599842, + "learning_rate": 2.3687629539103676e-06, + "loss": 0.6703782081604004, + "step": 4471 + }, + { + "epoch": 1.5758590308370044, + "grad_norm": 1.6659443121840707, + "learning_rate": 2.3649989268805453e-06, + "loss": 0.5681235194206238, + "step": 4472 + }, + { + "epoch": 1.5762114537444933, + "grad_norm": 1.6009126465101926, + "learning_rate": 2.361237491695978e-06, + "loss": 0.611667811870575, + "step": 4473 + }, + { + "epoch": 1.5765638766519823, + "grad_norm": 1.7200740539010873, + "learning_rate": 2.3574786496335546e-06, + "loss": 0.5758671760559082, + "step": 4474 + }, + { + "epoch": 1.5769162995594712, + "grad_norm": 1.9125579541010735, + "learning_rate": 2.3537224019692863e-06, + "loss": 0.4865596294403076, + "step": 4475 + }, + { + "epoch": 1.5772687224669604, + "grad_norm": 1.8564502689111453, + "learning_rate": 2.3499687499782976e-06, + "loss": 0.6356204152107239, + "step": 4476 + }, + { + "epoch": 1.5776211453744493, + "grad_norm": 2.1421860610476022, + "learning_rate": 
2.346217694934847e-06, + "loss": 0.7177166938781738, + "step": 4477 + }, + { + "epoch": 1.5779735682819385, + "grad_norm": 1.5480906826266605, + "learning_rate": 2.3424692381122882e-06, + "loss": 0.5727916955947876, + "step": 4478 + }, + { + "epoch": 1.5783259911894274, + "grad_norm": 1.8719733775312895, + "learning_rate": 2.3387233807831144e-06, + "loss": 0.4904511570930481, + "step": 4479 + }, + { + "epoch": 1.5786784140969163, + "grad_norm": 1.781780296857209, + "learning_rate": 2.3349801242189262e-06, + "loss": 0.6029622554779053, + "step": 4480 + }, + { + "epoch": 1.5790308370044053, + "grad_norm": 1.7377028122196188, + "learning_rate": 2.3312394696904404e-06, + "loss": 0.6462864875793457, + "step": 4481 + }, + { + "epoch": 1.5793832599118942, + "grad_norm": 2.2050402923740555, + "learning_rate": 2.327501418467495e-06, + "loss": 0.6000367403030396, + "step": 4482 + }, + { + "epoch": 1.5797356828193831, + "grad_norm": 1.8056795992302546, + "learning_rate": 2.3237659718190398e-06, + "loss": 0.5498829483985901, + "step": 4483 + }, + { + "epoch": 1.580088105726872, + "grad_norm": 1.9193344841770834, + "learning_rate": 2.320033131013142e-06, + "loss": 0.5445006489753723, + "step": 4484 + }, + { + "epoch": 1.5804405286343612, + "grad_norm": 1.737360484366453, + "learning_rate": 2.316302897316992e-06, + "loss": 0.4878338575363159, + "step": 4485 + }, + { + "epoch": 1.5807929515418502, + "grad_norm": 1.9395301127212525, + "learning_rate": 2.3125752719968763e-06, + "loss": 0.473583459854126, + "step": 4486 + }, + { + "epoch": 1.5811453744493393, + "grad_norm": 1.885736275905952, + "learning_rate": 2.308850256318218e-06, + "loss": 0.6530570983886719, + "step": 4487 + }, + { + "epoch": 1.5814977973568283, + "grad_norm": 1.9957270393411881, + "learning_rate": 2.30512785154554e-06, + "loss": 0.6925215721130371, + "step": 4488 + }, + { + "epoch": 1.5818502202643172, + "grad_norm": 2.319012517660613, + "learning_rate": 2.3014080589424837e-06, + "loss": 0.6210705637931824, + 
"step": 4489 + }, + { + "epoch": 1.5822026431718061, + "grad_norm": 1.9814470349632005, + "learning_rate": 2.2976908797718013e-06, + "loss": 0.5843231678009033, + "step": 4490 + }, + { + "epoch": 1.582555066079295, + "grad_norm": 1.8411432529202023, + "learning_rate": 2.2939763152953576e-06, + "loss": 0.7014307379722595, + "step": 4491 + }, + { + "epoch": 1.582907488986784, + "grad_norm": 2.432500927945977, + "learning_rate": 2.2902643667741386e-06, + "loss": 0.563744843006134, + "step": 4492 + }, + { + "epoch": 1.5832599118942732, + "grad_norm": 2.0467865020897227, + "learning_rate": 2.286555035468233e-06, + "loss": 0.6067275404930115, + "step": 4493 + }, + { + "epoch": 1.5836123348017621, + "grad_norm": 1.4471777617782167, + "learning_rate": 2.282848322636836e-06, + "loss": 0.5471328496932983, + "step": 4494 + }, + { + "epoch": 1.583964757709251, + "grad_norm": 1.8188988721843682, + "learning_rate": 2.2791442295382693e-06, + "loss": 0.4994550943374634, + "step": 4495 + }, + { + "epoch": 1.5843171806167402, + "grad_norm": 1.9672025899108128, + "learning_rate": 2.275442757429954e-06, + "loss": 0.6064262390136719, + "step": 4496 + }, + { + "epoch": 1.5846696035242291, + "grad_norm": 1.8109350365291292, + "learning_rate": 2.2717439075684268e-06, + "loss": 0.5119039416313171, + "step": 4497 + }, + { + "epoch": 1.585022026431718, + "grad_norm": 2.2031235285356883, + "learning_rate": 2.26804768120933e-06, + "loss": 0.7276502251625061, + "step": 4498 + }, + { + "epoch": 1.585374449339207, + "grad_norm": 2.0480046358265827, + "learning_rate": 2.264354079607416e-06, + "loss": 0.6175409555435181, + "step": 4499 + }, + { + "epoch": 1.585726872246696, + "grad_norm": 2.165546737643913, + "learning_rate": 2.2606631040165517e-06, + "loss": 0.6289592981338501, + "step": 4500 + }, + { + "epoch": 1.5860792951541849, + "grad_norm": 1.626913781336784, + "learning_rate": 2.2569747556897103e-06, + "loss": 0.5802761316299438, + "step": 4501 + }, + { + "epoch": 1.586431718061674, + 
"grad_norm": 1.6717876401169283, + "learning_rate": 2.2532890358789604e-06, + "loss": 0.5883978605270386, + "step": 4502 + }, + { + "epoch": 1.586784140969163, + "grad_norm": 1.7433478934489002, + "learning_rate": 2.2496059458355e-06, + "loss": 0.6915061473846436, + "step": 4503 + }, + { + "epoch": 1.5871365638766521, + "grad_norm": 1.7904879000491816, + "learning_rate": 2.2459254868096194e-06, + "loss": 0.6255539655685425, + "step": 4504 + }, + { + "epoch": 1.587488986784141, + "grad_norm": 2.0290072373401706, + "learning_rate": 2.2422476600507203e-06, + "loss": 0.6788307428359985, + "step": 4505 + }, + { + "epoch": 1.58784140969163, + "grad_norm": 1.8646329547804459, + "learning_rate": 2.2385724668073104e-06, + "loss": 0.5651443004608154, + "step": 4506 + }, + { + "epoch": 1.588193832599119, + "grad_norm": 1.6858252262208455, + "learning_rate": 2.2348999083270005e-06, + "loss": 0.5308901071548462, + "step": 4507 + }, + { + "epoch": 1.5885462555066079, + "grad_norm": 2.3264820621642084, + "learning_rate": 2.2312299858565156e-06, + "loss": 0.60570228099823, + "step": 4508 + }, + { + "epoch": 1.5888986784140968, + "grad_norm": 1.8330509972931788, + "learning_rate": 2.22756270064168e-06, + "loss": 0.6544185876846313, + "step": 4509 + }, + { + "epoch": 1.5892511013215858, + "grad_norm": 1.7565673285953047, + "learning_rate": 2.2238980539274156e-06, + "loss": 0.667883038520813, + "step": 4510 + }, + { + "epoch": 1.589603524229075, + "grad_norm": 1.7707733782287267, + "learning_rate": 2.2202360469577622e-06, + "loss": 0.647671103477478, + "step": 4511 + }, + { + "epoch": 1.5899559471365639, + "grad_norm": 1.8031539733499908, + "learning_rate": 2.216576680975856e-06, + "loss": 0.6990867257118225, + "step": 4512 + }, + { + "epoch": 1.590308370044053, + "grad_norm": 1.6913080596921681, + "learning_rate": 2.212919957223938e-06, + "loss": 0.6292023658752441, + "step": 4513 + }, + { + "epoch": 1.590660792951542, + "grad_norm": 2.0512598736304763, + "learning_rate": 
2.2092658769433504e-06, + "loss": 0.638721227645874, + "step": 4514 + }, + { + "epoch": 1.5910132158590309, + "grad_norm": 2.0710919586830365, + "learning_rate": 2.2056144413745396e-06, + "loss": 0.5622225403785706, + "step": 4515 + }, + { + "epoch": 1.5913656387665198, + "grad_norm": 1.9225600729192178, + "learning_rate": 2.2019656517570576e-06, + "loss": 0.44093507528305054, + "step": 4516 + }, + { + "epoch": 1.5917180616740088, + "grad_norm": 1.9689195876449703, + "learning_rate": 2.198319509329556e-06, + "loss": 0.6889619827270508, + "step": 4517 + }, + { + "epoch": 1.5920704845814977, + "grad_norm": 1.8723694409082583, + "learning_rate": 2.1946760153297773e-06, + "loss": 0.5873552560806274, + "step": 4518 + }, + { + "epoch": 1.5924229074889866, + "grad_norm": 2.3733819724747245, + "learning_rate": 2.191035170994584e-06, + "loss": 0.7172325849533081, + "step": 4519 + }, + { + "epoch": 1.5927753303964758, + "grad_norm": 1.5631566998768178, + "learning_rate": 2.187396977559927e-06, + "loss": 0.520845890045166, + "step": 4520 + }, + { + "epoch": 1.5931277533039647, + "grad_norm": 1.5657344992000655, + "learning_rate": 2.1837614362608574e-06, + "loss": 0.5241606831550598, + "step": 4521 + }, + { + "epoch": 1.5934801762114539, + "grad_norm": 2.0290302307971433, + "learning_rate": 2.1801285483315303e-06, + "loss": 0.583808422088623, + "step": 4522 + }, + { + "epoch": 1.5938325991189428, + "grad_norm": 1.829890026298915, + "learning_rate": 2.1764983150051955e-06, + "loss": 0.4648814797401428, + "step": 4523 + }, + { + "epoch": 1.5941850220264318, + "grad_norm": 1.9603824667877958, + "learning_rate": 2.1728707375142087e-06, + "loss": 0.590090274810791, + "step": 4524 + }, + { + "epoch": 1.5945374449339207, + "grad_norm": 2.0292397946897527, + "learning_rate": 2.16924581709002e-06, + "loss": 0.6554102897644043, + "step": 4525 + }, + { + "epoch": 1.5948898678414096, + "grad_norm": 2.011864917811992, + "learning_rate": 2.1656235549631677e-06, + "loss": 0.5880511999130249, 
+ "step": 4526 + }, + { + "epoch": 1.5952422907488986, + "grad_norm": 1.6246832017365502, + "learning_rate": 2.1620039523633074e-06, + "loss": 0.5779908299446106, + "step": 4527 + }, + { + "epoch": 1.5955947136563877, + "grad_norm": 1.9147900218294176, + "learning_rate": 2.1583870105191775e-06, + "loss": 0.5030412673950195, + "step": 4528 + }, + { + "epoch": 1.5959471365638767, + "grad_norm": 1.9632795275127009, + "learning_rate": 2.1547727306586173e-06, + "loss": 0.5667461156845093, + "step": 4529 + }, + { + "epoch": 1.5962995594713656, + "grad_norm": 2.3190730605108882, + "learning_rate": 2.151161114008563e-06, + "loss": 0.6820607781410217, + "step": 4530 + }, + { + "epoch": 1.5966519823788548, + "grad_norm": 1.7640709477354637, + "learning_rate": 2.1475521617950425e-06, + "loss": 0.6165209412574768, + "step": 4531 + }, + { + "epoch": 1.5970044052863437, + "grad_norm": 1.897918487033638, + "learning_rate": 2.1439458752431887e-06, + "loss": 0.5987168550491333, + "step": 4532 + }, + { + "epoch": 1.5973568281938326, + "grad_norm": 1.8946893490374197, + "learning_rate": 2.1403422555772226e-06, + "loss": 0.5161086320877075, + "step": 4533 + }, + { + "epoch": 1.5977092511013216, + "grad_norm": 1.817150642667859, + "learning_rate": 2.1367413040204543e-06, + "loss": 0.5216903686523438, + "step": 4534 + }, + { + "epoch": 1.5980616740088105, + "grad_norm": 1.7820775067820096, + "learning_rate": 2.133143021795302e-06, + "loss": 0.5664666891098022, + "step": 4535 + }, + { + "epoch": 1.5984140969162994, + "grad_norm": 1.8205676682468495, + "learning_rate": 2.129547410123268e-06, + "loss": 0.501051127910614, + "step": 4536 + }, + { + "epoch": 1.5987665198237886, + "grad_norm": 1.5799563385798543, + "learning_rate": 2.1259544702249515e-06, + "loss": 0.5466792583465576, + "step": 4537 + }, + { + "epoch": 1.5991189427312775, + "grad_norm": 1.9007615560911546, + "learning_rate": 2.122364203320043e-06, + "loss": 0.5295613408088684, + "step": 4538 + }, + { + "epoch": 
1.5994713656387667, + "grad_norm": 1.6670646942482272, + "learning_rate": 2.1187766106273224e-06, + "loss": 0.5406922101974487, + "step": 4539 + }, + { + "epoch": 1.5998237885462556, + "grad_norm": 2.0700620230157125, + "learning_rate": 2.1151916933646764e-06, + "loss": 0.5908178687095642, + "step": 4540 + }, + { + "epoch": 1.6001762114537446, + "grad_norm": 1.8405525752725544, + "learning_rate": 2.1116094527490594e-06, + "loss": 0.6207743883132935, + "step": 4541 + }, + { + "epoch": 1.6005286343612335, + "grad_norm": 2.7642600887250652, + "learning_rate": 2.1080298899965413e-06, + "loss": 0.5655614137649536, + "step": 4542 + }, + { + "epoch": 1.6008810572687224, + "grad_norm": 1.5764846584358823, + "learning_rate": 2.104453006322268e-06, + "loss": 0.6019319295883179, + "step": 4543 + }, + { + "epoch": 1.6012334801762114, + "grad_norm": 1.8499785252270624, + "learning_rate": 2.1008788029404794e-06, + "loss": 0.6109766364097595, + "step": 4544 + }, + { + "epoch": 1.6015859030837003, + "grad_norm": 1.8285934792669327, + "learning_rate": 2.0973072810645078e-06, + "loss": 0.5309078693389893, + "step": 4545 + }, + { + "epoch": 1.6019383259911895, + "grad_norm": 1.7116030885611606, + "learning_rate": 2.093738441906774e-06, + "loss": 0.5440298318862915, + "step": 4546 + }, + { + "epoch": 1.6022907488986784, + "grad_norm": 1.6012955775631803, + "learning_rate": 2.0901722866787842e-06, + "loss": 0.46502384543418884, + "step": 4547 + }, + { + "epoch": 1.6026431718061676, + "grad_norm": 1.7999501734847188, + "learning_rate": 2.086608816591146e-06, + "loss": 0.4822906255722046, + "step": 4548 + }, + { + "epoch": 1.6029955947136565, + "grad_norm": 1.8169323717501906, + "learning_rate": 2.083048032853534e-06, + "loss": 0.6382625699043274, + "step": 4549 + }, + { + "epoch": 1.6033480176211454, + "grad_norm": 1.7542851479568786, + "learning_rate": 2.0794899366747334e-06, + "loss": 0.6070914268493652, + "step": 4550 + }, + { + "epoch": 1.6037004405286344, + "grad_norm": 
1.8496689505105712, + "learning_rate": 2.0759345292626045e-06, + "loss": 0.5953283309936523, + "step": 4551 + }, + { + "epoch": 1.6040528634361233, + "grad_norm": 1.6448363622587787, + "learning_rate": 2.0723818118240958e-06, + "loss": 0.47553640604019165, + "step": 4552 + }, + { + "epoch": 1.6044052863436122, + "grad_norm": 2.007835441279153, + "learning_rate": 2.0688317855652463e-06, + "loss": 0.7020712494850159, + "step": 4553 + }, + { + "epoch": 1.6047577092511012, + "grad_norm": 1.739770344308816, + "learning_rate": 2.0652844516911776e-06, + "loss": 0.5998836159706116, + "step": 4554 + }, + { + "epoch": 1.6051101321585903, + "grad_norm": 1.7690620328907303, + "learning_rate": 2.0617398114060983e-06, + "loss": 0.6501786708831787, + "step": 4555 + }, + { + "epoch": 1.6054625550660793, + "grad_norm": 1.7628232586759778, + "learning_rate": 2.0581978659133097e-06, + "loss": 0.6444278955459595, + "step": 4556 + }, + { + "epoch": 1.6058149779735684, + "grad_norm": 1.8812364367093761, + "learning_rate": 2.0546586164151827e-06, + "loss": 0.6756579875946045, + "step": 4557 + }, + { + "epoch": 1.6061674008810574, + "grad_norm": 1.9541887465796286, + "learning_rate": 2.051122064113189e-06, + "loss": 0.6043737530708313, + "step": 4558 + }, + { + "epoch": 1.6065198237885463, + "grad_norm": 1.7992795463772795, + "learning_rate": 2.047588210207877e-06, + "loss": 0.6504104137420654, + "step": 4559 + }, + { + "epoch": 1.6068722466960352, + "grad_norm": 1.8447157864854533, + "learning_rate": 2.044057055898879e-06, + "loss": 0.6586685180664062, + "step": 4560 + }, + { + "epoch": 1.6072246696035242, + "grad_norm": 1.6895598009184531, + "learning_rate": 2.0405286023849125e-06, + "loss": 0.4463368058204651, + "step": 4561 + }, + { + "epoch": 1.607577092511013, + "grad_norm": 1.626067629091748, + "learning_rate": 2.037002850863777e-06, + "loss": 0.5208157896995544, + "step": 4562 + }, + { + "epoch": 1.607929515418502, + "grad_norm": 2.325947552099387, + "learning_rate": 
2.033479802532354e-06, + "loss": 0.612602174282074, + "step": 4563 + }, + { + "epoch": 1.6082819383259912, + "grad_norm": 1.8677335810734068, + "learning_rate": 2.0299594585866166e-06, + "loss": 0.6871482133865356, + "step": 4564 + }, + { + "epoch": 1.6086343612334801, + "grad_norm": 2.1450630320575863, + "learning_rate": 2.0264418202215998e-06, + "loss": 0.5770177245140076, + "step": 4565 + }, + { + "epoch": 1.6089867841409693, + "grad_norm": 2.0018570918486263, + "learning_rate": 2.0229268886314413e-06, + "loss": 0.600841224193573, + "step": 4566 + }, + { + "epoch": 1.6093392070484582, + "grad_norm": 1.4951834973656204, + "learning_rate": 2.0194146650093494e-06, + "loss": 0.47742071747779846, + "step": 4567 + }, + { + "epoch": 1.6096916299559472, + "grad_norm": 1.932667797658379, + "learning_rate": 2.015905150547612e-06, + "loss": 0.5528711080551147, + "step": 4568 + }, + { + "epoch": 1.610044052863436, + "grad_norm": 1.7893968437532208, + "learning_rate": 2.0123983464376028e-06, + "loss": 0.6892603635787964, + "step": 4569 + }, + { + "epoch": 1.610396475770925, + "grad_norm": 2.0432539431091405, + "learning_rate": 2.0088942538697687e-06, + "loss": 0.593653678894043, + "step": 4570 + }, + { + "epoch": 1.610748898678414, + "grad_norm": 1.913622035178548, + "learning_rate": 2.005392874033646e-06, + "loss": 0.5570813417434692, + "step": 4571 + }, + { + "epoch": 1.6111013215859031, + "grad_norm": 1.7912413841249368, + "learning_rate": 2.0018942081178426e-06, + "loss": 0.6723357439041138, + "step": 4572 + }, + { + "epoch": 1.611453744493392, + "grad_norm": 1.8833118579628767, + "learning_rate": 1.9983982573100413e-06, + "loss": 0.5333940982818604, + "step": 4573 + }, + { + "epoch": 1.611806167400881, + "grad_norm": 2.1881508790927358, + "learning_rate": 1.9949050227970148e-06, + "loss": 0.6404193043708801, + "step": 4574 + }, + { + "epoch": 1.6121585903083702, + "grad_norm": 1.9103565569987608, + "learning_rate": 1.991414505764605e-06, + "loss": 0.6831241250038147, + 
"step": 4575 + }, + { + "epoch": 1.612511013215859, + "grad_norm": 2.3229832844307063, + "learning_rate": 1.9879267073977337e-06, + "loss": 0.6741847991943359, + "step": 4576 + }, + { + "epoch": 1.612863436123348, + "grad_norm": 1.843434925588856, + "learning_rate": 1.9844416288804004e-06, + "loss": 0.5234787464141846, + "step": 4577 + }, + { + "epoch": 1.613215859030837, + "grad_norm": 1.931234115746558, + "learning_rate": 1.9809592713956782e-06, + "loss": 0.6462803483009338, + "step": 4578 + }, + { + "epoch": 1.613568281938326, + "grad_norm": 2.495392945939654, + "learning_rate": 1.977479636125724e-06, + "loss": 0.612025797367096, + "step": 4579 + }, + { + "epoch": 1.6139207048458148, + "grad_norm": 1.6414504893846202, + "learning_rate": 1.9740027242517668e-06, + "loss": 0.5065322518348694, + "step": 4580 + }, + { + "epoch": 1.614273127753304, + "grad_norm": 1.9613495904560583, + "learning_rate": 1.9705285369540994e-06, + "loss": 0.4986911714076996, + "step": 4581 + }, + { + "epoch": 1.614625550660793, + "grad_norm": 2.1185650604413926, + "learning_rate": 1.967057075412111e-06, + "loss": 0.6030969619750977, + "step": 4582 + }, + { + "epoch": 1.614977973568282, + "grad_norm": 1.8032946015429019, + "learning_rate": 1.963588340804251e-06, + "loss": 0.6116718649864197, + "step": 4583 + }, + { + "epoch": 1.615330396475771, + "grad_norm": 1.9008591407855147, + "learning_rate": 1.960122334308047e-06, + "loss": 0.8064850568771362, + "step": 4584 + }, + { + "epoch": 1.61568281938326, + "grad_norm": 2.130250646945173, + "learning_rate": 1.9566590571000997e-06, + "loss": 0.7416974306106567, + "step": 4585 + }, + { + "epoch": 1.616035242290749, + "grad_norm": 2.0285944926888604, + "learning_rate": 1.9531985103560813e-06, + "loss": 0.48169833421707153, + "step": 4586 + }, + { + "epoch": 1.6163876651982378, + "grad_norm": 8.08226040018375, + "learning_rate": 1.949740695250746e-06, + "loss": 0.7766422629356384, + "step": 4587 + }, + { + "epoch": 1.6167400881057268, + 
"grad_norm": 1.6227557131714891, + "learning_rate": 1.9462856129579144e-06, + "loss": 0.3793888986110687, + "step": 4588 + }, + { + "epoch": 1.6170925110132157, + "grad_norm": 1.6662726387585254, + "learning_rate": 1.94283326465047e-06, + "loss": 0.6129955053329468, + "step": 4589 + }, + { + "epoch": 1.6174449339207049, + "grad_norm": 1.927411767174183, + "learning_rate": 1.9393836515003874e-06, + "loss": 0.7420347929000854, + "step": 4590 + }, + { + "epoch": 1.6177973568281938, + "grad_norm": 1.810002162071199, + "learning_rate": 1.9359367746786993e-06, + "loss": 0.49013108015060425, + "step": 4591 + }, + { + "epoch": 1.618149779735683, + "grad_norm": 1.8150752517575908, + "learning_rate": 1.932492635355513e-06, + "loss": 0.5198413133621216, + "step": 4592 + }, + { + "epoch": 1.618502202643172, + "grad_norm": 1.9402976415289777, + "learning_rate": 1.929051234700007e-06, + "loss": 0.6031092405319214, + "step": 4593 + }, + { + "epoch": 1.6188546255506608, + "grad_norm": 2.041490312444486, + "learning_rate": 1.9256125738804264e-06, + "loss": 0.6269406080245972, + "step": 4594 + }, + { + "epoch": 1.6192070484581498, + "grad_norm": 1.801972947869227, + "learning_rate": 1.922176654064096e-06, + "loss": 0.4518774747848511, + "step": 4595 + }, + { + "epoch": 1.6195594713656387, + "grad_norm": 1.8680481961289441, + "learning_rate": 1.9187434764174027e-06, + "loss": 0.6199424862861633, + "step": 4596 + }, + { + "epoch": 1.6199118942731277, + "grad_norm": 2.634014207343412, + "learning_rate": 1.9153130421057955e-06, + "loss": 0.5155355930328369, + "step": 4597 + }, + { + "epoch": 1.6202643171806166, + "grad_norm": 1.8081505074484028, + "learning_rate": 1.9118853522938087e-06, + "loss": 0.6188424229621887, + "step": 4598 + }, + { + "epoch": 1.6206167400881057, + "grad_norm": 1.8999856535081827, + "learning_rate": 1.908460408145033e-06, + "loss": 0.5807337164878845, + "step": 4599 + }, + { + "epoch": 1.6209691629955947, + "grad_norm": 1.6142171687185456, + "learning_rate": 
1.9050382108221311e-06, + "loss": 0.5258378982543945, + "step": 4600 + }, + { + "epoch": 1.6213215859030838, + "grad_norm": 1.9194714558474444, + "learning_rate": 1.9016187614868308e-06, + "loss": 0.6612311601638794, + "step": 4601 + }, + { + "epoch": 1.6216740088105728, + "grad_norm": 1.7849999472385678, + "learning_rate": 1.8982020612999285e-06, + "loss": 0.611383855342865, + "step": 4602 + }, + { + "epoch": 1.6220264317180617, + "grad_norm": 1.7599275323638883, + "learning_rate": 1.894788111421294e-06, + "loss": 0.6111105680465698, + "step": 4603 + }, + { + "epoch": 1.6223788546255506, + "grad_norm": 2.061255928544227, + "learning_rate": 1.8913769130098504e-06, + "loss": 0.7554557919502258, + "step": 4604 + }, + { + "epoch": 1.6227312775330396, + "grad_norm": 1.7818402726516558, + "learning_rate": 1.887968467223591e-06, + "loss": 0.597324013710022, + "step": 4605 + }, + { + "epoch": 1.6230837004405285, + "grad_norm": 2.3192399293978014, + "learning_rate": 1.8845627752195839e-06, + "loss": 0.6232750415802002, + "step": 4606 + }, + { + "epoch": 1.6234361233480175, + "grad_norm": 1.7697166073683794, + "learning_rate": 1.8811598381539543e-06, + "loss": 0.45699936151504517, + "step": 4607 + }, + { + "epoch": 1.6237885462555066, + "grad_norm": 1.9980768091261172, + "learning_rate": 1.87775965718189e-06, + "loss": 0.5307953953742981, + "step": 4608 + }, + { + "epoch": 1.6241409691629956, + "grad_norm": 1.8817640717556428, + "learning_rate": 1.8743622334576495e-06, + "loss": 0.6013764142990112, + "step": 4609 + }, + { + "epoch": 1.6244933920704847, + "grad_norm": 2.0614740198183066, + "learning_rate": 1.8709675681345485e-06, + "loss": 0.5143340826034546, + "step": 4610 + }, + { + "epoch": 1.6248458149779736, + "grad_norm": 1.6895900050976231, + "learning_rate": 1.8675756623649788e-06, + "loss": 0.506861686706543, + "step": 4611 + }, + { + "epoch": 1.6251982378854626, + "grad_norm": 2.223885866703504, + "learning_rate": 1.8641865173003793e-06, + "loss": 
0.6807849407196045, + "step": 4612 + }, + { + "epoch": 1.6255506607929515, + "grad_norm": 1.8930990565263293, + "learning_rate": 1.8608001340912573e-06, + "loss": 0.592629075050354, + "step": 4613 + }, + { + "epoch": 1.6259030837004405, + "grad_norm": 2.032831166123834, + "learning_rate": 1.8574165138871925e-06, + "loss": 0.5669249296188354, + "step": 4614 + }, + { + "epoch": 1.6262555066079294, + "grad_norm": 1.9071887451281335, + "learning_rate": 1.8540356578368135e-06, + "loss": 0.7123057246208191, + "step": 4615 + }, + { + "epoch": 1.6266079295154185, + "grad_norm": 1.7499585996323015, + "learning_rate": 1.8506575670878168e-06, + "loss": 0.5844429731369019, + "step": 4616 + }, + { + "epoch": 1.6269603524229075, + "grad_norm": 1.8176797951508414, + "learning_rate": 1.8472822427869597e-06, + "loss": 0.661457359790802, + "step": 4617 + }, + { + "epoch": 1.6273127753303964, + "grad_norm": 1.9714232511915755, + "learning_rate": 1.8439096860800565e-06, + "loss": 0.6944575905799866, + "step": 4618 + }, + { + "epoch": 1.6276651982378856, + "grad_norm": 1.9471855664955058, + "learning_rate": 1.8405398981119927e-06, + "loss": 0.5818712115287781, + "step": 4619 + }, + { + "epoch": 1.6280176211453745, + "grad_norm": 1.8573981084806426, + "learning_rate": 1.8371728800266964e-06, + "loss": 0.6373921632766724, + "step": 4620 + }, + { + "epoch": 1.6283700440528635, + "grad_norm": 1.8455409169726698, + "learning_rate": 1.8338086329671734e-06, + "loss": 0.4629862904548645, + "step": 4621 + }, + { + "epoch": 1.6287224669603524, + "grad_norm": 2.1547215929268306, + "learning_rate": 1.8304471580754779e-06, + "loss": 0.6537790894508362, + "step": 4622 + }, + { + "epoch": 1.6290748898678413, + "grad_norm": 1.9071168587624383, + "learning_rate": 1.8270884564927272e-06, + "loss": 0.527474582195282, + "step": 4623 + }, + { + "epoch": 1.6294273127753303, + "grad_norm": 1.9134019886674338, + "learning_rate": 1.8237325293590934e-06, + "loss": 0.48941463232040405, + "step": 4624 + }, + { + 
"epoch": 1.6297797356828194, + "grad_norm": 1.7797372995747724, + "learning_rate": 1.8203793778138123e-06, + "loss": 0.6276243925094604, + "step": 4625 + }, + { + "epoch": 1.6301321585903084, + "grad_norm": 2.175835170708709, + "learning_rate": 1.8170290029951708e-06, + "loss": 0.6339844465255737, + "step": 4626 + }, + { + "epoch": 1.6304845814977975, + "grad_norm": 1.8667689453086813, + "learning_rate": 1.813681406040524e-06, + "loss": 0.517188549041748, + "step": 4627 + }, + { + "epoch": 1.6308370044052865, + "grad_norm": 1.8956914399941025, + "learning_rate": 1.8103365880862667e-06, + "loss": 0.576552152633667, + "step": 4628 + }, + { + "epoch": 1.6311894273127754, + "grad_norm": 1.7936413452903872, + "learning_rate": 1.8069945502678688e-06, + "loss": 0.5703557729721069, + "step": 4629 + }, + { + "epoch": 1.6315418502202643, + "grad_norm": 1.9048409586347532, + "learning_rate": 1.8036552937198447e-06, + "loss": 0.538072943687439, + "step": 4630 + }, + { + "epoch": 1.6318942731277533, + "grad_norm": 1.6721149802212347, + "learning_rate": 1.8003188195757693e-06, + "loss": 0.4144761562347412, + "step": 4631 + }, + { + "epoch": 1.6322466960352422, + "grad_norm": 2.056410628146389, + "learning_rate": 1.7969851289682704e-06, + "loss": 0.5357951521873474, + "step": 4632 + }, + { + "epoch": 1.6325991189427311, + "grad_norm": 1.9601913826257962, + "learning_rate": 1.7936542230290333e-06, + "loss": 0.6158766746520996, + "step": 4633 + }, + { + "epoch": 1.6329515418502203, + "grad_norm": 2.018782202231636, + "learning_rate": 1.790326102888794e-06, + "loss": 0.7278525233268738, + "step": 4634 + }, + { + "epoch": 1.6333039647577092, + "grad_norm": 1.8937378067838377, + "learning_rate": 1.787000769677354e-06, + "loss": 0.5113881230354309, + "step": 4635 + }, + { + "epoch": 1.6336563876651984, + "grad_norm": 2.2218997592930987, + "learning_rate": 1.7836782245235485e-06, + "loss": 0.6247432827949524, + "step": 4636 + }, + { + "epoch": 1.6340088105726873, + "grad_norm": 
1.9409043558834718, + "learning_rate": 1.7803584685552877e-06, + "loss": 0.513325572013855, + "step": 4637 + }, + { + "epoch": 1.6343612334801763, + "grad_norm": 2.023194297584799, + "learning_rate": 1.7770415028995213e-06, + "loss": 0.4980276823043823, + "step": 4638 + }, + { + "epoch": 1.6347136563876652, + "grad_norm": 1.8669544509684106, + "learning_rate": 1.7737273286822565e-06, + "loss": 0.5832515954971313, + "step": 4639 + }, + { + "epoch": 1.6350660792951541, + "grad_norm": 1.7519671458346908, + "learning_rate": 1.7704159470285532e-06, + "loss": 0.6030116081237793, + "step": 4640 + }, + { + "epoch": 1.635418502202643, + "grad_norm": 2.26980120712081, + "learning_rate": 1.7671073590625188e-06, + "loss": 0.5494866371154785, + "step": 4641 + }, + { + "epoch": 1.635770925110132, + "grad_norm": 1.8803060042220399, + "learning_rate": 1.7638015659073216e-06, + "loss": 0.617791473865509, + "step": 4642 + }, + { + "epoch": 1.6361233480176212, + "grad_norm": 1.8809591920257003, + "learning_rate": 1.760498568685175e-06, + "loss": 0.5213589668273926, + "step": 4643 + }, + { + "epoch": 1.63647577092511, + "grad_norm": 1.7835752431606857, + "learning_rate": 1.7571983685173367e-06, + "loss": 0.5114192962646484, + "step": 4644 + }, + { + "epoch": 1.6368281938325993, + "grad_norm": 1.8264916856765907, + "learning_rate": 1.7539009665241291e-06, + "loss": 0.6207156181335449, + "step": 4645 + }, + { + "epoch": 1.6371806167400882, + "grad_norm": 1.7037955383522276, + "learning_rate": 1.750606363824915e-06, + "loss": 0.5893350839614868, + "step": 4646 + }, + { + "epoch": 1.6375330396475771, + "grad_norm": 2.0239756750398077, + "learning_rate": 1.7473145615381092e-06, + "loss": 0.6453898549079895, + "step": 4647 + }, + { + "epoch": 1.637885462555066, + "grad_norm": 1.623565893456343, + "learning_rate": 1.7440255607811773e-06, + "loss": 0.5098680853843689, + "step": 4648 + }, + { + "epoch": 1.638237885462555, + "grad_norm": 1.9009179186379688, + "learning_rate": 
1.7407393626706305e-06, + "loss": 0.5841408967971802, + "step": 4649 + }, + { + "epoch": 1.638590308370044, + "grad_norm": 1.8903189372223002, + "learning_rate": 1.7374559683220337e-06, + "loss": 0.5593127012252808, + "step": 4650 + }, + { + "epoch": 1.638942731277533, + "grad_norm": 1.9192509501465884, + "learning_rate": 1.7341753788499983e-06, + "loss": 0.6885190606117249, + "step": 4651 + }, + { + "epoch": 1.639295154185022, + "grad_norm": 2.019948918382337, + "learning_rate": 1.730897595368175e-06, + "loss": 0.6271092891693115, + "step": 4652 + }, + { + "epoch": 1.639647577092511, + "grad_norm": 1.8193728432309102, + "learning_rate": 1.7276226189892763e-06, + "loss": 0.6035536527633667, + "step": 4653 + }, + { + "epoch": 1.6400000000000001, + "grad_norm": 1.876741558260643, + "learning_rate": 1.724350450825052e-06, + "loss": 0.49980080127716064, + "step": 4654 + }, + { + "epoch": 1.640352422907489, + "grad_norm": 1.945483701689467, + "learning_rate": 1.721081091986303e-06, + "loss": 0.6056489944458008, + "step": 4655 + }, + { + "epoch": 1.640704845814978, + "grad_norm": 1.998934183218588, + "learning_rate": 1.717814543582873e-06, + "loss": 0.5611459016799927, + "step": 4656 + }, + { + "epoch": 1.641057268722467, + "grad_norm": 1.8501618159787931, + "learning_rate": 1.7145508067236515e-06, + "loss": 0.5655262470245361, + "step": 4657 + }, + { + "epoch": 1.6414096916299559, + "grad_norm": 2.17470073262635, + "learning_rate": 1.7112898825165814e-06, + "loss": 0.7793601751327515, + "step": 4658 + }, + { + "epoch": 1.6417621145374448, + "grad_norm": 1.8078904709838137, + "learning_rate": 1.7080317720686434e-06, + "loss": 0.6587018370628357, + "step": 4659 + }, + { + "epoch": 1.642114537444934, + "grad_norm": 2.0052578395520313, + "learning_rate": 1.7047764764858598e-06, + "loss": 0.5546305775642395, + "step": 4660 + }, + { + "epoch": 1.642466960352423, + "grad_norm": 2.2168924782846844, + "learning_rate": 1.7015239968733066e-06, + "loss": 0.6215736865997314, + 
"step": 4661 + }, + { + "epoch": 1.642819383259912, + "grad_norm": 1.95167913439103, + "learning_rate": 1.6982743343350983e-06, + "loss": 0.5772532224655151, + "step": 4662 + }, + { + "epoch": 1.643171806167401, + "grad_norm": 1.9049742666250684, + "learning_rate": 1.6950274899743947e-06, + "loss": 0.567034900188446, + "step": 4663 + }, + { + "epoch": 1.64352422907489, + "grad_norm": 1.6486603082479945, + "learning_rate": 1.6917834648933985e-06, + "loss": 0.5306716561317444, + "step": 4664 + }, + { + "epoch": 1.6438766519823789, + "grad_norm": 1.923372734442966, + "learning_rate": 1.688542260193351e-06, + "loss": 0.6691634654998779, + "step": 4665 + }, + { + "epoch": 1.6442290748898678, + "grad_norm": 1.9073972200097022, + "learning_rate": 1.6853038769745466e-06, + "loss": 0.6071977615356445, + "step": 4666 + }, + { + "epoch": 1.6445814977973567, + "grad_norm": 3.0113580201176355, + "learning_rate": 1.6820683163363161e-06, + "loss": 0.743544340133667, + "step": 4667 + }, + { + "epoch": 1.6449339207048457, + "grad_norm": 2.2198521832647864, + "learning_rate": 1.6788355793770238e-06, + "loss": 0.5745127201080322, + "step": 4668 + }, + { + "epoch": 1.6452863436123348, + "grad_norm": 2.083730313741091, + "learning_rate": 1.6756056671940902e-06, + "loss": 0.5153336524963379, + "step": 4669 + }, + { + "epoch": 1.6456387665198238, + "grad_norm": 1.888215895134721, + "learning_rate": 1.6723785808839666e-06, + "loss": 0.5780388116836548, + "step": 4670 + }, + { + "epoch": 1.645991189427313, + "grad_norm": 1.975333041709577, + "learning_rate": 1.6691543215421513e-06, + "loss": 0.601921796798706, + "step": 4671 + }, + { + "epoch": 1.6463436123348019, + "grad_norm": 1.8402715148458082, + "learning_rate": 1.6659328902631766e-06, + "loss": 0.6636123657226562, + "step": 4672 + }, + { + "epoch": 1.6466960352422908, + "grad_norm": 1.804292320266694, + "learning_rate": 1.6627142881406188e-06, + "loss": 0.45225393772125244, + "step": 4673 + }, + { + "epoch": 1.6470484581497797, + 
"grad_norm": 1.862693343451114, + "learning_rate": 1.6594985162670984e-06, + "loss": 0.6406756043434143, + "step": 4674 + }, + { + "epoch": 1.6474008810572687, + "grad_norm": 2.11645792406816, + "learning_rate": 1.6562855757342632e-06, + "loss": 0.6735906600952148, + "step": 4675 + }, + { + "epoch": 1.6477533039647576, + "grad_norm": 1.9503356292211693, + "learning_rate": 1.6530754676328064e-06, + "loss": 0.515188992023468, + "step": 4676 + }, + { + "epoch": 1.6481057268722465, + "grad_norm": 1.721977079638204, + "learning_rate": 1.6498681930524652e-06, + "loss": 0.5976129174232483, + "step": 4677 + }, + { + "epoch": 1.6484581497797357, + "grad_norm": 1.9285425022468947, + "learning_rate": 1.6466637530820074e-06, + "loss": 0.7367427945137024, + "step": 4678 + }, + { + "epoch": 1.6488105726872246, + "grad_norm": 2.073959448612198, + "learning_rate": 1.6434621488092385e-06, + "loss": 0.5173717737197876, + "step": 4679 + }, + { + "epoch": 1.6491629955947138, + "grad_norm": 2.1289983497571745, + "learning_rate": 1.6402633813210056e-06, + "loss": 0.7961066961288452, + "step": 4680 + }, + { + "epoch": 1.6495154185022027, + "grad_norm": 2.1150632325299488, + "learning_rate": 1.637067451703187e-06, + "loss": 0.8271595239639282, + "step": 4681 + }, + { + "epoch": 1.6498678414096917, + "grad_norm": 1.9513356704584446, + "learning_rate": 1.6338743610407103e-06, + "loss": 0.6818888783454895, + "step": 4682 + }, + { + "epoch": 1.6502202643171806, + "grad_norm": 2.182931567425792, + "learning_rate": 1.6306841104175219e-06, + "loss": 0.5168178677558899, + "step": 4683 + }, + { + "epoch": 1.6505726872246695, + "grad_norm": 1.8122401400933128, + "learning_rate": 1.627496700916612e-06, + "loss": 0.5792043209075928, + "step": 4684 + }, + { + "epoch": 1.6509251101321585, + "grad_norm": 1.8346977982265331, + "learning_rate": 1.624312133620013e-06, + "loss": 0.6099069118499756, + "step": 4685 + }, + { + "epoch": 1.6512775330396474, + "grad_norm": 1.7489569966562013, + "learning_rate": 
1.6211304096087832e-06, + "loss": 0.4562867283821106, + "step": 4686 + }, + { + "epoch": 1.6516299559471366, + "grad_norm": 1.767208393167573, + "learning_rate": 1.61795152996302e-06, + "loss": 0.48648780584335327, + "step": 4687 + }, + { + "epoch": 1.6519823788546255, + "grad_norm": 1.619888597224146, + "learning_rate": 1.6147754957618533e-06, + "loss": 0.5351820588111877, + "step": 4688 + }, + { + "epoch": 1.6523348017621147, + "grad_norm": 1.5845894367063569, + "learning_rate": 1.6116023080834442e-06, + "loss": 0.4646923542022705, + "step": 4689 + }, + { + "epoch": 1.6526872246696036, + "grad_norm": 2.0533783205545304, + "learning_rate": 1.608431968005001e-06, + "loss": 0.6257984638214111, + "step": 4690 + }, + { + "epoch": 1.6530396475770925, + "grad_norm": 1.6714052981831828, + "learning_rate": 1.605264476602747e-06, + "loss": 0.5646224021911621, + "step": 4691 + }, + { + "epoch": 1.6533920704845815, + "grad_norm": 1.9704920715227376, + "learning_rate": 1.6020998349519457e-06, + "loss": 0.6074661612510681, + "step": 4692 + }, + { + "epoch": 1.6537444933920704, + "grad_norm": 2.119532209280586, + "learning_rate": 1.598938044126901e-06, + "loss": 0.703096866607666, + "step": 4693 + }, + { + "epoch": 1.6540969162995593, + "grad_norm": 2.0281924961410436, + "learning_rate": 1.5957791052009397e-06, + "loss": 0.6677542924880981, + "step": 4694 + }, + { + "epoch": 1.6544493392070485, + "grad_norm": 2.2554606939897313, + "learning_rate": 1.5926230192464232e-06, + "loss": 0.755639910697937, + "step": 4695 + }, + { + "epoch": 1.6548017621145374, + "grad_norm": 2.0543326225263705, + "learning_rate": 1.5894697873347442e-06, + "loss": 0.7008202075958252, + "step": 4696 + }, + { + "epoch": 1.6551541850220264, + "grad_norm": 1.7892378339322623, + "learning_rate": 1.5863194105363244e-06, + "loss": 0.5049681067466736, + "step": 4697 + }, + { + "epoch": 1.6555066079295155, + "grad_norm": 1.8394208195845667, + "learning_rate": 1.583171889920626e-06, + "loss": 0.7415407299995422, 
+ "step": 4698 + }, + { + "epoch": 1.6558590308370045, + "grad_norm": 2.0103355889821404, + "learning_rate": 1.5800272265561256e-06, + "loss": 0.7949470281600952, + "step": 4699 + }, + { + "epoch": 1.6562114537444934, + "grad_norm": 2.2401604191268456, + "learning_rate": 1.5768854215103435e-06, + "loss": 0.5892510414123535, + "step": 4700 + }, + { + "epoch": 1.6565638766519823, + "grad_norm": 2.1732638193025076, + "learning_rate": 1.5737464758498243e-06, + "loss": 0.5357394814491272, + "step": 4701 + }, + { + "epoch": 1.6569162995594713, + "grad_norm": 1.92797804038562, + "learning_rate": 1.5706103906401416e-06, + "loss": 0.6078016757965088, + "step": 4702 + }, + { + "epoch": 1.6572687224669602, + "grad_norm": 2.019695184899454, + "learning_rate": 1.5674771669459e-06, + "loss": 0.5858899354934692, + "step": 4703 + }, + { + "epoch": 1.6576211453744494, + "grad_norm": 1.819706102358174, + "learning_rate": 1.5643468058307287e-06, + "loss": 0.5783329010009766, + "step": 4704 + }, + { + "epoch": 1.6579735682819383, + "grad_norm": 1.8104985438999985, + "learning_rate": 1.561219308357288e-06, + "loss": 0.5412800312042236, + "step": 4705 + }, + { + "epoch": 1.6583259911894275, + "grad_norm": 1.5193820753894371, + "learning_rate": 1.5580946755872727e-06, + "loss": 0.5609365701675415, + "step": 4706 + }, + { + "epoch": 1.6586784140969164, + "grad_norm": 2.2157168701611427, + "learning_rate": 1.554972908581388e-06, + "loss": 0.45193177461624146, + "step": 4707 + }, + { + "epoch": 1.6590308370044053, + "grad_norm": 1.885008861796424, + "learning_rate": 1.5518540083993838e-06, + "loss": 0.6402257680892944, + "step": 4708 + }, + { + "epoch": 1.6593832599118943, + "grad_norm": 1.906792902482494, + "learning_rate": 1.5487379761000276e-06, + "loss": 0.5956071615219116, + "step": 4709 + }, + { + "epoch": 1.6597356828193832, + "grad_norm": 1.5229764109639101, + "learning_rate": 1.5456248127411156e-06, + "loss": 0.5975273847579956, + "step": 4710 + }, + { + "epoch": 
1.6600881057268722, + "grad_norm": 2.2860844716103514, + "learning_rate": 1.54251451937947e-06, + "loss": 0.6914929151535034, + "step": 4711 + }, + { + "epoch": 1.660440528634361, + "grad_norm": 2.141875122923791, + "learning_rate": 1.5394070970709384e-06, + "loss": 0.5867592096328735, + "step": 4712 + }, + { + "epoch": 1.6607929515418502, + "grad_norm": 1.96612759503979, + "learning_rate": 1.5363025468703917e-06, + "loss": 0.6448687314987183, + "step": 4713 + }, + { + "epoch": 1.6611453744493392, + "grad_norm": 1.8452223088884994, + "learning_rate": 1.5332008698317348e-06, + "loss": 0.5870485305786133, + "step": 4714 + }, + { + "epoch": 1.6614977973568283, + "grad_norm": 1.9043935409080608, + "learning_rate": 1.5301020670078803e-06, + "loss": 0.6336855888366699, + "step": 4715 + }, + { + "epoch": 1.6618502202643173, + "grad_norm": 1.9247468731228787, + "learning_rate": 1.527006139450784e-06, + "loss": 0.5924787521362305, + "step": 4716 + }, + { + "epoch": 1.6622026431718062, + "grad_norm": 1.5860620334804822, + "learning_rate": 1.523913088211415e-06, + "loss": 0.5817830562591553, + "step": 4717 + }, + { + "epoch": 1.6625550660792952, + "grad_norm": 1.8285246452015176, + "learning_rate": 1.5208229143397657e-06, + "loss": 0.5836390852928162, + "step": 4718 + }, + { + "epoch": 1.662907488986784, + "grad_norm": 1.5094327417455158, + "learning_rate": 1.5177356188848558e-06, + "loss": 0.47110515832901, + "step": 4719 + }, + { + "epoch": 1.663259911894273, + "grad_norm": 2.018838906344594, + "learning_rate": 1.5146512028947225e-06, + "loss": 0.6376635432243347, + "step": 4720 + }, + { + "epoch": 1.663612334801762, + "grad_norm": 1.7847798861513196, + "learning_rate": 1.5115696674164349e-06, + "loss": 0.6399784088134766, + "step": 4721 + }, + { + "epoch": 1.6639647577092511, + "grad_norm": 2.2125247577405127, + "learning_rate": 1.5084910134960773e-06, + "loss": 0.5891954898834229, + "step": 4722 + }, + { + "epoch": 1.66431718061674, + "grad_norm": 1.5827717360956535, + 
"learning_rate": 1.5054152421787505e-06, + "loss": 0.6358054876327515, + "step": 4723 + }, + { + "epoch": 1.6646696035242292, + "grad_norm": 1.855029533228232, + "learning_rate": 1.5023423545085892e-06, + "loss": 0.5072367191314697, + "step": 4724 + }, + { + "epoch": 1.6650220264317181, + "grad_norm": 1.8866512172900913, + "learning_rate": 1.4992723515287423e-06, + "loss": 0.5549881458282471, + "step": 4725 + }, + { + "epoch": 1.665374449339207, + "grad_norm": 1.5386308243299962, + "learning_rate": 1.4962052342813804e-06, + "loss": 0.4833364188671112, + "step": 4726 + }, + { + "epoch": 1.665726872246696, + "grad_norm": 1.837283227568624, + "learning_rate": 1.4931410038076922e-06, + "loss": 0.6183017492294312, + "step": 4727 + }, + { + "epoch": 1.666079295154185, + "grad_norm": 1.8220970545699078, + "learning_rate": 1.4900796611478885e-06, + "loss": 0.4956335127353668, + "step": 4728 + }, + { + "epoch": 1.666431718061674, + "grad_norm": 2.0221134241832552, + "learning_rate": 1.4870212073412027e-06, + "loss": 0.7345337271690369, + "step": 4729 + }, + { + "epoch": 1.6667841409691628, + "grad_norm": 1.5143426871240313, + "learning_rate": 1.4839656434258864e-06, + "loss": 0.5324833393096924, + "step": 4730 + }, + { + "epoch": 1.667136563876652, + "grad_norm": 1.7373474898452002, + "learning_rate": 1.4809129704392e-06, + "loss": 0.5702322125434875, + "step": 4731 + }, + { + "epoch": 1.667488986784141, + "grad_norm": 1.7374551868496027, + "learning_rate": 1.4778631894174389e-06, + "loss": 0.46188884973526, + "step": 4732 + }, + { + "epoch": 1.66784140969163, + "grad_norm": 1.76911142349316, + "learning_rate": 1.474816301395906e-06, + "loss": 0.5505924224853516, + "step": 4733 + }, + { + "epoch": 1.668193832599119, + "grad_norm": 1.9422422566247162, + "learning_rate": 1.4717723074089251e-06, + "loss": 0.5889710187911987, + "step": 4734 + }, + { + "epoch": 1.668546255506608, + "grad_norm": 2.1059796200005825, + "learning_rate": 1.4687312084898387e-06, + "loss": 
0.5794551372528076, + "step": 4735 + }, + { + "epoch": 1.668898678414097, + "grad_norm": 1.8650983467603144, + "learning_rate": 1.4656930056710006e-06, + "loss": 0.567146897315979, + "step": 4736 + }, + { + "epoch": 1.6692511013215858, + "grad_norm": 2.0850787713289067, + "learning_rate": 1.4626576999837938e-06, + "loss": 0.5330451130867004, + "step": 4737 + }, + { + "epoch": 1.6696035242290748, + "grad_norm": 1.868870689701364, + "learning_rate": 1.459625292458604e-06, + "loss": 0.5631227493286133, + "step": 4738 + }, + { + "epoch": 1.669955947136564, + "grad_norm": 1.8773546830623118, + "learning_rate": 1.456595784124839e-06, + "loss": 0.5145374536514282, + "step": 4739 + }, + { + "epoch": 1.6703083700440529, + "grad_norm": 1.925388120075487, + "learning_rate": 1.453569176010927e-06, + "loss": 0.59378582239151, + "step": 4740 + }, + { + "epoch": 1.6706607929515418, + "grad_norm": 2.053494266916917, + "learning_rate": 1.4505454691443043e-06, + "loss": 0.5827980041503906, + "step": 4741 + }, + { + "epoch": 1.671013215859031, + "grad_norm": 1.6369511357690396, + "learning_rate": 1.4475246645514274e-06, + "loss": 0.5270858407020569, + "step": 4742 + }, + { + "epoch": 1.67136563876652, + "grad_norm": 2.2160806515473186, + "learning_rate": 1.4445067632577625e-06, + "loss": 0.4708535373210907, + "step": 4743 + }, + { + "epoch": 1.6717180616740088, + "grad_norm": 1.6498079624073576, + "learning_rate": 1.4414917662877924e-06, + "loss": 0.5505239963531494, + "step": 4744 + }, + { + "epoch": 1.6720704845814978, + "grad_norm": 1.8451834665357323, + "learning_rate": 1.4384796746650221e-06, + "loss": 0.5841302871704102, + "step": 4745 + }, + { + "epoch": 1.6724229074889867, + "grad_norm": 1.8665624096794386, + "learning_rate": 1.4354704894119554e-06, + "loss": 0.627534031867981, + "step": 4746 + }, + { + "epoch": 1.6727753303964756, + "grad_norm": 1.9526983627618448, + "learning_rate": 1.4324642115501165e-06, + "loss": 0.6160094738006592, + "step": 4747 + }, + { + "epoch": 
1.6731277533039648, + "grad_norm": 1.6909104461316946, + "learning_rate": 1.4294608421000489e-06, + "loss": 0.5420609712600708, + "step": 4748 + }, + { + "epoch": 1.6734801762114537, + "grad_norm": 1.9597720364889828, + "learning_rate": 1.4264603820813006e-06, + "loss": 0.7729714512825012, + "step": 4749 + }, + { + "epoch": 1.6738325991189429, + "grad_norm": 1.8780592513411432, + "learning_rate": 1.4234628325124345e-06, + "loss": 0.6458747386932373, + "step": 4750 + }, + { + "epoch": 1.6741850220264318, + "grad_norm": 1.717642350217617, + "learning_rate": 1.4204681944110242e-06, + "loss": 0.5250670313835144, + "step": 4751 + }, + { + "epoch": 1.6745374449339208, + "grad_norm": 1.9945004637909651, + "learning_rate": 1.4174764687936548e-06, + "loss": 0.4985695779323578, + "step": 4752 + }, + { + "epoch": 1.6748898678414097, + "grad_norm": 2.1058731387570253, + "learning_rate": 1.4144876566759303e-06, + "loss": 0.6401104927062988, + "step": 4753 + }, + { + "epoch": 1.6752422907488986, + "grad_norm": 1.8242149794974472, + "learning_rate": 1.4115017590724534e-06, + "loss": 0.5991432666778564, + "step": 4754 + }, + { + "epoch": 1.6755947136563876, + "grad_norm": 1.8977892116048576, + "learning_rate": 1.4085187769968433e-06, + "loss": 0.7787071466445923, + "step": 4755 + }, + { + "epoch": 1.6759471365638765, + "grad_norm": 1.8915051082154768, + "learning_rate": 1.4055387114617336e-06, + "loss": 0.6977101564407349, + "step": 4756 + }, + { + "epoch": 1.6762995594713657, + "grad_norm": 1.7182871374874729, + "learning_rate": 1.4025615634787616e-06, + "loss": 0.541371762752533, + "step": 4757 + }, + { + "epoch": 1.6766519823788546, + "grad_norm": 1.8269477268502086, + "learning_rate": 1.3995873340585765e-06, + "loss": 0.5548759698867798, + "step": 4758 + }, + { + "epoch": 1.6770044052863438, + "grad_norm": 1.947083457475871, + "learning_rate": 1.3966160242108373e-06, + "loss": 0.6022241115570068, + "step": 4759 + }, + { + "epoch": 1.6773568281938327, + "grad_norm": 
2.1542376603491946, + "learning_rate": 1.3936476349442074e-06, + "loss": 0.4965083599090576, + "step": 4760 + }, + { + "epoch": 1.6777092511013216, + "grad_norm": 2.4193138120349227, + "learning_rate": 1.3906821672663706e-06, + "loss": 0.6712369918823242, + "step": 4761 + }, + { + "epoch": 1.6780616740088106, + "grad_norm": 1.8527401573304754, + "learning_rate": 1.3877196221840038e-06, + "loss": 0.6236127614974976, + "step": 4762 + }, + { + "epoch": 1.6784140969162995, + "grad_norm": 1.9836915293869917, + "learning_rate": 1.3847600007027996e-06, + "loss": 0.7144246101379395, + "step": 4763 + }, + { + "epoch": 1.6787665198237884, + "grad_norm": 1.926900514093349, + "learning_rate": 1.3818033038274602e-06, + "loss": 0.650489091873169, + "step": 4764 + }, + { + "epoch": 1.6791189427312774, + "grad_norm": 1.896615210676468, + "learning_rate": 1.3788495325616912e-06, + "loss": 0.6711791157722473, + "step": 4765 + }, + { + "epoch": 1.6794713656387665, + "grad_norm": 1.6640253715487854, + "learning_rate": 1.375898687908206e-06, + "loss": 0.49629515409469604, + "step": 4766 + }, + { + "epoch": 1.6798237885462555, + "grad_norm": 2.3032521123469727, + "learning_rate": 1.372950770868724e-06, + "loss": 0.5843443870544434, + "step": 4767 + }, + { + "epoch": 1.6801762114537446, + "grad_norm": 1.7269921421841483, + "learning_rate": 1.3700057824439694e-06, + "loss": 0.6201150417327881, + "step": 4768 + }, + { + "epoch": 1.6805286343612336, + "grad_norm": 2.2518096795033746, + "learning_rate": 1.3670637236336815e-06, + "loss": 0.6149473190307617, + "step": 4769 + }, + { + "epoch": 1.6808810572687225, + "grad_norm": 1.8297389667337718, + "learning_rate": 1.3641245954365868e-06, + "loss": 0.476188987493515, + "step": 4770 + }, + { + "epoch": 1.6812334801762114, + "grad_norm": 1.6086104459565809, + "learning_rate": 1.361188398850436e-06, + "loss": 0.4850924015045166, + "step": 4771 + }, + { + "epoch": 1.6815859030837004, + "grad_norm": 2.8146145731538676, + "learning_rate": 
1.3582551348719741e-06, + "loss": 0.6008634567260742, + "step": 4772 + }, + { + "epoch": 1.6819383259911893, + "grad_norm": 1.6382847925926618, + "learning_rate": 1.3553248044969525e-06, + "loss": 0.5383377075195312, + "step": 4773 + }, + { + "epoch": 1.6822907488986785, + "grad_norm": 1.966985389102481, + "learning_rate": 1.3523974087201274e-06, + "loss": 0.5711555480957031, + "step": 4774 + }, + { + "epoch": 1.6826431718061674, + "grad_norm": 1.915810750390724, + "learning_rate": 1.3494729485352586e-06, + "loss": 0.5267810821533203, + "step": 4775 + }, + { + "epoch": 1.6829955947136563, + "grad_norm": 1.968063769811982, + "learning_rate": 1.3465514249351076e-06, + "loss": 0.6203084588050842, + "step": 4776 + }, + { + "epoch": 1.6833480176211455, + "grad_norm": 1.604432029465195, + "learning_rate": 1.3436328389114473e-06, + "loss": 0.46672314405441284, + "step": 4777 + }, + { + "epoch": 1.6837004405286344, + "grad_norm": 2.175917964334397, + "learning_rate": 1.3407171914550366e-06, + "loss": 0.6375850439071655, + "step": 4778 + }, + { + "epoch": 1.6840528634361234, + "grad_norm": 1.7467776544405884, + "learning_rate": 1.337804483555656e-06, + "loss": 0.6162304282188416, + "step": 4779 + }, + { + "epoch": 1.6844052863436123, + "grad_norm": 2.0769560048267817, + "learning_rate": 1.3348947162020741e-06, + "loss": 0.7814363241195679, + "step": 4780 + }, + { + "epoch": 1.6847577092511012, + "grad_norm": 1.4969648698838118, + "learning_rate": 1.3319878903820682e-06, + "loss": 0.47330981492996216, + "step": 4781 + }, + { + "epoch": 1.6851101321585902, + "grad_norm": 2.184365435582337, + "learning_rate": 1.3290840070824163e-06, + "loss": 0.759529173374176, + "step": 4782 + }, + { + "epoch": 1.6854625550660793, + "grad_norm": 2.039688209679945, + "learning_rate": 1.326183067288893e-06, + "loss": 0.7727639675140381, + "step": 4783 + }, + { + "epoch": 1.6858149779735683, + "grad_norm": 1.9567066145193837, + "learning_rate": 1.3232850719862789e-06, + "loss": 
0.6429058313369751, + "step": 4784 + }, + { + "epoch": 1.6861674008810574, + "grad_norm": 2.394172291442893, + "learning_rate": 1.3203900221583565e-06, + "loss": 0.5895540714263916, + "step": 4785 + }, + { + "epoch": 1.6865198237885464, + "grad_norm": 1.835232130498821, + "learning_rate": 1.317497918787899e-06, + "loss": 0.5410366058349609, + "step": 4786 + }, + { + "epoch": 1.6868722466960353, + "grad_norm": 2.025742052316709, + "learning_rate": 1.3146087628566894e-06, + "loss": 0.5144281387329102, + "step": 4787 + }, + { + "epoch": 1.6872246696035242, + "grad_norm": 2.3397936718705967, + "learning_rate": 1.311722555345506e-06, + "loss": 0.7025437355041504, + "step": 4788 + }, + { + "epoch": 1.6875770925110132, + "grad_norm": 1.7878267984100036, + "learning_rate": 1.3088392972341256e-06, + "loss": 0.6000303030014038, + "step": 4789 + }, + { + "epoch": 1.6879295154185021, + "grad_norm": 2.057293191164908, + "learning_rate": 1.3059589895013248e-06, + "loss": 0.5732883214950562, + "step": 4790 + }, + { + "epoch": 1.688281938325991, + "grad_norm": 1.83266529432573, + "learning_rate": 1.3030816331248785e-06, + "loss": 0.5908600091934204, + "step": 4791 + }, + { + "epoch": 1.6886343612334802, + "grad_norm": 1.7317933799112057, + "learning_rate": 1.3002072290815571e-06, + "loss": 0.5579310655593872, + "step": 4792 + }, + { + "epoch": 1.6889867841409691, + "grad_norm": 1.9713002881427846, + "learning_rate": 1.2973357783471385e-06, + "loss": 0.6439195871353149, + "step": 4793 + }, + { + "epoch": 1.6893392070484583, + "grad_norm": 2.1068548457431437, + "learning_rate": 1.2944672818963822e-06, + "loss": 0.6213329434394836, + "step": 4794 + }, + { + "epoch": 1.6896916299559472, + "grad_norm": 2.271454462540911, + "learning_rate": 1.291601740703059e-06, + "loss": 0.5875385999679565, + "step": 4795 + }, + { + "epoch": 1.6900440528634362, + "grad_norm": 1.8170542219049632, + "learning_rate": 1.2887391557399287e-06, + "loss": 0.7071372270584106, + "step": 4796 + }, + { + "epoch": 
1.690396475770925, + "grad_norm": 1.9467140791895143, + "learning_rate": 1.2858795279787517e-06, + "loss": 0.504144549369812, + "step": 4797 + }, + { + "epoch": 1.690748898678414, + "grad_norm": 1.9410537548952953, + "learning_rate": 1.2830228583902816e-06, + "loss": 0.7081021070480347, + "step": 4798 + }, + { + "epoch": 1.691101321585903, + "grad_norm": 1.679906270518348, + "learning_rate": 1.2801691479442658e-06, + "loss": 0.5843057632446289, + "step": 4799 + }, + { + "epoch": 1.691453744493392, + "grad_norm": 2.366764710202919, + "learning_rate": 1.2773183976094571e-06, + "loss": 0.6437872052192688, + "step": 4800 + }, + { + "epoch": 1.691806167400881, + "grad_norm": 2.0784896951955125, + "learning_rate": 1.2744706083535907e-06, + "loss": 0.6945379972457886, + "step": 4801 + }, + { + "epoch": 1.69215859030837, + "grad_norm": 1.5919193491775196, + "learning_rate": 1.2716257811434019e-06, + "loss": 0.5827867984771729, + "step": 4802 + }, + { + "epoch": 1.6925110132158592, + "grad_norm": 2.6733992431993228, + "learning_rate": 1.2687839169446259e-06, + "loss": 0.5298784971237183, + "step": 4803 + }, + { + "epoch": 1.692863436123348, + "grad_norm": 2.069188268688187, + "learning_rate": 1.2659450167219834e-06, + "loss": 0.6885675191879272, + "step": 4804 + }, + { + "epoch": 1.693215859030837, + "grad_norm": 1.8639055588125417, + "learning_rate": 1.2631090814391945e-06, + "loss": 0.6902546882629395, + "step": 4805 + }, + { + "epoch": 1.693568281938326, + "grad_norm": 1.7529564321244235, + "learning_rate": 1.2602761120589713e-06, + "loss": 0.5972022414207458, + "step": 4806 + }, + { + "epoch": 1.693920704845815, + "grad_norm": 1.676709469876984, + "learning_rate": 1.2574461095430145e-06, + "loss": 0.5527150630950928, + "step": 4807 + }, + { + "epoch": 1.6942731277533039, + "grad_norm": 1.6073136228470446, + "learning_rate": 1.2546190748520294e-06, + "loss": 0.5898724794387817, + "step": 4808 + }, + { + "epoch": 1.6946255506607928, + "grad_norm": 1.7947012597219074, + 
"learning_rate": 1.2517950089457e-06, + "loss": 0.7023364901542664, + "step": 4809 + }, + { + "epoch": 1.694977973568282, + "grad_norm": 1.6620582383673839, + "learning_rate": 1.2489739127827083e-06, + "loss": 0.6016935706138611, + "step": 4810 + }, + { + "epoch": 1.6953303964757709, + "grad_norm": 1.8540813823422424, + "learning_rate": 1.246155787320733e-06, + "loss": 0.5724658966064453, + "step": 4811 + }, + { + "epoch": 1.69568281938326, + "grad_norm": 1.799661687922518, + "learning_rate": 1.2433406335164389e-06, + "loss": 0.5886126160621643, + "step": 4812 + }, + { + "epoch": 1.696035242290749, + "grad_norm": 1.7467523658249466, + "learning_rate": 1.2405284523254823e-06, + "loss": 0.6656844615936279, + "step": 4813 + }, + { + "epoch": 1.696387665198238, + "grad_norm": 1.9261091270854245, + "learning_rate": 1.237719244702511e-06, + "loss": 0.6208533644676208, + "step": 4814 + }, + { + "epoch": 1.6967400881057269, + "grad_norm": 2.00988865324314, + "learning_rate": 1.234913011601162e-06, + "loss": 0.6895248889923096, + "step": 4815 + }, + { + "epoch": 1.6970925110132158, + "grad_norm": 1.755568469718746, + "learning_rate": 1.23210975397407e-06, + "loss": 0.5525833368301392, + "step": 4816 + }, + { + "epoch": 1.6974449339207047, + "grad_norm": 1.9922303788563613, + "learning_rate": 1.2293094727728471e-06, + "loss": 0.5958225727081299, + "step": 4817 + }, + { + "epoch": 1.6977973568281939, + "grad_norm": 1.8538893137799548, + "learning_rate": 1.226512168948103e-06, + "loss": 0.7570905089378357, + "step": 4818 + }, + { + "epoch": 1.6981497797356828, + "grad_norm": 1.9122257264522353, + "learning_rate": 1.2237178434494379e-06, + "loss": 0.586568295955658, + "step": 4819 + }, + { + "epoch": 1.6985022026431718, + "grad_norm": 2.0787640518450257, + "learning_rate": 1.2209264972254365e-06, + "loss": 0.574797511100769, + "step": 4820 + }, + { + "epoch": 1.698854625550661, + "grad_norm": 1.8487761386635184, + "learning_rate": 1.2181381312236751e-06, + "loss": 
0.6277909278869629, + "step": 4821 + }, + { + "epoch": 1.6992070484581498, + "grad_norm": 1.6733284685909569, + "learning_rate": 1.2153527463907155e-06, + "loss": 0.6308181285858154, + "step": 4822 + }, + { + "epoch": 1.6995594713656388, + "grad_norm": 1.493809970809451, + "learning_rate": 1.2125703436721091e-06, + "loss": 0.5076426267623901, + "step": 4823 + }, + { + "epoch": 1.6999118942731277, + "grad_norm": 1.9773484387601714, + "learning_rate": 1.2097909240123994e-06, + "loss": 0.7712167501449585, + "step": 4824 + }, + { + "epoch": 1.7002643171806167, + "grad_norm": 2.0486043522302517, + "learning_rate": 1.2070144883551072e-06, + "loss": 0.5964892506599426, + "step": 4825 + }, + { + "epoch": 1.7006167400881056, + "grad_norm": 2.112366587786807, + "learning_rate": 1.2042410376427472e-06, + "loss": 0.7473628520965576, + "step": 4826 + }, + { + "epoch": 1.7009691629955948, + "grad_norm": 2.17212484756118, + "learning_rate": 1.2014705728168219e-06, + "loss": 0.6897937059402466, + "step": 4827 + }, + { + "epoch": 1.7013215859030837, + "grad_norm": 1.734585395626036, + "learning_rate": 1.198703094817817e-06, + "loss": 0.5955557823181152, + "step": 4828 + }, + { + "epoch": 1.7016740088105728, + "grad_norm": 1.8689895875293312, + "learning_rate": 1.195938604585205e-06, + "loss": 0.7051092386245728, + "step": 4829 + }, + { + "epoch": 1.7020264317180618, + "grad_norm": 3.0972606123503836, + "learning_rate": 1.1931771030574446e-06, + "loss": 0.584572434425354, + "step": 4830 + }, + { + "epoch": 1.7023788546255507, + "grad_norm": 1.9375690474214398, + "learning_rate": 1.1904185911719768e-06, + "loss": 0.5691804885864258, + "step": 4831 + }, + { + "epoch": 1.7027312775330397, + "grad_norm": 2.088088383810667, + "learning_rate": 1.187663069865237e-06, + "loss": 0.5539572834968567, + "step": 4832 + }, + { + "epoch": 1.7030837004405286, + "grad_norm": 2.0666580745541956, + "learning_rate": 1.1849105400726324e-06, + "loss": 0.6229352951049805, + "step": 4833 + }, + { + 
"epoch": 1.7034361233480175, + "grad_norm": 1.9668453557048677, + "learning_rate": 1.1821610027285613e-06, + "loss": 0.7302919626235962, + "step": 4834 + }, + { + "epoch": 1.7037885462555065, + "grad_norm": 1.6339139133298357, + "learning_rate": 1.1794144587664113e-06, + "loss": 0.6802065372467041, + "step": 4835 + }, + { + "epoch": 1.7041409691629956, + "grad_norm": 1.8504754714684983, + "learning_rate": 1.1766709091185447e-06, + "loss": 0.6448635458946228, + "step": 4836 + }, + { + "epoch": 1.7044933920704846, + "grad_norm": 1.9824663488252348, + "learning_rate": 1.1739303547163138e-06, + "loss": 0.6141834259033203, + "step": 4837 + }, + { + "epoch": 1.7048458149779737, + "grad_norm": 1.8134017158002862, + "learning_rate": 1.1711927964900482e-06, + "loss": 0.5634737014770508, + "step": 4838 + }, + { + "epoch": 1.7051982378854627, + "grad_norm": 1.7474752293775022, + "learning_rate": 1.1684582353690642e-06, + "loss": 0.6786668300628662, + "step": 4839 + }, + { + "epoch": 1.7055506607929516, + "grad_norm": 1.8867560516103576, + "learning_rate": 1.1657266722816663e-06, + "loss": 0.6117962002754211, + "step": 4840 + }, + { + "epoch": 1.7059030837004405, + "grad_norm": 1.8617179395667027, + "learning_rate": 1.1629981081551278e-06, + "loss": 0.6115782260894775, + "step": 4841 + }, + { + "epoch": 1.7062555066079295, + "grad_norm": 1.676805324865966, + "learning_rate": 1.1602725439157114e-06, + "loss": 0.6526266932487488, + "step": 4842 + }, + { + "epoch": 1.7066079295154184, + "grad_norm": 1.8958276581556894, + "learning_rate": 1.1575499804886658e-06, + "loss": 0.5449249148368835, + "step": 4843 + }, + { + "epoch": 1.7069603524229073, + "grad_norm": 1.7291534323462554, + "learning_rate": 1.1548304187982152e-06, + "loss": 0.5793930292129517, + "step": 4844 + }, + { + "epoch": 1.7073127753303965, + "grad_norm": 1.9655642851245159, + "learning_rate": 1.152113859767565e-06, + "loss": 0.5133150815963745, + "step": 4845 + }, + { + "epoch": 1.7076651982378854, + "grad_norm": 
2.3523116804029973, + "learning_rate": 1.1494003043189028e-06, + "loss": 0.6771460771560669, + "step": 4846 + }, + { + "epoch": 1.7080176211453746, + "grad_norm": 1.651478684492262, + "learning_rate": 1.1466897533733945e-06, + "loss": 0.5680071115493774, + "step": 4847 + }, + { + "epoch": 1.7083700440528635, + "grad_norm": 1.5749772965105935, + "learning_rate": 1.1439822078511941e-06, + "loss": 0.58878493309021, + "step": 4848 + }, + { + "epoch": 1.7087224669603525, + "grad_norm": 2.108145787297885, + "learning_rate": 1.14127766867142e-06, + "loss": 0.6441792249679565, + "step": 4849 + }, + { + "epoch": 1.7090748898678414, + "grad_norm": 1.98601005576605, + "learning_rate": 1.1385761367521865e-06, + "loss": 0.4910963773727417, + "step": 4850 + }, + { + "epoch": 1.7094273127753303, + "grad_norm": 1.860202988226145, + "learning_rate": 1.1358776130105765e-06, + "loss": 0.5878154635429382, + "step": 4851 + }, + { + "epoch": 1.7097797356828193, + "grad_norm": 1.8203129566458394, + "learning_rate": 1.133182098362654e-06, + "loss": 0.5835394859313965, + "step": 4852 + }, + { + "epoch": 1.7101321585903082, + "grad_norm": 2.552503327854629, + "learning_rate": 1.130489593723465e-06, + "loss": 0.6612577438354492, + "step": 4853 + }, + { + "epoch": 1.7104845814977974, + "grad_norm": 1.6914248252090902, + "learning_rate": 1.1278001000070282e-06, + "loss": 0.5892096757888794, + "step": 4854 + }, + { + "epoch": 1.7108370044052863, + "grad_norm": 1.597330243290686, + "learning_rate": 1.1251136181263433e-06, + "loss": 0.6196271181106567, + "step": 4855 + }, + { + "epoch": 1.7111894273127755, + "grad_norm": 1.7525774926334832, + "learning_rate": 1.122430148993392e-06, + "loss": 0.5365586280822754, + "step": 4856 + }, + { + "epoch": 1.7115418502202644, + "grad_norm": 1.8547440453110882, + "learning_rate": 1.119749693519121e-06, + "loss": 0.6006353497505188, + "step": 4857 + }, + { + "epoch": 1.7118942731277533, + "grad_norm": 1.7445383281871432, + "learning_rate": 
1.117072252613467e-06, + "loss": 0.5645362138748169, + "step": 4858 + }, + { + "epoch": 1.7122466960352423, + "grad_norm": 1.928827114023792, + "learning_rate": 1.1143978271853362e-06, + "loss": 0.46408799290657043, + "step": 4859 + }, + { + "epoch": 1.7125991189427312, + "grad_norm": 1.9357772553695842, + "learning_rate": 1.1117264181426134e-06, + "loss": 0.7798513770103455, + "step": 4860 + }, + { + "epoch": 1.7129515418502201, + "grad_norm": 2.234058868113385, + "learning_rate": 1.109058026392158e-06, + "loss": 0.739770770072937, + "step": 4861 + }, + { + "epoch": 1.7133039647577093, + "grad_norm": 1.8311645792398603, + "learning_rate": 1.1063926528398062e-06, + "loss": 0.567306637763977, + "step": 4862 + }, + { + "epoch": 1.7136563876651982, + "grad_norm": 1.8983872649225184, + "learning_rate": 1.1037302983903686e-06, + "loss": 0.5730962753295898, + "step": 4863 + }, + { + "epoch": 1.7140088105726872, + "grad_norm": 2.0428299761075186, + "learning_rate": 1.1010709639476335e-06, + "loss": 0.6311475038528442, + "step": 4864 + }, + { + "epoch": 1.7143612334801763, + "grad_norm": 2.074080543967214, + "learning_rate": 1.098414650414359e-06, + "loss": 0.5867577791213989, + "step": 4865 + }, + { + "epoch": 1.7147136563876653, + "grad_norm": 1.7945849101921227, + "learning_rate": 1.0957613586922844e-06, + "loss": 0.6291393637657166, + "step": 4866 + }, + { + "epoch": 1.7150660792951542, + "grad_norm": 2.221825931925125, + "learning_rate": 1.0931110896821184e-06, + "loss": 0.5811575651168823, + "step": 4867 + }, + { + "epoch": 1.7154185022026431, + "grad_norm": 1.8041589779612486, + "learning_rate": 1.0904638442835459e-06, + "loss": 0.6340835690498352, + "step": 4868 + }, + { + "epoch": 1.715770925110132, + "grad_norm": 2.1324283591729696, + "learning_rate": 1.087819623395222e-06, + "loss": 0.6543419361114502, + "step": 4869 + }, + { + "epoch": 1.716123348017621, + "grad_norm": 1.7815282855404584, + "learning_rate": 1.0851784279147793e-06, + "loss": 0.5669729709625244, 
+ "step": 4870 + }, + { + "epoch": 1.7164757709251102, + "grad_norm": 1.7880383242870224, + "learning_rate": 1.08254025873882e-06, + "loss": 0.5422554612159729, + "step": 4871 + }, + { + "epoch": 1.716828193832599, + "grad_norm": 2.1378220532284646, + "learning_rate": 1.0799051167629215e-06, + "loss": 0.6154215335845947, + "step": 4872 + }, + { + "epoch": 1.7171806167400883, + "grad_norm": 1.6926103915620132, + "learning_rate": 1.0772730028816304e-06, + "loss": 0.6306319236755371, + "step": 4873 + }, + { + "epoch": 1.7175330396475772, + "grad_norm": 1.8857595594505687, + "learning_rate": 1.0746439179884716e-06, + "loss": 0.6301003694534302, + "step": 4874 + }, + { + "epoch": 1.7178854625550661, + "grad_norm": 2.02854056964172, + "learning_rate": 1.0720178629759347e-06, + "loss": 0.5730071067810059, + "step": 4875 + }, + { + "epoch": 1.718237885462555, + "grad_norm": 1.911878436689674, + "learning_rate": 1.0693948387354836e-06, + "loss": 0.5330506563186646, + "step": 4876 + }, + { + "epoch": 1.718590308370044, + "grad_norm": 2.2472032788534033, + "learning_rate": 1.0667748461575544e-06, + "loss": 0.7724611759185791, + "step": 4877 + }, + { + "epoch": 1.718942731277533, + "grad_norm": 1.8072854503281317, + "learning_rate": 1.0641578861315517e-06, + "loss": 0.5415126085281372, + "step": 4878 + }, + { + "epoch": 1.7192951541850219, + "grad_norm": 1.7491717586336433, + "learning_rate": 1.0615439595458554e-06, + "loss": 0.4895828664302826, + "step": 4879 + }, + { + "epoch": 1.719647577092511, + "grad_norm": 2.1761761181791757, + "learning_rate": 1.0589330672878084e-06, + "loss": 0.6049074530601501, + "step": 4880 + }, + { + "epoch": 1.72, + "grad_norm": 1.8120989551683504, + "learning_rate": 1.056325210243726e-06, + "loss": 0.5733378529548645, + "step": 4881 + }, + { + "epoch": 1.7203524229074891, + "grad_norm": 1.842989042937231, + "learning_rate": 1.0537203892989e-06, + "loss": 0.6034674644470215, + "step": 4882 + }, + { + "epoch": 1.720704845814978, + "grad_norm": 
1.873470428701205, + "learning_rate": 1.0511186053375833e-06, + "loss": 0.5282200574874878, + "step": 4883 + }, + { + "epoch": 1.721057268722467, + "grad_norm": 1.7879116674889537, + "learning_rate": 1.0485198592430001e-06, + "loss": 0.6331876516342163, + "step": 4884 + }, + { + "epoch": 1.721409691629956, + "grad_norm": 1.9365084560803385, + "learning_rate": 1.045924151897344e-06, + "loss": 0.5194844007492065, + "step": 4885 + }, + { + "epoch": 1.7217621145374449, + "grad_norm": 1.9087945355709668, + "learning_rate": 1.0433314841817755e-06, + "loss": 0.5496135354042053, + "step": 4886 + }, + { + "epoch": 1.7221145374449338, + "grad_norm": 2.009884434911672, + "learning_rate": 1.0407418569764304e-06, + "loss": 0.7871953248977661, + "step": 4887 + }, + { + "epoch": 1.7224669603524227, + "grad_norm": 2.040889138785673, + "learning_rate": 1.0381552711604004e-06, + "loss": 0.7199628353118896, + "step": 4888 + }, + { + "epoch": 1.722819383259912, + "grad_norm": 2.248999014584043, + "learning_rate": 1.0355717276117506e-06, + "loss": 0.5876469612121582, + "step": 4889 + }, + { + "epoch": 1.7231718061674008, + "grad_norm": 2.0349261607011, + "learning_rate": 1.0329912272075181e-06, + "loss": 0.5543426275253296, + "step": 4890 + }, + { + "epoch": 1.72352422907489, + "grad_norm": 1.583533546685778, + "learning_rate": 1.0304137708236994e-06, + "loss": 0.5118540525436401, + "step": 4891 + }, + { + "epoch": 1.723876651982379, + "grad_norm": 1.82424017683773, + "learning_rate": 1.0278393593352631e-06, + "loss": 0.628477931022644, + "step": 4892 + }, + { + "epoch": 1.7242290748898679, + "grad_norm": 1.9109773060364437, + "learning_rate": 1.0252679936161392e-06, + "loss": 0.6119322776794434, + "step": 4893 + }, + { + "epoch": 1.7245814977973568, + "grad_norm": 1.8450217827392812, + "learning_rate": 1.0226996745392259e-06, + "loss": 0.7661763429641724, + "step": 4894 + }, + { + "epoch": 1.7249339207048457, + "grad_norm": 2.1201139928861394, + "learning_rate": 
1.0201344029763927e-06, + "loss": 0.6431440114974976, + "step": 4895 + }, + { + "epoch": 1.7252863436123347, + "grad_norm": 2.0387248477928503, + "learning_rate": 1.0175721797984639e-06, + "loss": 0.7295387983322144, + "step": 4896 + }, + { + "epoch": 1.7256387665198238, + "grad_norm": 1.9561833203401287, + "learning_rate": 1.015013005875235e-06, + "loss": 0.58225017786026, + "step": 4897 + }, + { + "epoch": 1.7259911894273128, + "grad_norm": 1.9211243008184207, + "learning_rate": 1.0124568820754689e-06, + "loss": 0.5467473864555359, + "step": 4898 + }, + { + "epoch": 1.7263436123348017, + "grad_norm": 2.2453442964094967, + "learning_rate": 1.00990380926689e-06, + "loss": 0.7637814283370972, + "step": 4899 + }, + { + "epoch": 1.7266960352422909, + "grad_norm": 2.13267606796778, + "learning_rate": 1.0073537883161821e-06, + "loss": 0.5354464650154114, + "step": 4900 + }, + { + "epoch": 1.7270484581497798, + "grad_norm": 1.91187833906973, + "learning_rate": 1.0048068200890037e-06, + "loss": 0.5213606357574463, + "step": 4901 + }, + { + "epoch": 1.7274008810572687, + "grad_norm": 1.8770841550484265, + "learning_rate": 1.0022629054499678e-06, + "loss": 0.6073330640792847, + "step": 4902 + }, + { + "epoch": 1.7277533039647577, + "grad_norm": 2.1663053459498283, + "learning_rate": 9.997220452626587e-07, + "loss": 0.5711998343467712, + "step": 4903 + }, + { + "epoch": 1.7281057268722466, + "grad_norm": 1.8823259072141711, + "learning_rate": 9.971842403896137e-07, + "loss": 0.6824701428413391, + "step": 4904 + }, + { + "epoch": 1.7284581497797356, + "grad_norm": 1.844862593672041, + "learning_rate": 9.9464949169234e-07, + "loss": 0.528059184551239, + "step": 4905 + }, + { + "epoch": 1.7288105726872247, + "grad_norm": 1.7519423160504919, + "learning_rate": 9.92117800031308e-07, + "loss": 0.45617133378982544, + "step": 4906 + }, + { + "epoch": 1.7291629955947136, + "grad_norm": 2.095891000231315, + "learning_rate": 9.895891662659485e-07, + "loss": 0.6186379194259644, + 
"step": 4907 + }, + { + "epoch": 1.7295154185022028, + "grad_norm": 1.8933361504308706, + "learning_rate": 9.870635912546511e-07, + "loss": 0.622776985168457, + "step": 4908 + }, + { + "epoch": 1.7298678414096917, + "grad_norm": 2.1556634846751073, + "learning_rate": 9.845410758547724e-07, + "loss": 0.6322426199913025, + "step": 4909 + }, + { + "epoch": 1.7302202643171807, + "grad_norm": 1.8637079254212523, + "learning_rate": 9.82021620922624e-07, + "loss": 0.565685510635376, + "step": 4910 + }, + { + "epoch": 1.7305726872246696, + "grad_norm": 1.9032887733300228, + "learning_rate": 9.795052273134908e-07, + "loss": 0.670723557472229, + "step": 4911 + }, + { + "epoch": 1.7309251101321586, + "grad_norm": 1.818317953069921, + "learning_rate": 9.769918958816017e-07, + "loss": 0.627914309501648, + "step": 4912 + }, + { + "epoch": 1.7312775330396475, + "grad_norm": 1.8142433277320784, + "learning_rate": 9.74481627480156e-07, + "loss": 0.613754391670227, + "step": 4913 + }, + { + "epoch": 1.7316299559471364, + "grad_norm": 1.6146673255290158, + "learning_rate": 9.719744229613148e-07, + "loss": 0.7128336429595947, + "step": 4914 + }, + { + "epoch": 1.7319823788546256, + "grad_norm": 2.07516307915708, + "learning_rate": 9.694702831761937e-07, + "loss": 0.692448079586029, + "step": 4915 + }, + { + "epoch": 1.7323348017621145, + "grad_norm": 1.8379288210737326, + "learning_rate": 9.669692089748717e-07, + "loss": 0.5722585916519165, + "step": 4916 + }, + { + "epoch": 1.7326872246696037, + "grad_norm": 2.880722779651987, + "learning_rate": 9.64471201206385e-07, + "loss": 0.5267904996871948, + "step": 4917 + }, + { + "epoch": 1.7330396475770926, + "grad_norm": 1.8098448963152955, + "learning_rate": 9.619762607187277e-07, + "loss": 0.6290950179100037, + "step": 4918 + }, + { + "epoch": 1.7333920704845815, + "grad_norm": 1.6991585212089806, + "learning_rate": 9.594843883588588e-07, + "loss": 0.5137144327163696, + "step": 4919 + }, + { + "epoch": 1.7337444933920705, + "grad_norm": 
2.0101083451482067, + "learning_rate": 9.569955849726875e-07, + "loss": 0.6110765337944031, + "step": 4920 + }, + { + "epoch": 1.7340969162995594, + "grad_norm": 1.805820390142787, + "learning_rate": 9.545098514050844e-07, + "loss": 0.5097514390945435, + "step": 4921 + }, + { + "epoch": 1.7344493392070484, + "grad_norm": 1.9204009410934093, + "learning_rate": 9.520271884998822e-07, + "loss": 0.7220968008041382, + "step": 4922 + }, + { + "epoch": 1.7348017621145373, + "grad_norm": 1.816061125504689, + "learning_rate": 9.495475970998669e-07, + "loss": 0.4790550470352173, + "step": 4923 + }, + { + "epoch": 1.7351541850220265, + "grad_norm": 1.8878679441443287, + "learning_rate": 9.470710780467818e-07, + "loss": 0.5440540909767151, + "step": 4924 + }, + { + "epoch": 1.7355066079295154, + "grad_norm": 1.8420075371513611, + "learning_rate": 9.445976321813277e-07, + "loss": 0.6351054310798645, + "step": 4925 + }, + { + "epoch": 1.7358590308370045, + "grad_norm": 1.8685391189030902, + "learning_rate": 9.421272603431619e-07, + "loss": 0.597430944442749, + "step": 4926 + }, + { + "epoch": 1.7362114537444935, + "grad_norm": 1.8993591697635552, + "learning_rate": 9.396599633709013e-07, + "loss": 0.5826110243797302, + "step": 4927 + }, + { + "epoch": 1.7365638766519824, + "grad_norm": 1.9528322527669026, + "learning_rate": 9.371957421021116e-07, + "loss": 0.61531662940979, + "step": 4928 + }, + { + "epoch": 1.7369162995594714, + "grad_norm": 1.7976479809998938, + "learning_rate": 9.347345973733257e-07, + "loss": 0.5286549925804138, + "step": 4929 + }, + { + "epoch": 1.7372687224669603, + "grad_norm": 2.051327926584316, + "learning_rate": 9.322765300200209e-07, + "loss": 0.6923980712890625, + "step": 4930 + }, + { + "epoch": 1.7376211453744492, + "grad_norm": 1.8765754964403032, + "learning_rate": 9.298215408766376e-07, + "loss": 0.5408697128295898, + "step": 4931 + }, + { + "epoch": 1.7379735682819382, + "grad_norm": 1.9428832757254997, + "learning_rate": 9.273696307765656e-07, 
+ "loss": 0.6360228061676025, + "step": 4932 + }, + { + "epoch": 1.7383259911894273, + "grad_norm": 1.5478222777536266, + "learning_rate": 9.249208005521538e-07, + "loss": 0.46559634804725647, + "step": 4933 + }, + { + "epoch": 1.7386784140969163, + "grad_norm": 2.0814940983294465, + "learning_rate": 9.224750510347036e-07, + "loss": 0.6065478324890137, + "step": 4934 + }, + { + "epoch": 1.7390308370044054, + "grad_norm": 2.197942688439507, + "learning_rate": 9.2003238305447e-07, + "loss": 0.6777745485305786, + "step": 4935 + }, + { + "epoch": 1.7393832599118944, + "grad_norm": 1.764242470379209, + "learning_rate": 9.175927974406607e-07, + "loss": 0.568982720375061, + "step": 4936 + }, + { + "epoch": 1.7397356828193833, + "grad_norm": 1.9082270198240563, + "learning_rate": 9.151562950214443e-07, + "loss": 0.6014461517333984, + "step": 4937 + }, + { + "epoch": 1.7400881057268722, + "grad_norm": 1.9463215063568118, + "learning_rate": 9.127228766239349e-07, + "loss": 0.6312133073806763, + "step": 4938 + }, + { + "epoch": 1.7404405286343612, + "grad_norm": 1.9066118382891128, + "learning_rate": 9.102925430742015e-07, + "loss": 0.5440298318862915, + "step": 4939 + }, + { + "epoch": 1.74079295154185, + "grad_norm": 1.9115402376997355, + "learning_rate": 9.078652951972688e-07, + "loss": 0.6599005460739136, + "step": 4940 + }, + { + "epoch": 1.7411453744493393, + "grad_norm": 1.8987879122247575, + "learning_rate": 9.054411338171099e-07, + "loss": 0.6719228625297546, + "step": 4941 + }, + { + "epoch": 1.7414977973568282, + "grad_norm": 1.7692389966879711, + "learning_rate": 9.030200597566529e-07, + "loss": 0.5771356821060181, + "step": 4942 + }, + { + "epoch": 1.7418502202643171, + "grad_norm": 2.0029197465912936, + "learning_rate": 9.006020738377764e-07, + "loss": 0.5066591501235962, + "step": 4943 + }, + { + "epoch": 1.7422026431718063, + "grad_norm": 1.754361693598564, + "learning_rate": 8.981871768813111e-07, + "loss": 0.5091663002967834, + "step": 4944 + }, + { + 
"epoch": 1.7425550660792952, + "grad_norm": 1.9092674317256029, + "learning_rate": 8.957753697070415e-07, + "loss": 0.6594514846801758, + "step": 4945 + }, + { + "epoch": 1.7429074889867842, + "grad_norm": 1.8033652679865708, + "learning_rate": 8.933666531337004e-07, + "loss": 0.5485379695892334, + "step": 4946 + }, + { + "epoch": 1.743259911894273, + "grad_norm": 2.2602019905537913, + "learning_rate": 8.909610279789716e-07, + "loss": 0.6079416871070862, + "step": 4947 + }, + { + "epoch": 1.743612334801762, + "grad_norm": 1.8415960205262154, + "learning_rate": 8.885584950594894e-07, + "loss": 0.4980606436729431, + "step": 4948 + }, + { + "epoch": 1.743964757709251, + "grad_norm": 1.5880176897451332, + "learning_rate": 8.861590551908405e-07, + "loss": 0.47701022028923035, + "step": 4949 + }, + { + "epoch": 1.7443171806167401, + "grad_norm": 1.7223149872435417, + "learning_rate": 8.837627091875578e-07, + "loss": 0.5041281580924988, + "step": 4950 + }, + { + "epoch": 1.744669603524229, + "grad_norm": 1.9666236461253934, + "learning_rate": 8.813694578631283e-07, + "loss": 0.5477255582809448, + "step": 4951 + }, + { + "epoch": 1.7450220264317182, + "grad_norm": 1.883766477051188, + "learning_rate": 8.78979302029983e-07, + "loss": 0.6377973556518555, + "step": 4952 + }, + { + "epoch": 1.7453744493392072, + "grad_norm": 1.940207867324299, + "learning_rate": 8.76592242499511e-07, + "loss": 0.6688166856765747, + "step": 4953 + }, + { + "epoch": 1.745726872246696, + "grad_norm": 2.0031898505950907, + "learning_rate": 8.742082800820406e-07, + "loss": 0.6236848831176758, + "step": 4954 + }, + { + "epoch": 1.746079295154185, + "grad_norm": 1.7582600318717108, + "learning_rate": 8.718274155868545e-07, + "loss": 0.653768002986908, + "step": 4955 + }, + { + "epoch": 1.746431718061674, + "grad_norm": 1.844534933556578, + "learning_rate": 8.694496498221805e-07, + "loss": 0.5647604465484619, + "step": 4956 + }, + { + "epoch": 1.746784140969163, + "grad_norm": 1.781932697931349, + 
"learning_rate": 8.670749835951964e-07, + "loss": 0.4960663914680481, + "step": 4957 + }, + { + "epoch": 1.7471365638766518, + "grad_norm": 1.6873484879529697, + "learning_rate": 8.647034177120317e-07, + "loss": 0.6271536350250244, + "step": 4958 + }, + { + "epoch": 1.747488986784141, + "grad_norm": 2.0059254125224757, + "learning_rate": 8.623349529777525e-07, + "loss": 0.6323459148406982, + "step": 4959 + }, + { + "epoch": 1.74784140969163, + "grad_norm": 1.9564636362517054, + "learning_rate": 8.599695901963811e-07, + "loss": 0.6084197163581848, + "step": 4960 + }, + { + "epoch": 1.748193832599119, + "grad_norm": 1.8913653459936526, + "learning_rate": 8.576073301708876e-07, + "loss": 0.48974379897117615, + "step": 4961 + }, + { + "epoch": 1.748546255506608, + "grad_norm": 1.8735173678444992, + "learning_rate": 8.552481737031859e-07, + "loss": 0.5985081195831299, + "step": 4962 + }, + { + "epoch": 1.748898678414097, + "grad_norm": 1.6360789306706147, + "learning_rate": 8.528921215941299e-07, + "loss": 0.507872998714447, + "step": 4963 + }, + { + "epoch": 1.749251101321586, + "grad_norm": 1.5251403239052872, + "learning_rate": 8.50539174643531e-07, + "loss": 0.5772356986999512, + "step": 4964 + }, + { + "epoch": 1.7496035242290748, + "grad_norm": 2.222117569410965, + "learning_rate": 8.48189333650139e-07, + "loss": 0.675100564956665, + "step": 4965 + }, + { + "epoch": 1.7499559471365638, + "grad_norm": 1.9356078104678653, + "learning_rate": 8.458425994116582e-07, + "loss": 0.5571645498275757, + "step": 4966 + }, + { + "epoch": 1.7503083700440527, + "grad_norm": 1.807660183683072, + "learning_rate": 8.434989727247233e-07, + "loss": 0.5842185020446777, + "step": 4967 + }, + { + "epoch": 1.7506607929515419, + "grad_norm": 1.7960899956397995, + "learning_rate": 8.41158454384925e-07, + "loss": 0.5693016648292542, + "step": 4968 + }, + { + "epoch": 1.7510132158590308, + "grad_norm": 1.808037504366546, + "learning_rate": 8.388210451868006e-07, + "loss": 0.5791449546813965, 
+ "step": 4969 + }, + { + "epoch": 1.75136563876652, + "grad_norm": 2.1439820497437516, + "learning_rate": 8.364867459238257e-07, + "loss": 0.4873960018157959, + "step": 4970 + }, + { + "epoch": 1.751718061674009, + "grad_norm": 1.6712365329059415, + "learning_rate": 8.341555573884175e-07, + "loss": 0.609403669834137, + "step": 4971 + }, + { + "epoch": 1.7520704845814978, + "grad_norm": 2.0664225342752327, + "learning_rate": 8.318274803719483e-07, + "loss": 0.5676242113113403, + "step": 4972 + }, + { + "epoch": 1.7524229074889868, + "grad_norm": 2.2550971825464026, + "learning_rate": 8.29502515664723e-07, + "loss": 0.7692728638648987, + "step": 4973 + }, + { + "epoch": 1.7527753303964757, + "grad_norm": 2.318073308236361, + "learning_rate": 8.27180664056001e-07, + "loss": 0.7940253019332886, + "step": 4974 + }, + { + "epoch": 1.7531277533039646, + "grad_norm": 2.021077548315, + "learning_rate": 8.24861926333973e-07, + "loss": 0.5784735083580017, + "step": 4975 + }, + { + "epoch": 1.7534801762114536, + "grad_norm": 2.106016882372918, + "learning_rate": 8.225463032857783e-07, + "loss": 0.6493539810180664, + "step": 4976 + }, + { + "epoch": 1.7538325991189427, + "grad_norm": 1.6893816606485224, + "learning_rate": 8.202337956975026e-07, + "loss": 0.615519106388092, + "step": 4977 + }, + { + "epoch": 1.7541850220264317, + "grad_norm": 2.4337358559529587, + "learning_rate": 8.179244043541678e-07, + "loss": 0.5369104146957397, + "step": 4978 + }, + { + "epoch": 1.7545374449339208, + "grad_norm": 1.8845170170566812, + "learning_rate": 8.156181300397414e-07, + "loss": 0.5527158975601196, + "step": 4979 + }, + { + "epoch": 1.7548898678414098, + "grad_norm": 2.1597753145956786, + "learning_rate": 8.133149735371316e-07, + "loss": 0.5870147943496704, + "step": 4980 + }, + { + "epoch": 1.7552422907488987, + "grad_norm": 2.0333589118991497, + "learning_rate": 8.110149356281848e-07, + "loss": 0.7235025763511658, + "step": 4981 + }, + { + "epoch": 1.7555947136563876, + "grad_norm": 
1.9283097758260628, + "learning_rate": 8.087180170937004e-07, + "loss": 0.5630521774291992, + "step": 4982 + }, + { + "epoch": 1.7559471365638766, + "grad_norm": 2.015740627515862, + "learning_rate": 8.06424218713403e-07, + "loss": 0.5005021691322327, + "step": 4983 + }, + { + "epoch": 1.7562995594713655, + "grad_norm": 2.0683486617790066, + "learning_rate": 8.041335412659679e-07, + "loss": 0.7267229557037354, + "step": 4984 + }, + { + "epoch": 1.7566519823788547, + "grad_norm": 2.2397406108409834, + "learning_rate": 8.018459855290107e-07, + "loss": 0.6494802236557007, + "step": 4985 + }, + { + "epoch": 1.7570044052863436, + "grad_norm": 1.8012009390187627, + "learning_rate": 7.995615522790845e-07, + "loss": 0.5637267827987671, + "step": 4986 + }, + { + "epoch": 1.7573568281938328, + "grad_norm": 1.807872858711751, + "learning_rate": 7.972802422916826e-07, + "loss": 0.5143958330154419, + "step": 4987 + }, + { + "epoch": 1.7577092511013217, + "grad_norm": 1.7925007157989583, + "learning_rate": 7.950020563412398e-07, + "loss": 0.607841968536377, + "step": 4988 + }, + { + "epoch": 1.7580616740088106, + "grad_norm": 1.9011698158798267, + "learning_rate": 7.927269952011285e-07, + "loss": 0.6066895723342896, + "step": 4989 + }, + { + "epoch": 1.7584140969162996, + "grad_norm": 2.293924542695718, + "learning_rate": 7.904550596436611e-07, + "loss": 0.6686232686042786, + "step": 4990 + }, + { + "epoch": 1.7587665198237885, + "grad_norm": 1.7540251789370713, + "learning_rate": 7.881862504400884e-07, + "loss": 0.589708685874939, + "step": 4991 + }, + { + "epoch": 1.7591189427312774, + "grad_norm": 1.9346002211307631, + "learning_rate": 7.859205683606008e-07, + "loss": 0.7008450031280518, + "step": 4992 + }, + { + "epoch": 1.7594713656387664, + "grad_norm": 1.5488386957340947, + "learning_rate": 7.836580141743289e-07, + "loss": 0.5754648447036743, + "step": 4993 + }, + { + "epoch": 1.7598237885462555, + "grad_norm": 1.8204543329281522, + "learning_rate": 7.81398588649338e-07, 
+ "loss": 0.5756049156188965, + "step": 4994 + }, + { + "epoch": 1.7601762114537445, + "grad_norm": 1.8754803653843481, + "learning_rate": 7.791422925526326e-07, + "loss": 0.6143715381622314, + "step": 4995 + }, + { + "epoch": 1.7605286343612336, + "grad_norm": 1.9795958910244131, + "learning_rate": 7.768891266501544e-07, + "loss": 0.700069010257721, + "step": 4996 + }, + { + "epoch": 1.7608810572687226, + "grad_norm": 1.8030282940418303, + "learning_rate": 7.746390917067847e-07, + "loss": 0.5200002193450928, + "step": 4997 + }, + { + "epoch": 1.7612334801762115, + "grad_norm": 2.0811179040330483, + "learning_rate": 7.723921884863395e-07, + "loss": 0.6963525414466858, + "step": 4998 + }, + { + "epoch": 1.7615859030837004, + "grad_norm": 1.9255908471526815, + "learning_rate": 7.701484177515717e-07, + "loss": 0.6329556703567505, + "step": 4999 + }, + { + "epoch": 1.7619383259911894, + "grad_norm": 2.0796773022688213, + "learning_rate": 7.67907780264171e-07, + "loss": 0.6980677247047424, + "step": 5000 + }, + { + "epoch": 1.7622907488986783, + "grad_norm": 1.95091452058077, + "learning_rate": 7.656702767847679e-07, + "loss": 0.5244314670562744, + "step": 5001 + }, + { + "epoch": 1.7626431718061673, + "grad_norm": 1.937585844549177, + "learning_rate": 7.634359080729215e-07, + "loss": 0.6679523587226868, + "step": 5002 + }, + { + "epoch": 1.7629955947136564, + "grad_norm": 1.7698344536731299, + "learning_rate": 7.612046748871327e-07, + "loss": 0.6168316602706909, + "step": 5003 + }, + { + "epoch": 1.7633480176211453, + "grad_norm": 1.8295319189191592, + "learning_rate": 7.589765779848346e-07, + "loss": 0.5892738699913025, + "step": 5004 + }, + { + "epoch": 1.7637004405286345, + "grad_norm": 1.8270406797726577, + "learning_rate": 7.567516181223966e-07, + "loss": 0.6714082956314087, + "step": 5005 + }, + { + "epoch": 1.7640528634361234, + "grad_norm": 1.7798086214061835, + "learning_rate": 7.545297960551245e-07, + "loss": 0.6327016353607178, + "step": 5006 + }, + { + 
"epoch": 1.7644052863436124, + "grad_norm": 1.8272907155681217, + "learning_rate": 7.52311112537254e-07, + "loss": 0.5114126205444336, + "step": 5007 + }, + { + "epoch": 1.7647577092511013, + "grad_norm": 1.9198067827489789, + "learning_rate": 7.500955683219646e-07, + "loss": 0.5701695084571838, + "step": 5008 + }, + { + "epoch": 1.7651101321585903, + "grad_norm": 1.7304483866926885, + "learning_rate": 7.478831641613616e-07, + "loss": 0.5966283082962036, + "step": 5009 + }, + { + "epoch": 1.7654625550660792, + "grad_norm": 1.7690414353003558, + "learning_rate": 7.456739008064883e-07, + "loss": 0.6219101548194885, + "step": 5010 + }, + { + "epoch": 1.7658149779735681, + "grad_norm": 2.1971226449232804, + "learning_rate": 7.434677790073197e-07, + "loss": 0.6516324877738953, + "step": 5011 + }, + { + "epoch": 1.7661674008810573, + "grad_norm": 2.0945250680543395, + "learning_rate": 7.412647995127664e-07, + "loss": 0.4623621106147766, + "step": 5012 + }, + { + "epoch": 1.7665198237885462, + "grad_norm": 1.7568345992089816, + "learning_rate": 7.390649630706703e-07, + "loss": 0.5661109685897827, + "step": 5013 + }, + { + "epoch": 1.7668722466960354, + "grad_norm": 2.0070117088967154, + "learning_rate": 7.368682704278096e-07, + "loss": 0.47063148021698, + "step": 5014 + }, + { + "epoch": 1.7672246696035243, + "grad_norm": 1.636187219475051, + "learning_rate": 7.346747223298889e-07, + "loss": 0.5684597492218018, + "step": 5015 + }, + { + "epoch": 1.7675770925110132, + "grad_norm": 1.872749765270047, + "learning_rate": 7.324843195215548e-07, + "loss": 0.5614477396011353, + "step": 5016 + }, + { + "epoch": 1.7679295154185022, + "grad_norm": 1.9944667195924293, + "learning_rate": 7.302970627463779e-07, + "loss": 0.508664608001709, + "step": 5017 + }, + { + "epoch": 1.7682819383259911, + "grad_norm": 1.9918093815103546, + "learning_rate": 7.281129527468645e-07, + "loss": 0.5348209142684937, + "step": 5018 + }, + { + "epoch": 1.76863436123348, + "grad_norm": 2.2774118234615695, 
+ "learning_rate": 7.259319902644513e-07, + "loss": 0.6441121101379395, + "step": 5019 + }, + { + "epoch": 1.7689867841409692, + "grad_norm": 1.7776640162425583, + "learning_rate": 7.237541760395083e-07, + "loss": 0.6454842686653137, + "step": 5020 + }, + { + "epoch": 1.7693392070484582, + "grad_norm": 1.818033997112941, + "learning_rate": 7.215795108113343e-07, + "loss": 0.4822286367416382, + "step": 5021 + }, + { + "epoch": 1.769691629955947, + "grad_norm": 2.2519074742911775, + "learning_rate": 7.19407995318162e-07, + "loss": 0.6078327894210815, + "step": 5022 + }, + { + "epoch": 1.7700440528634362, + "grad_norm": 1.9964867958416748, + "learning_rate": 7.172396302971507e-07, + "loss": 0.6394459009170532, + "step": 5023 + }, + { + "epoch": 1.7703964757709252, + "grad_norm": 1.919321953608054, + "learning_rate": 7.150744164843959e-07, + "loss": 0.646416425704956, + "step": 5024 + }, + { + "epoch": 1.7707488986784141, + "grad_norm": 1.743918601710363, + "learning_rate": 7.129123546149208e-07, + "loss": 0.6265356540679932, + "step": 5025 + }, + { + "epoch": 1.771101321585903, + "grad_norm": 1.717725969603381, + "learning_rate": 7.107534454226728e-07, + "loss": 0.5074717998504639, + "step": 5026 + }, + { + "epoch": 1.771453744493392, + "grad_norm": 1.9181838757933405, + "learning_rate": 7.0859768964054e-07, + "loss": 0.7036402821540833, + "step": 5027 + }, + { + "epoch": 1.771806167400881, + "grad_norm": 1.7638856276686163, + "learning_rate": 7.064450880003327e-07, + "loss": 0.6098893880844116, + "step": 5028 + }, + { + "epoch": 1.77215859030837, + "grad_norm": 2.005026773406909, + "learning_rate": 7.042956412327917e-07, + "loss": 0.582880973815918, + "step": 5029 + }, + { + "epoch": 1.772511013215859, + "grad_norm": 2.013313109536588, + "learning_rate": 7.021493500675869e-07, + "loss": 0.6003242135047913, + "step": 5030 + }, + { + "epoch": 1.7728634361233482, + "grad_norm": 1.9319887994625418, + "learning_rate": 7.000062152333165e-07, + "loss": 0.4999944865703583, + 
"step": 5031 + }, + { + "epoch": 1.7732158590308371, + "grad_norm": 1.8450299102376384, + "learning_rate": 6.978662374575107e-07, + "loss": 0.5569149255752563, + "step": 5032 + }, + { + "epoch": 1.773568281938326, + "grad_norm": 1.9277460192299252, + "learning_rate": 6.957294174666263e-07, + "loss": 0.5600287914276123, + "step": 5033 + }, + { + "epoch": 1.773920704845815, + "grad_norm": 1.8890013971887576, + "learning_rate": 6.935957559860418e-07, + "loss": 0.5412951707839966, + "step": 5034 + }, + { + "epoch": 1.774273127753304, + "grad_norm": 1.7378105888388657, + "learning_rate": 6.914652537400735e-07, + "loss": 0.5881151556968689, + "step": 5035 + }, + { + "epoch": 1.7746255506607929, + "grad_norm": 1.8829243382985155, + "learning_rate": 6.893379114519572e-07, + "loss": 0.5975406169891357, + "step": 5036 + }, + { + "epoch": 1.7749779735682818, + "grad_norm": 1.7883517993987919, + "learning_rate": 6.872137298438653e-07, + "loss": 0.6266802549362183, + "step": 5037 + }, + { + "epoch": 1.775330396475771, + "grad_norm": 2.279148556628154, + "learning_rate": 6.850927096368854e-07, + "loss": 0.6825709939002991, + "step": 5038 + }, + { + "epoch": 1.77568281938326, + "grad_norm": 1.6068572613194736, + "learning_rate": 6.829748515510381e-07, + "loss": 0.6035742163658142, + "step": 5039 + }, + { + "epoch": 1.776035242290749, + "grad_norm": 1.901514453732062, + "learning_rate": 6.808601563052742e-07, + "loss": 0.6665611267089844, + "step": 5040 + }, + { + "epoch": 1.776387665198238, + "grad_norm": 2.334324554300087, + "learning_rate": 6.787486246174657e-07, + "loss": 0.8202367424964905, + "step": 5041 + }, + { + "epoch": 1.776740088105727, + "grad_norm": 1.8080635950130315, + "learning_rate": 6.766402572044084e-07, + "loss": 0.6516656875610352, + "step": 5042 + }, + { + "epoch": 1.7770925110132159, + "grad_norm": 1.6361942373114873, + "learning_rate": 6.745350547818307e-07, + "loss": 0.663591742515564, + "step": 5043 + }, + { + "epoch": 1.7774449339207048, + "grad_norm": 
2.0460511379273716, + "learning_rate": 6.724330180643824e-07, + "loss": 0.6025142669677734, + "step": 5044 + }, + { + "epoch": 1.7777973568281937, + "grad_norm": 1.6332878492082579, + "learning_rate": 6.703341477656422e-07, + "loss": 0.5704027414321899, + "step": 5045 + }, + { + "epoch": 1.7781497797356827, + "grad_norm": 2.0053343984683534, + "learning_rate": 6.682384445981071e-07, + "loss": 0.6518473625183105, + "step": 5046 + }, + { + "epoch": 1.7785022026431718, + "grad_norm": 1.6878153153712165, + "learning_rate": 6.661459092732037e-07, + "loss": 0.5547574758529663, + "step": 5047 + }, + { + "epoch": 1.7788546255506608, + "grad_norm": 1.8096814000573205, + "learning_rate": 6.640565425012846e-07, + "loss": 0.6248831748962402, + "step": 5048 + }, + { + "epoch": 1.77920704845815, + "grad_norm": 1.8747085080187502, + "learning_rate": 6.619703449916259e-07, + "loss": 0.5899701118469238, + "step": 5049 + }, + { + "epoch": 1.7795594713656389, + "grad_norm": 1.9253293216058311, + "learning_rate": 6.598873174524223e-07, + "loss": 0.41864174604415894, + "step": 5050 + }, + { + "epoch": 1.7799118942731278, + "grad_norm": 2.2457701854009025, + "learning_rate": 6.578074605908002e-07, + "loss": 0.7473436594009399, + "step": 5051 + }, + { + "epoch": 1.7802643171806167, + "grad_norm": 1.6599111795216646, + "learning_rate": 6.557307751128051e-07, + "loss": 0.49480879306793213, + "step": 5052 + }, + { + "epoch": 1.7806167400881057, + "grad_norm": 1.8257078701065834, + "learning_rate": 6.536572617234082e-07, + "loss": 0.5619323253631592, + "step": 5053 + }, + { + "epoch": 1.7809691629955946, + "grad_norm": 1.8566139978409217, + "learning_rate": 6.515869211265013e-07, + "loss": 0.5271984338760376, + "step": 5054 + }, + { + "epoch": 1.7813215859030835, + "grad_norm": 1.967436768949709, + "learning_rate": 6.495197540248999e-07, + "loss": 0.6544383764266968, + "step": 5055 + }, + { + "epoch": 1.7816740088105727, + "grad_norm": 2.157946298106486, + "learning_rate": 
6.474557611203458e-07, + "loss": 0.6525388956069946, + "step": 5056 + }, + { + "epoch": 1.7820264317180616, + "grad_norm": 2.0314482863762735, + "learning_rate": 6.453949431134987e-07, + "loss": 0.5509910583496094, + "step": 5057 + }, + { + "epoch": 1.7823788546255508, + "grad_norm": 1.6067790596532618, + "learning_rate": 6.433373007039412e-07, + "loss": 0.5030776262283325, + "step": 5058 + }, + { + "epoch": 1.7827312775330397, + "grad_norm": 1.875686429811456, + "learning_rate": 6.412828345901811e-07, + "loss": 0.6743696331977844, + "step": 5059 + }, + { + "epoch": 1.7830837004405287, + "grad_norm": 1.9399780429001139, + "learning_rate": 6.392315454696452e-07, + "loss": 0.5395437479019165, + "step": 5060 + }, + { + "epoch": 1.7834361233480176, + "grad_norm": 1.7657846282567238, + "learning_rate": 6.371834340386807e-07, + "loss": 0.5773402452468872, + "step": 5061 + }, + { + "epoch": 1.7837885462555065, + "grad_norm": 1.920136830142019, + "learning_rate": 6.351385009925582e-07, + "loss": 0.6014268398284912, + "step": 5062 + }, + { + "epoch": 1.7841409691629955, + "grad_norm": 1.9465884411051106, + "learning_rate": 6.33096747025469e-07, + "loss": 0.5519139170646667, + "step": 5063 + }, + { + "epoch": 1.7844933920704846, + "grad_norm": 3.0085962631929752, + "learning_rate": 6.310581728305254e-07, + "loss": 0.5407502055168152, + "step": 5064 + }, + { + "epoch": 1.7848458149779736, + "grad_norm": 1.5371833099084395, + "learning_rate": 6.290227790997605e-07, + "loss": 0.61688232421875, + "step": 5065 + }, + { + "epoch": 1.7851982378854625, + "grad_norm": 2.002396471657761, + "learning_rate": 6.269905665241271e-07, + "loss": 0.5212849974632263, + "step": 5066 + }, + { + "epoch": 1.7855506607929517, + "grad_norm": 1.7684490871986807, + "learning_rate": 6.249615357934968e-07, + "loss": 0.6827710866928101, + "step": 5067 + }, + { + "epoch": 1.7859030837004406, + "grad_norm": 2.016669351586175, + "learning_rate": 6.22935687596663e-07, + "loss": 0.6907633543014526, + "step": 
5068 + }, + { + "epoch": 1.7862555066079295, + "grad_norm": 2.045834595721204, + "learning_rate": 6.209130226213378e-07, + "loss": 0.5707769989967346, + "step": 5069 + }, + { + "epoch": 1.7866079295154185, + "grad_norm": 1.9432188628486171, + "learning_rate": 6.188935415541541e-07, + "loss": 0.6062690019607544, + "step": 5070 + }, + { + "epoch": 1.7869603524229074, + "grad_norm": 1.8744219034756735, + "learning_rate": 6.168772450806604e-07, + "loss": 0.5291163921356201, + "step": 5071 + }, + { + "epoch": 1.7873127753303963, + "grad_norm": 1.8892054954511246, + "learning_rate": 6.148641338853301e-07, + "loss": 0.6324198246002197, + "step": 5072 + }, + { + "epoch": 1.7876651982378855, + "grad_norm": 1.7030219876612867, + "learning_rate": 6.128542086515499e-07, + "loss": 0.5516111850738525, + "step": 5073 + }, + { + "epoch": 1.7880176211453744, + "grad_norm": 2.1800478368143232, + "learning_rate": 6.108474700616263e-07, + "loss": 0.6384079456329346, + "step": 5074 + }, + { + "epoch": 1.7883700440528636, + "grad_norm": 1.777234944410244, + "learning_rate": 6.088439187967865e-07, + "loss": 0.5699876546859741, + "step": 5075 + }, + { + "epoch": 1.7887224669603525, + "grad_norm": 2.081274535023766, + "learning_rate": 6.06843555537171e-07, + "loss": 0.6068697571754456, + "step": 5076 + }, + { + "epoch": 1.7890748898678415, + "grad_norm": 2.1233392160842066, + "learning_rate": 6.048463809618444e-07, + "loss": 0.6254304647445679, + "step": 5077 + }, + { + "epoch": 1.7894273127753304, + "grad_norm": 2.0059926594667914, + "learning_rate": 6.02852395748782e-07, + "loss": 0.6779477596282959, + "step": 5078 + }, + { + "epoch": 1.7897797356828193, + "grad_norm": 1.8024145072939486, + "learning_rate": 6.008616005748802e-07, + "loss": 0.6139817833900452, + "step": 5079 + }, + { + "epoch": 1.7901321585903083, + "grad_norm": 2.042935872875493, + "learning_rate": 5.988739961159539e-07, + "loss": 0.553310215473175, + "step": 5080 + }, + { + "epoch": 1.7904845814977972, + "grad_norm": 
1.9543566497010472, + "learning_rate": 5.968895830467325e-07, + "loss": 0.6093542575836182, + "step": 5081 + }, + { + "epoch": 1.7908370044052864, + "grad_norm": 1.8231021161772492, + "learning_rate": 5.949083620408614e-07, + "loss": 0.6224432587623596, + "step": 5082 + }, + { + "epoch": 1.7911894273127753, + "grad_norm": 1.881995664144807, + "learning_rate": 5.929303337709047e-07, + "loss": 0.6155597567558289, + "step": 5083 + }, + { + "epoch": 1.7915418502202645, + "grad_norm": 1.7127795559170356, + "learning_rate": 5.909554989083411e-07, + "loss": 0.5742098093032837, + "step": 5084 + }, + { + "epoch": 1.7918942731277534, + "grad_norm": 2.1579790645115886, + "learning_rate": 5.889838581235641e-07, + "loss": 0.7427949905395508, + "step": 5085 + }, + { + "epoch": 1.7922466960352423, + "grad_norm": 1.8686834683482023, + "learning_rate": 5.870154120858851e-07, + "loss": 0.48208528757095337, + "step": 5086 + }, + { + "epoch": 1.7925991189427313, + "grad_norm": 2.103622298674757, + "learning_rate": 5.850501614635318e-07, + "loss": 0.48402148485183716, + "step": 5087 + }, + { + "epoch": 1.7929515418502202, + "grad_norm": 1.9085757415865392, + "learning_rate": 5.83088106923646e-07, + "loss": 0.6808921694755554, + "step": 5088 + }, + { + "epoch": 1.7933039647577091, + "grad_norm": 1.4851842618773352, + "learning_rate": 5.811292491322795e-07, + "loss": 0.48358428478240967, + "step": 5089 + }, + { + "epoch": 1.793656387665198, + "grad_norm": 1.801328000774117, + "learning_rate": 5.791735887544081e-07, + "loss": 0.6492827534675598, + "step": 5090 + }, + { + "epoch": 1.7940088105726872, + "grad_norm": 1.542873674028149, + "learning_rate": 5.772211264539162e-07, + "loss": 0.5453791618347168, + "step": 5091 + }, + { + "epoch": 1.7943612334801762, + "grad_norm": 1.780642500081645, + "learning_rate": 5.75271862893605e-07, + "loss": 0.5901151895523071, + "step": 5092 + }, + { + "epoch": 1.7947136563876653, + "grad_norm": 2.0888993209852664, + "learning_rate": 5.73325798735187e-07, 
+ "loss": 0.616302490234375, + "step": 5093 + }, + { + "epoch": 1.7950660792951543, + "grad_norm": 1.7666548150635142, + "learning_rate": 5.713829346392907e-07, + "loss": 0.616886556148529, + "step": 5094 + }, + { + "epoch": 1.7954185022026432, + "grad_norm": 2.1253066780397725, + "learning_rate": 5.694432712654597e-07, + "loss": 0.5552375316619873, + "step": 5095 + }, + { + "epoch": 1.7957709251101321, + "grad_norm": 1.9305053090727797, + "learning_rate": 5.675068092721491e-07, + "loss": 0.5956143736839294, + "step": 5096 + }, + { + "epoch": 1.796123348017621, + "grad_norm": 2.0198097994194675, + "learning_rate": 5.655735493167247e-07, + "loss": 0.5870288610458374, + "step": 5097 + }, + { + "epoch": 1.79647577092511, + "grad_norm": 1.737470684820577, + "learning_rate": 5.636434920554701e-07, + "loss": 0.5325669646263123, + "step": 5098 + }, + { + "epoch": 1.7968281938325992, + "grad_norm": 1.9881595702868853, + "learning_rate": 5.617166381435813e-07, + "loss": 0.5931425094604492, + "step": 5099 + }, + { + "epoch": 1.797180616740088, + "grad_norm": 1.9607916445612916, + "learning_rate": 5.597929882351627e-07, + "loss": 0.5755603313446045, + "step": 5100 + }, + { + "epoch": 1.797533039647577, + "grad_norm": 2.000480246693455, + "learning_rate": 5.578725429832344e-07, + "loss": 0.5780980587005615, + "step": 5101 + }, + { + "epoch": 1.7978854625550662, + "grad_norm": 1.9982279321373282, + "learning_rate": 5.559553030397258e-07, + "loss": 0.5863890647888184, + "step": 5102 + }, + { + "epoch": 1.7982378854625551, + "grad_norm": 1.8196971349794717, + "learning_rate": 5.540412690554842e-07, + "loss": 0.5577390789985657, + "step": 5103 + }, + { + "epoch": 1.798590308370044, + "grad_norm": 1.773628551628446, + "learning_rate": 5.521304416802642e-07, + "loss": 0.5994857549667358, + "step": 5104 + }, + { + "epoch": 1.798942731277533, + "grad_norm": 1.8364843823531443, + "learning_rate": 5.502228215627281e-07, + "loss": 0.6065348982810974, + "step": 5105 + }, + { + "epoch": 
1.799295154185022, + "grad_norm": 1.9447341697044171, + "learning_rate": 5.483184093504568e-07, + "loss": 0.5390498638153076, + "step": 5106 + }, + { + "epoch": 1.7996475770925109, + "grad_norm": 1.9731136151561257, + "learning_rate": 5.464172056899364e-07, + "loss": 0.5826783180236816, + "step": 5107 + }, + { + "epoch": 1.8, + "grad_norm": 1.7733740837200977, + "learning_rate": 5.445192112265718e-07, + "loss": 0.5429874658584595, + "step": 5108 + }, + { + "epoch": 1.800352422907489, + "grad_norm": 1.8521585290179927, + "learning_rate": 5.426244266046676e-07, + "loss": 0.5591466426849365, + "step": 5109 + }, + { + "epoch": 1.8007048458149781, + "grad_norm": 1.6996794293630604, + "learning_rate": 5.407328524674449e-07, + "loss": 0.5351911187171936, + "step": 5110 + }, + { + "epoch": 1.801057268722467, + "grad_norm": 1.9525068150093072, + "learning_rate": 5.388444894570378e-07, + "loss": 0.6095720529556274, + "step": 5111 + }, + { + "epoch": 1.801409691629956, + "grad_norm": 1.9048124225268466, + "learning_rate": 5.369593382144844e-07, + "loss": 0.6278849840164185, + "step": 5112 + }, + { + "epoch": 1.801762114537445, + "grad_norm": 1.932605893192458, + "learning_rate": 5.350773993797332e-07, + "loss": 0.6787056922912598, + "step": 5113 + }, + { + "epoch": 1.8021145374449339, + "grad_norm": 1.7901749162387552, + "learning_rate": 5.331986735916461e-07, + "loss": 0.6054684519767761, + "step": 5114 + }, + { + "epoch": 1.8024669603524228, + "grad_norm": 1.9918768270140568, + "learning_rate": 5.31323161487991e-07, + "loss": 0.5039973855018616, + "step": 5115 + }, + { + "epoch": 1.8028193832599118, + "grad_norm": 2.1203502988203207, + "learning_rate": 5.294508637054474e-07, + "loss": 0.6306504011154175, + "step": 5116 + }, + { + "epoch": 1.803171806167401, + "grad_norm": 1.5433818431075417, + "learning_rate": 5.275817808796013e-07, + "loss": 0.5654761791229248, + "step": 5117 + }, + { + "epoch": 1.8035242290748899, + "grad_norm": 1.84553610812893, + "learning_rate": 
5.257159136449452e-07, + "loss": 0.5801905989646912, + "step": 5118 + }, + { + "epoch": 1.803876651982379, + "grad_norm": 1.9190330109285871, + "learning_rate": 5.238532626348891e-07, + "loss": 0.6565619707107544, + "step": 5119 + }, + { + "epoch": 1.804229074889868, + "grad_norm": 2.043183915925982, + "learning_rate": 5.219938284817416e-07, + "loss": 0.5923253297805786, + "step": 5120 + }, + { + "epoch": 1.8045814977973569, + "grad_norm": 2.0522176560055647, + "learning_rate": 5.2013761181672e-07, + "loss": 0.6697949171066284, + "step": 5121 + }, + { + "epoch": 1.8049339207048458, + "grad_norm": 1.5694231089682613, + "learning_rate": 5.182846132699571e-07, + "loss": 0.5146230459213257, + "step": 5122 + }, + { + "epoch": 1.8052863436123348, + "grad_norm": 1.8882278421308176, + "learning_rate": 5.16434833470485e-07, + "loss": 0.5928882360458374, + "step": 5123 + }, + { + "epoch": 1.8056387665198237, + "grad_norm": 1.8209325836560148, + "learning_rate": 5.145882730462481e-07, + "loss": 0.6114771366119385, + "step": 5124 + }, + { + "epoch": 1.8059911894273126, + "grad_norm": 2.0596769025893122, + "learning_rate": 5.127449326240952e-07, + "loss": 0.6624642014503479, + "step": 5125 + }, + { + "epoch": 1.8063436123348018, + "grad_norm": 1.6177669824438379, + "learning_rate": 5.109048128297822e-07, + "loss": 0.6277980208396912, + "step": 5126 + }, + { + "epoch": 1.8066960352422907, + "grad_norm": 1.8432956331440709, + "learning_rate": 5.090679142879751e-07, + "loss": 0.6470246911048889, + "step": 5127 + }, + { + "epoch": 1.8070484581497799, + "grad_norm": 1.9361376318593135, + "learning_rate": 5.072342376222438e-07, + "loss": 0.6418337821960449, + "step": 5128 + }, + { + "epoch": 1.8074008810572688, + "grad_norm": 1.7303831881097942, + "learning_rate": 5.054037834550596e-07, + "loss": 0.6013847589492798, + "step": 5129 + }, + { + "epoch": 1.8077533039647578, + "grad_norm": 2.0870369514809086, + "learning_rate": 5.035765524078095e-07, + "loss": 0.5354605913162231, + 
"step": 5130 + }, + { + "epoch": 1.8081057268722467, + "grad_norm": 1.7245482885328716, + "learning_rate": 5.01752545100781e-07, + "loss": 0.6017459034919739, + "step": 5131 + }, + { + "epoch": 1.8084581497797356, + "grad_norm": 2.1853671040659335, + "learning_rate": 4.999317621531663e-07, + "loss": 0.5929696559906006, + "step": 5132 + }, + { + "epoch": 1.8088105726872246, + "grad_norm": 2.1106102623060723, + "learning_rate": 4.981142041830645e-07, + "loss": 0.6444251537322998, + "step": 5133 + }, + { + "epoch": 1.8091629955947135, + "grad_norm": 1.9231094224982612, + "learning_rate": 4.962998718074807e-07, + "loss": 0.5854116678237915, + "step": 5134 + }, + { + "epoch": 1.8095154185022027, + "grad_norm": 1.674252446757184, + "learning_rate": 4.944887656423248e-07, + "loss": 0.5145394206047058, + "step": 5135 + }, + { + "epoch": 1.8098678414096916, + "grad_norm": 1.9221197947181823, + "learning_rate": 4.926808863024102e-07, + "loss": 0.5733104348182678, + "step": 5136 + }, + { + "epoch": 1.8102202643171807, + "grad_norm": 1.955048282910108, + "learning_rate": 4.908762344014573e-07, + "loss": 0.5925072431564331, + "step": 5137 + }, + { + "epoch": 1.8105726872246697, + "grad_norm": 1.8754640994406597, + "learning_rate": 4.890748105520859e-07, + "loss": 0.5346912145614624, + "step": 5138 + }, + { + "epoch": 1.8109251101321586, + "grad_norm": 1.636475505756285, + "learning_rate": 4.87276615365827e-07, + "loss": 0.6206755638122559, + "step": 5139 + }, + { + "epoch": 1.8112775330396476, + "grad_norm": 2.0734228349073076, + "learning_rate": 4.854816494531089e-07, + "loss": 0.5998660326004028, + "step": 5140 + }, + { + "epoch": 1.8116299559471365, + "grad_norm": 2.10222956499389, + "learning_rate": 4.836899134232687e-07, + "loss": 0.44545644521713257, + "step": 5141 + }, + { + "epoch": 1.8119823788546254, + "grad_norm": 1.904050289597462, + "learning_rate": 4.81901407884543e-07, + "loss": 0.701204776763916, + "step": 5142 + }, + { + "epoch": 1.8123348017621146, + 
"grad_norm": 1.8707530799436762, + "learning_rate": 4.801161334440762e-07, + "loss": 0.6103897094726562, + "step": 5143 + }, + { + "epoch": 1.8126872246696035, + "grad_norm": 1.7727850982789193, + "learning_rate": 4.783340907079126e-07, + "loss": 0.5864719152450562, + "step": 5144 + }, + { + "epoch": 1.8130396475770925, + "grad_norm": 1.436946543481978, + "learning_rate": 4.7655528028099916e-07, + "loss": 0.46949082612991333, + "step": 5145 + }, + { + "epoch": 1.8133920704845816, + "grad_norm": 1.9729708472080463, + "learning_rate": 4.7477970276718855e-07, + "loss": 0.6371885538101196, + "step": 5146 + }, + { + "epoch": 1.8137444933920706, + "grad_norm": 2.043577546107911, + "learning_rate": 4.730073587692319e-07, + "loss": 0.6819220781326294, + "step": 5147 + }, + { + "epoch": 1.8140969162995595, + "grad_norm": 1.7501541102560871, + "learning_rate": 4.712382488887868e-07, + "loss": 0.5230735540390015, + "step": 5148 + }, + { + "epoch": 1.8144493392070484, + "grad_norm": 1.6629154647812032, + "learning_rate": 4.6947237372640954e-07, + "loss": 0.5194997787475586, + "step": 5149 + }, + { + "epoch": 1.8148017621145374, + "grad_norm": 2.6396803493511842, + "learning_rate": 4.677097338815595e-07, + "loss": 0.6025055050849915, + "step": 5150 + }, + { + "epoch": 1.8151541850220263, + "grad_norm": 1.9158428969793393, + "learning_rate": 4.6595032995260135e-07, + "loss": 0.649467945098877, + "step": 5151 + }, + { + "epoch": 1.8155066079295155, + "grad_norm": 1.8951471308172565, + "learning_rate": 4.641941625367918e-07, + "loss": 0.5216347575187683, + "step": 5152 + }, + { + "epoch": 1.8158590308370044, + "grad_norm": 2.264572307408149, + "learning_rate": 4.6244123223030177e-07, + "loss": 0.5135647058486938, + "step": 5153 + }, + { + "epoch": 1.8162114537444936, + "grad_norm": 1.8178771999892822, + "learning_rate": 4.6069153962819193e-07, + "loss": 0.5526058673858643, + "step": 5154 + }, + { + "epoch": 1.8165638766519825, + "grad_norm": 2.050533288883353, + "learning_rate": 
4.589450853244315e-07, + "loss": 0.5897486209869385, + "step": 5155 + }, + { + "epoch": 1.8169162995594714, + "grad_norm": 1.8009014119109743, + "learning_rate": 4.5720186991188517e-07, + "loss": 0.5698407888412476, + "step": 5156 + }, + { + "epoch": 1.8172687224669604, + "grad_norm": 1.7954864355128493, + "learning_rate": 4.5546189398232075e-07, + "loss": 0.579573392868042, + "step": 5157 + }, + { + "epoch": 1.8176211453744493, + "grad_norm": 1.7473651992455344, + "learning_rate": 4.5372515812640573e-07, + "loss": 0.41852182149887085, + "step": 5158 + }, + { + "epoch": 1.8179735682819382, + "grad_norm": 1.7056493552996725, + "learning_rate": 4.519916629337107e-07, + "loss": 0.6081204414367676, + "step": 5159 + }, + { + "epoch": 1.8183259911894272, + "grad_norm": 2.046109798166009, + "learning_rate": 4.502614089926982e-07, + "loss": 0.5725652575492859, + "step": 5160 + }, + { + "epoch": 1.8186784140969163, + "grad_norm": 1.7147916989755474, + "learning_rate": 4.4853439689073965e-07, + "loss": 0.5109303593635559, + "step": 5161 + }, + { + "epoch": 1.8190308370044053, + "grad_norm": 1.8721629996812361, + "learning_rate": 4.468106272141004e-07, + "loss": 0.5647833347320557, + "step": 5162 + }, + { + "epoch": 1.8193832599118944, + "grad_norm": 1.8784402680779348, + "learning_rate": 4.450901005479469e-07, + "loss": 0.6074738502502441, + "step": 5163 + }, + { + "epoch": 1.8197356828193834, + "grad_norm": 1.9135972387212516, + "learning_rate": 4.433728174763452e-07, + "loss": 0.647289514541626, + "step": 5164 + }, + { + "epoch": 1.8200881057268723, + "grad_norm": 2.08976454113542, + "learning_rate": 4.416587785822568e-07, + "loss": 0.5817590951919556, + "step": 5165 + }, + { + "epoch": 1.8204405286343612, + "grad_norm": 2.105714289057314, + "learning_rate": 4.399479844475485e-07, + "loss": 0.6483672857284546, + "step": 5166 + }, + { + "epoch": 1.8207929515418502, + "grad_norm": 1.9562649517319024, + "learning_rate": 4.382404356529801e-07, + "loss": 0.5439441204071045, + 
"step": 5167 + }, + { + "epoch": 1.821145374449339, + "grad_norm": 1.8467126365486348, + "learning_rate": 4.3653613277820804e-07, + "loss": 0.5835710167884827, + "step": 5168 + }, + { + "epoch": 1.821497797356828, + "grad_norm": 1.9450074521030982, + "learning_rate": 4.3483507640179503e-07, + "loss": 0.7024152874946594, + "step": 5169 + }, + { + "epoch": 1.8218502202643172, + "grad_norm": 1.880332916659811, + "learning_rate": 4.331372671011935e-07, + "loss": 0.5223513841629028, + "step": 5170 + }, + { + "epoch": 1.8222026431718061, + "grad_norm": 2.771814545513559, + "learning_rate": 4.3144270545275814e-07, + "loss": 0.5975688099861145, + "step": 5171 + }, + { + "epoch": 1.8225550660792953, + "grad_norm": 1.5329834705964882, + "learning_rate": 4.2975139203173977e-07, + "loss": 0.5459109544754028, + "step": 5172 + }, + { + "epoch": 1.8229074889867842, + "grad_norm": 1.8202354421886453, + "learning_rate": 4.2806332741228586e-07, + "loss": 0.6155862808227539, + "step": 5173 + }, + { + "epoch": 1.8232599118942732, + "grad_norm": 2.2226946714753644, + "learning_rate": 4.263785121674435e-07, + "loss": 0.6505374908447266, + "step": 5174 + }, + { + "epoch": 1.823612334801762, + "grad_norm": 1.9153455724722082, + "learning_rate": 4.246969468691553e-07, + "loss": 0.5243734121322632, + "step": 5175 + }, + { + "epoch": 1.823964757709251, + "grad_norm": 1.8732488601912396, + "learning_rate": 4.2301863208825676e-07, + "loss": 0.6931817531585693, + "step": 5176 + }, + { + "epoch": 1.82431718061674, + "grad_norm": 1.969859922329015, + "learning_rate": 4.2134356839448665e-07, + "loss": 0.5312765836715698, + "step": 5177 + }, + { + "epoch": 1.824669603524229, + "grad_norm": 1.9404158745446412, + "learning_rate": 4.1967175635647674e-07, + "loss": 0.598992109298706, + "step": 5178 + }, + { + "epoch": 1.825022026431718, + "grad_norm": 1.7631344780586065, + "learning_rate": 4.1800319654175413e-07, + "loss": 0.5844708681106567, + "step": 5179 + }, + { + "epoch": 1.825374449339207, + 
"grad_norm": 1.9995354508958225, + "learning_rate": 4.1633788951674357e-07, + "loss": 0.5884612798690796, + "step": 5180 + }, + { + "epoch": 1.8257268722466962, + "grad_norm": 1.72810410086028, + "learning_rate": 4.1467583584676395e-07, + "loss": 0.6038404107093811, + "step": 5181 + }, + { + "epoch": 1.826079295154185, + "grad_norm": 2.339259211755874, + "learning_rate": 4.130170360960317e-07, + "loss": 0.6511296033859253, + "step": 5182 + }, + { + "epoch": 1.826431718061674, + "grad_norm": 1.925197944351106, + "learning_rate": 4.113614908276609e-07, + "loss": 0.5884404182434082, + "step": 5183 + }, + { + "epoch": 1.826784140969163, + "grad_norm": 1.731239361884253, + "learning_rate": 4.097092006036507e-07, + "loss": 0.5549901723861694, + "step": 5184 + }, + { + "epoch": 1.827136563876652, + "grad_norm": 1.994782951411243, + "learning_rate": 4.0806016598490707e-07, + "loss": 0.561951756477356, + "step": 5185 + }, + { + "epoch": 1.8274889867841408, + "grad_norm": 1.869408348764558, + "learning_rate": 4.064143875312254e-07, + "loss": 0.6412413120269775, + "step": 5186 + }, + { + "epoch": 1.82784140969163, + "grad_norm": 1.6798143654231001, + "learning_rate": 4.0477186580129447e-07, + "loss": 0.6295674443244934, + "step": 5187 + }, + { + "epoch": 1.828193832599119, + "grad_norm": 1.6293958799120483, + "learning_rate": 4.031326013527015e-07, + "loss": 0.6700723767280579, + "step": 5188 + }, + { + "epoch": 1.8285462555066079, + "grad_norm": 1.8215522719850648, + "learning_rate": 4.014965947419236e-07, + "loss": 0.5758254528045654, + "step": 5189 + }, + { + "epoch": 1.828898678414097, + "grad_norm": 1.9932829475641192, + "learning_rate": 3.9986384652433654e-07, + "loss": 0.6663509607315063, + "step": 5190 + }, + { + "epoch": 1.829251101321586, + "grad_norm": 1.9935453293677252, + "learning_rate": 3.982343572542069e-07, + "loss": 0.6459337472915649, + "step": 5191 + }, + { + "epoch": 1.829603524229075, + "grad_norm": 1.854876606446137, + "learning_rate": 
3.9660812748469336e-07, + "loss": 0.6411766409873962, + "step": 5192 + }, + { + "epoch": 1.8299559471365638, + "grad_norm": 2.1651745240120976, + "learning_rate": 3.9498515776785207e-07, + "loss": 0.711888313293457, + "step": 5193 + }, + { + "epoch": 1.8303083700440528, + "grad_norm": 2.2389356684810284, + "learning_rate": 3.933654486546312e-07, + "loss": 0.63288813829422, + "step": 5194 + }, + { + "epoch": 1.8306607929515417, + "grad_norm": 1.9048245223498055, + "learning_rate": 3.9174900069486985e-07, + "loss": 0.6330822706222534, + "step": 5195 + }, + { + "epoch": 1.8310132158590309, + "grad_norm": 2.0831179708663154, + "learning_rate": 3.901358144373035e-07, + "loss": 0.7242149114608765, + "step": 5196 + }, + { + "epoch": 1.8313656387665198, + "grad_norm": 1.8790323108631095, + "learning_rate": 3.885258904295575e-07, + "loss": 0.6741703748703003, + "step": 5197 + }, + { + "epoch": 1.831718061674009, + "grad_norm": 1.9200909143991698, + "learning_rate": 3.8691922921815226e-07, + "loss": 0.625057578086853, + "step": 5198 + }, + { + "epoch": 1.832070484581498, + "grad_norm": 2.457846968244059, + "learning_rate": 3.853158313484995e-07, + "loss": 0.673669159412384, + "step": 5199 + }, + { + "epoch": 1.8324229074889868, + "grad_norm": 1.7310768756301407, + "learning_rate": 3.837156973648992e-07, + "loss": 0.5981203317642212, + "step": 5200 + }, + { + "epoch": 1.8327753303964758, + "grad_norm": 2.2560941225086992, + "learning_rate": 3.821188278105514e-07, + "loss": 0.6577199697494507, + "step": 5201 + }, + { + "epoch": 1.8331277533039647, + "grad_norm": 1.8570769012933126, + "learning_rate": 3.805252232275414e-07, + "loss": 0.6951043605804443, + "step": 5202 + }, + { + "epoch": 1.8334801762114536, + "grad_norm": 1.874325920944958, + "learning_rate": 3.7893488415684964e-07, + "loss": 0.572435200214386, + "step": 5203 + }, + { + "epoch": 1.8338325991189426, + "grad_norm": 1.7906206085216059, + "learning_rate": 3.773478111383455e-07, + "loss": 0.5849496126174927, + 
"step": 5204 + }, + { + "epoch": 1.8341850220264317, + "grad_norm": 1.9908368337543014, + "learning_rate": 3.7576400471079023e-07, + "loss": 0.5380967855453491, + "step": 5205 + }, + { + "epoch": 1.8345374449339207, + "grad_norm": 1.7322293442190257, + "learning_rate": 3.7418346541183923e-07, + "loss": 0.5681222677230835, + "step": 5206 + }, + { + "epoch": 1.8348898678414098, + "grad_norm": 1.7551676131968534, + "learning_rate": 3.7260619377803677e-07, + "loss": 0.5012099146842957, + "step": 5207 + }, + { + "epoch": 1.8352422907488988, + "grad_norm": 1.9889231090545432, + "learning_rate": 3.710321903448133e-07, + "loss": 0.6175205707550049, + "step": 5208 + }, + { + "epoch": 1.8355947136563877, + "grad_norm": 2.0658320822662137, + "learning_rate": 3.6946145564649817e-07, + "loss": 0.6190954446792603, + "step": 5209 + }, + { + "epoch": 1.8359471365638766, + "grad_norm": 2.067936609981899, + "learning_rate": 3.678939902163048e-07, + "loss": 0.6820691823959351, + "step": 5210 + }, + { + "epoch": 1.8362995594713656, + "grad_norm": 1.6116358163190896, + "learning_rate": 3.6632979458633867e-07, + "loss": 0.5309683084487915, + "step": 5211 + }, + { + "epoch": 1.8366519823788545, + "grad_norm": 1.7416007879814253, + "learning_rate": 3.6476886928759726e-07, + "loss": 0.5110820531845093, + "step": 5212 + }, + { + "epoch": 1.8370044052863435, + "grad_norm": 1.723221372899004, + "learning_rate": 3.6321121484996447e-07, + "loss": 0.6226333975791931, + "step": 5213 + }, + { + "epoch": 1.8373568281938326, + "grad_norm": 2.234178040191492, + "learning_rate": 3.6165683180221735e-07, + "loss": 0.6287777423858643, + "step": 5214 + }, + { + "epoch": 1.8377092511013216, + "grad_norm": 1.9295755553308827, + "learning_rate": 3.601057206720182e-07, + "loss": 0.7033661603927612, + "step": 5215 + }, + { + "epoch": 1.8380616740088107, + "grad_norm": 2.3805238150126473, + "learning_rate": 3.5855788198592257e-07, + "loss": 0.5841168165206909, + "step": 5216 + }, + { + "epoch": 
1.8384140969162996, + "grad_norm": 1.9475866760038651, + "learning_rate": 3.570133162693734e-07, + "loss": 0.6797176599502563, + "step": 5217 + }, + { + "epoch": 1.8387665198237886, + "grad_norm": 1.8282916435885754, + "learning_rate": 3.5547202404670246e-07, + "loss": 0.4317880868911743, + "step": 5218 + }, + { + "epoch": 1.8391189427312775, + "grad_norm": 1.8334146730463823, + "learning_rate": 3.5393400584113004e-07, + "loss": 0.4757443368434906, + "step": 5219 + }, + { + "epoch": 1.8394713656387665, + "grad_norm": 1.907804753373484, + "learning_rate": 3.5239926217476627e-07, + "loss": 0.6341856718063354, + "step": 5220 + }, + { + "epoch": 1.8398237885462554, + "grad_norm": 1.8320811149781473, + "learning_rate": 3.5086779356860777e-07, + "loss": 0.5401504039764404, + "step": 5221 + }, + { + "epoch": 1.8401762114537445, + "grad_norm": 1.9485378653698677, + "learning_rate": 3.4933960054254314e-07, + "loss": 0.507185697555542, + "step": 5222 + }, + { + "epoch": 1.8405286343612335, + "grad_norm": 1.8475072625751607, + "learning_rate": 3.478146836153418e-07, + "loss": 0.544599175453186, + "step": 5223 + }, + { + "epoch": 1.8408810572687224, + "grad_norm": 1.7516560167770228, + "learning_rate": 3.4629304330466964e-07, + "loss": 0.5231183767318726, + "step": 5224 + }, + { + "epoch": 1.8412334801762116, + "grad_norm": 1.9594972590005177, + "learning_rate": 3.447746801270746e-07, + "loss": 0.5505118370056152, + "step": 5225 + }, + { + "epoch": 1.8415859030837005, + "grad_norm": 1.8779318369867126, + "learning_rate": 3.432595945979944e-07, + "loss": 0.6056097149848938, + "step": 5226 + }, + { + "epoch": 1.8419383259911895, + "grad_norm": 2.1828814894071806, + "learning_rate": 3.4174778723175204e-07, + "loss": 0.6292518377304077, + "step": 5227 + }, + { + "epoch": 1.8422907488986784, + "grad_norm": 2.121254282924953, + "learning_rate": 3.4023925854156035e-07, + "loss": 0.6821235418319702, + "step": 5228 + }, + { + "epoch": 1.8426431718061673, + "grad_norm": 
1.8646887822875091, + "learning_rate": 3.3873400903951636e-07, + "loss": 0.6663388013839722, + "step": 5229 + }, + { + "epoch": 1.8429955947136563, + "grad_norm": 1.7699721471254064, + "learning_rate": 3.3723203923660795e-07, + "loss": 0.5283368825912476, + "step": 5230 + }, + { + "epoch": 1.8433480176211454, + "grad_norm": 1.8757843861417383, + "learning_rate": 3.35733349642704e-07, + "loss": 0.6193508505821228, + "step": 5231 + }, + { + "epoch": 1.8437004405286344, + "grad_norm": 1.8277200643148488, + "learning_rate": 3.3423794076656635e-07, + "loss": 0.5790667533874512, + "step": 5232 + }, + { + "epoch": 1.8440528634361235, + "grad_norm": 1.8773326611638317, + "learning_rate": 3.3274581311583786e-07, + "loss": 0.5774649381637573, + "step": 5233 + }, + { + "epoch": 1.8444052863436124, + "grad_norm": 1.8907427086265292, + "learning_rate": 3.312569671970489e-07, + "loss": 0.7818938493728638, + "step": 5234 + }, + { + "epoch": 1.8447577092511014, + "grad_norm": 1.9327729742836703, + "learning_rate": 3.297714035156174e-07, + "loss": 0.7140024900436401, + "step": 5235 + }, + { + "epoch": 1.8451101321585903, + "grad_norm": 1.8813227413168874, + "learning_rate": 3.2828912257584664e-07, + "loss": 0.526549220085144, + "step": 5236 + }, + { + "epoch": 1.8454625550660793, + "grad_norm": 1.7801884231788352, + "learning_rate": 3.268101248809219e-07, + "loss": 0.5497986078262329, + "step": 5237 + }, + { + "epoch": 1.8458149779735682, + "grad_norm": 1.8669723447216968, + "learning_rate": 3.2533441093292153e-07, + "loss": 0.587260901927948, + "step": 5238 + }, + { + "epoch": 1.8461674008810571, + "grad_norm": 1.7543011465942289, + "learning_rate": 3.238619812327992e-07, + "loss": 0.6064329147338867, + "step": 5239 + }, + { + "epoch": 1.8465198237885463, + "grad_norm": 1.6866654405083865, + "learning_rate": 3.22392836280403e-07, + "loss": 0.5427783727645874, + "step": 5240 + }, + { + "epoch": 1.8468722466960352, + "grad_norm": 2.007154381007414, + "learning_rate": 
3.209269765744605e-07, + "loss": 0.6315155029296875, + "step": 5241 + }, + { + "epoch": 1.8472246696035244, + "grad_norm": 1.8683798567232428, + "learning_rate": 3.194644026125848e-07, + "loss": 0.47614991664886475, + "step": 5242 + }, + { + "epoch": 1.8475770925110133, + "grad_norm": 1.7870378472192856, + "learning_rate": 3.1800511489127553e-07, + "loss": 0.4671345353126526, + "step": 5243 + }, + { + "epoch": 1.8479295154185023, + "grad_norm": 2.1401583736619774, + "learning_rate": 3.1654911390591404e-07, + "loss": 0.5751510262489319, + "step": 5244 + }, + { + "epoch": 1.8482819383259912, + "grad_norm": 1.8052174793154305, + "learning_rate": 3.1509640015076946e-07, + "loss": 0.41024816036224365, + "step": 5245 + }, + { + "epoch": 1.8486343612334801, + "grad_norm": 1.731551636677765, + "learning_rate": 3.136469741189918e-07, + "loss": 0.5401195287704468, + "step": 5246 + }, + { + "epoch": 1.848986784140969, + "grad_norm": 1.653370854405324, + "learning_rate": 3.1220083630261413e-07, + "loss": 0.526515007019043, + "step": 5247 + }, + { + "epoch": 1.849339207048458, + "grad_norm": 1.8913718815401968, + "learning_rate": 3.1075798719255813e-07, + "loss": 0.5476140975952148, + "step": 5248 + }, + { + "epoch": 1.8496916299559472, + "grad_norm": 1.8985078398075201, + "learning_rate": 3.093184272786254e-07, + "loss": 0.5542911291122437, + "step": 5249 + }, + { + "epoch": 1.850044052863436, + "grad_norm": 1.880723497688654, + "learning_rate": 3.078821570495005e-07, + "loss": 0.5147569179534912, + "step": 5250 + }, + { + "epoch": 1.8503964757709253, + "grad_norm": 1.982026450369604, + "learning_rate": 3.0644917699275355e-07, + "loss": 0.5774611830711365, + "step": 5251 + }, + { + "epoch": 1.8507488986784142, + "grad_norm": 1.7200421440570042, + "learning_rate": 3.0501948759483646e-07, + "loss": 0.6516300439834595, + "step": 5252 + }, + { + "epoch": 1.8511013215859031, + "grad_norm": 2.0195950340864495, + "learning_rate": 3.0359308934108435e-07, + "loss": 0.7598013877868652, 
+ "step": 5253 + }, + { + "epoch": 1.851453744493392, + "grad_norm": 2.0638022912417506, + "learning_rate": 3.0216998271571653e-07, + "loss": 0.5605336427688599, + "step": 5254 + }, + { + "epoch": 1.851806167400881, + "grad_norm": 2.028778763216705, + "learning_rate": 3.007501682018288e-07, + "loss": 0.6549514532089233, + "step": 5255 + }, + { + "epoch": 1.85215859030837, + "grad_norm": 2.059939172990393, + "learning_rate": 2.993336462814089e-07, + "loss": 0.5390901565551758, + "step": 5256 + }, + { + "epoch": 1.8525110132158589, + "grad_norm": 1.812559235788011, + "learning_rate": 2.979204174353201e-07, + "loss": 0.5039275884628296, + "step": 5257 + }, + { + "epoch": 1.852863436123348, + "grad_norm": 1.6793203683546194, + "learning_rate": 2.9651048214330956e-07, + "loss": 0.4715292453765869, + "step": 5258 + }, + { + "epoch": 1.853215859030837, + "grad_norm": 1.5445048853459802, + "learning_rate": 2.951038408840068e-07, + "loss": 0.4593687653541565, + "step": 5259 + }, + { + "epoch": 1.8535682819383261, + "grad_norm": 2.427211613937901, + "learning_rate": 2.9370049413492084e-07, + "loss": 0.8451346158981323, + "step": 5260 + }, + { + "epoch": 1.853920704845815, + "grad_norm": 1.796887553027914, + "learning_rate": 2.923004423724474e-07, + "loss": 0.5567130446434021, + "step": 5261 + }, + { + "epoch": 1.854273127753304, + "grad_norm": 1.6019285108338794, + "learning_rate": 2.909036860718595e-07, + "loss": 0.4740293622016907, + "step": 5262 + }, + { + "epoch": 1.854625550660793, + "grad_norm": 1.566732286884799, + "learning_rate": 2.895102257073101e-07, + "loss": 0.5279378294944763, + "step": 5263 + }, + { + "epoch": 1.8549779735682819, + "grad_norm": 2.0699049521167923, + "learning_rate": 2.881200617518387e-07, + "loss": 0.5977471470832825, + "step": 5264 + }, + { + "epoch": 1.8553303964757708, + "grad_norm": 2.147594228172352, + "learning_rate": 2.8673319467736104e-07, + "loss": 0.5385996699333191, + "step": 5265 + }, + { + "epoch": 1.85568281938326, + "grad_norm": 
2.011382389323699, + "learning_rate": 2.85349624954675e-07, + "loss": 0.5702279806137085, + "step": 5266 + }, + { + "epoch": 1.856035242290749, + "grad_norm": 1.875774247263156, + "learning_rate": 2.839693530534604e-07, + "loss": 0.584097146987915, + "step": 5267 + }, + { + "epoch": 1.8563876651982378, + "grad_norm": 1.9561416110933127, + "learning_rate": 2.825923794422758e-07, + "loss": 0.6205782890319824, + "step": 5268 + }, + { + "epoch": 1.856740088105727, + "grad_norm": 1.8766933117628495, + "learning_rate": 2.8121870458856284e-07, + "loss": 0.5626852512359619, + "step": 5269 + }, + { + "epoch": 1.857092511013216, + "grad_norm": 1.826792073608219, + "learning_rate": 2.798483289586396e-07, + "loss": 0.6052513122558594, + "step": 5270 + }, + { + "epoch": 1.8574449339207049, + "grad_norm": 2.051566447554152, + "learning_rate": 2.7848125301770504e-07, + "loss": 0.5074095726013184, + "step": 5271 + }, + { + "epoch": 1.8577973568281938, + "grad_norm": 2.3608926664844705, + "learning_rate": 2.7711747722984127e-07, + "loss": 0.8006119728088379, + "step": 5272 + }, + { + "epoch": 1.8581497797356827, + "grad_norm": 1.939365874771501, + "learning_rate": 2.7575700205800694e-07, + "loss": 0.6437188982963562, + "step": 5273 + }, + { + "epoch": 1.8585022026431717, + "grad_norm": 2.070323156152843, + "learning_rate": 2.743998279640403e-07, + "loss": 0.6610177755355835, + "step": 5274 + }, + { + "epoch": 1.8588546255506608, + "grad_norm": 2.242727394045801, + "learning_rate": 2.7304595540865953e-07, + "loss": 0.6041977405548096, + "step": 5275 + }, + { + "epoch": 1.8592070484581498, + "grad_norm": 2.296252009493085, + "learning_rate": 2.716953848514625e-07, + "loss": 0.5684002041816711, + "step": 5276 + }, + { + "epoch": 1.859559471365639, + "grad_norm": 2.108426771462305, + "learning_rate": 2.703481167509281e-07, + "loss": 0.7256498336791992, + "step": 5277 + }, + { + "epoch": 1.8599118942731279, + "grad_norm": 1.959590007863519, + "learning_rate": 2.690041515644093e-07, + 
"loss": 0.7264266014099121, + "step": 5278 + }, + { + "epoch": 1.8602643171806168, + "grad_norm": 2.0027244373685047, + "learning_rate": 2.6766348974813895e-07, + "loss": 0.5427879095077515, + "step": 5279 + }, + { + "epoch": 1.8606167400881057, + "grad_norm": 1.679848534564951, + "learning_rate": 2.663261317572341e-07, + "loss": 0.5970745086669922, + "step": 5280 + }, + { + "epoch": 1.8609691629955947, + "grad_norm": 1.9989999209106484, + "learning_rate": 2.6499207804568495e-07, + "loss": 0.5796299576759338, + "step": 5281 + }, + { + "epoch": 1.8613215859030836, + "grad_norm": 1.6433355014728201, + "learning_rate": 2.6366132906635923e-07, + "loss": 0.4900246262550354, + "step": 5282 + }, + { + "epoch": 1.8616740088105725, + "grad_norm": 1.8937189873731617, + "learning_rate": 2.6233388527100777e-07, + "loss": 0.6052582263946533, + "step": 5283 + }, + { + "epoch": 1.8620264317180617, + "grad_norm": 2.1632344831004127, + "learning_rate": 2.610097471102524e-07, + "loss": 0.6908484697341919, + "step": 5284 + }, + { + "epoch": 1.8623788546255506, + "grad_norm": 1.9493448159947622, + "learning_rate": 2.596889150336024e-07, + "loss": 0.6353795528411865, + "step": 5285 + }, + { + "epoch": 1.8627312775330398, + "grad_norm": 2.019445353702499, + "learning_rate": 2.5837138948943354e-07, + "loss": 0.803575873374939, + "step": 5286 + }, + { + "epoch": 1.8630837004405287, + "grad_norm": 1.9882041113358364, + "learning_rate": 2.5705717092500694e-07, + "loss": 0.5551957488059998, + "step": 5287 + }, + { + "epoch": 1.8634361233480177, + "grad_norm": 1.9987103830633048, + "learning_rate": 2.5574625978646017e-07, + "loss": 0.6247879266738892, + "step": 5288 + }, + { + "epoch": 1.8637885462555066, + "grad_norm": 2.072117287811421, + "learning_rate": 2.544386565188062e-07, + "loss": 0.6029977798461914, + "step": 5289 + }, + { + "epoch": 1.8641409691629955, + "grad_norm": 2.101747258049668, + "learning_rate": 2.531343615659343e-07, + "loss": 0.611297070980072, + "step": 5290 + }, + { + 
"epoch": 1.8644933920704845, + "grad_norm": 2.1168170865355616, + "learning_rate": 2.518333753706137e-07, + "loss": 0.5290260314941406, + "step": 5291 + }, + { + "epoch": 1.8648458149779734, + "grad_norm": 1.88270236786552, + "learning_rate": 2.5053569837448664e-07, + "loss": 0.5988795757293701, + "step": 5292 + }, + { + "epoch": 1.8651982378854626, + "grad_norm": 2.1933893236783613, + "learning_rate": 2.4924133101807636e-07, + "loss": 0.671028733253479, + "step": 5293 + }, + { + "epoch": 1.8655506607929515, + "grad_norm": 2.195163128107634, + "learning_rate": 2.4795027374077905e-07, + "loss": 0.5741167664527893, + "step": 5294 + }, + { + "epoch": 1.8659030837004407, + "grad_norm": 1.8793688638635475, + "learning_rate": 2.4666252698086867e-07, + "loss": 0.47447216510772705, + "step": 5295 + }, + { + "epoch": 1.8662555066079296, + "grad_norm": 1.813537542020307, + "learning_rate": 2.453780911754955e-07, + "loss": 0.6535651087760925, + "step": 5296 + }, + { + "epoch": 1.8666079295154185, + "grad_norm": 1.830958965071389, + "learning_rate": 2.4409696676068517e-07, + "loss": 0.5928847193717957, + "step": 5297 + }, + { + "epoch": 1.8669603524229075, + "grad_norm": 2.1016696944101363, + "learning_rate": 2.428191541713387e-07, + "loss": 0.5928774476051331, + "step": 5298 + }, + { + "epoch": 1.8673127753303964, + "grad_norm": 1.8181831294339377, + "learning_rate": 2.415446538412358e-07, + "loss": 0.5798670053482056, + "step": 5299 + }, + { + "epoch": 1.8676651982378853, + "grad_norm": 1.8162014512536164, + "learning_rate": 2.4027346620302707e-07, + "loss": 0.6222843527793884, + "step": 5300 + }, + { + "epoch": 1.8680176211453743, + "grad_norm": 1.9183032685045331, + "learning_rate": 2.39005591688245e-07, + "loss": 0.5501612424850464, + "step": 5301 + }, + { + "epoch": 1.8683700440528634, + "grad_norm": 1.7621857286720093, + "learning_rate": 2.377410307272887e-07, + "loss": 0.5266422033309937, + "step": 5302 + }, + { + "epoch": 1.8687224669603524, + "grad_norm": 
1.9926692528436012, + "learning_rate": 2.3647978374944037e-07, + "loss": 0.7145729064941406, + "step": 5303 + }, + { + "epoch": 1.8690748898678415, + "grad_norm": 1.8939089473542137, + "learning_rate": 2.3522185118285411e-07, + "loss": 0.6505781412124634, + "step": 5304 + }, + { + "epoch": 1.8694273127753305, + "grad_norm": 2.0817226286854607, + "learning_rate": 2.3396723345455728e-07, + "loss": 0.6278528571128845, + "step": 5305 + }, + { + "epoch": 1.8697797356828194, + "grad_norm": 1.790557343760165, + "learning_rate": 2.3271593099045475e-07, + "loss": 0.5650503039360046, + "step": 5306 + }, + { + "epoch": 1.8701321585903083, + "grad_norm": 1.6157546701422072, + "learning_rate": 2.314679442153256e-07, + "loss": 0.6267939209938049, + "step": 5307 + }, + { + "epoch": 1.8704845814977973, + "grad_norm": 1.874302486649101, + "learning_rate": 2.302232735528187e-07, + "loss": 0.45913875102996826, + "step": 5308 + }, + { + "epoch": 1.8708370044052862, + "grad_norm": 1.7607480001908633, + "learning_rate": 2.289819194254661e-07, + "loss": 0.6122059226036072, + "step": 5309 + }, + { + "epoch": 1.8711894273127754, + "grad_norm": 1.803806841150382, + "learning_rate": 2.2774388225466514e-07, + "loss": 0.6479405164718628, + "step": 5310 + }, + { + "epoch": 1.8715418502202643, + "grad_norm": 1.8546829656575279, + "learning_rate": 2.26509162460693e-07, + "loss": 0.5013849139213562, + "step": 5311 + }, + { + "epoch": 1.8718942731277532, + "grad_norm": 1.749663744266161, + "learning_rate": 2.2527776046269767e-07, + "loss": 0.6431373357772827, + "step": 5312 + }, + { + "epoch": 1.8722466960352424, + "grad_norm": 1.669095711801791, + "learning_rate": 2.2404967667870147e-07, + "loss": 0.6447317004203796, + "step": 5313 + }, + { + "epoch": 1.8725991189427313, + "grad_norm": 2.405218866271529, + "learning_rate": 2.2282491152560203e-07, + "loss": 0.5784682631492615, + "step": 5314 + }, + { + "epoch": 1.8729515418502203, + "grad_norm": 1.7544004376252713, + "learning_rate": 
2.2160346541916677e-07, + "loss": 0.560835599899292, + "step": 5315 + }, + { + "epoch": 1.8733039647577092, + "grad_norm": 1.7162975954294335, + "learning_rate": 2.2038533877404066e-07, + "loss": 0.5930913686752319, + "step": 5316 + }, + { + "epoch": 1.8736563876651982, + "grad_norm": 1.9892540663354406, + "learning_rate": 2.1917053200374073e-07, + "loss": 0.7221095561981201, + "step": 5317 + }, + { + "epoch": 1.874008810572687, + "grad_norm": 1.9380281400359725, + "learning_rate": 2.179590455206515e-07, + "loss": 0.6307567358016968, + "step": 5318 + }, + { + "epoch": 1.8743612334801762, + "grad_norm": 2.0190052317760814, + "learning_rate": 2.167508797360396e-07, + "loss": 0.6158597469329834, + "step": 5319 + }, + { + "epoch": 1.8747136563876652, + "grad_norm": 1.7468326387459954, + "learning_rate": 2.1554603506003802e-07, + "loss": 0.5778557062149048, + "step": 5320 + }, + { + "epoch": 1.8750660792951543, + "grad_norm": 1.497372593580549, + "learning_rate": 2.1434451190165294e-07, + "loss": 0.5213632583618164, + "step": 5321 + }, + { + "epoch": 1.8754185022026433, + "grad_norm": 1.8555907678767487, + "learning_rate": 2.131463106687659e-07, + "loss": 0.6633203029632568, + "step": 5322 + }, + { + "epoch": 1.8757709251101322, + "grad_norm": 1.9991798348617227, + "learning_rate": 2.1195143176812817e-07, + "loss": 0.6586780548095703, + "step": 5323 + }, + { + "epoch": 1.8761233480176212, + "grad_norm": 1.991978810673319, + "learning_rate": 2.1075987560536305e-07, + "loss": 0.4946047067642212, + "step": 5324 + }, + { + "epoch": 1.87647577092511, + "grad_norm": 1.6744690075916624, + "learning_rate": 2.0957164258497031e-07, + "loss": 0.5689302682876587, + "step": 5325 + }, + { + "epoch": 1.876828193832599, + "grad_norm": 1.9550201402383367, + "learning_rate": 2.0838673311031287e-07, + "loss": 0.5761843323707581, + "step": 5326 + }, + { + "epoch": 1.877180616740088, + "grad_norm": 1.6070623974889393, + "learning_rate": 2.0720514758363343e-07, + "loss": 0.5714447498321533, 
+ "step": 5327 + }, + { + "epoch": 1.8775330396475771, + "grad_norm": 1.7537019465709125, + "learning_rate": 2.0602688640604441e-07, + "loss": 0.4566301107406616, + "step": 5328 + }, + { + "epoch": 1.877885462555066, + "grad_norm": 2.110089760102471, + "learning_rate": 2.04851949977527e-07, + "loss": 0.6326137781143188, + "step": 5329 + }, + { + "epoch": 1.8782378854625552, + "grad_norm": 1.8775980517302555, + "learning_rate": 2.036803386969355e-07, + "loss": 0.6342206001281738, + "step": 5330 + }, + { + "epoch": 1.8785903083700441, + "grad_norm": 1.9958405881870251, + "learning_rate": 2.0251205296199616e-07, + "loss": 0.5525872707366943, + "step": 5331 + }, + { + "epoch": 1.878942731277533, + "grad_norm": 1.6965395036886874, + "learning_rate": 2.0134709316930733e-07, + "loss": 0.4932950735092163, + "step": 5332 + }, + { + "epoch": 1.879295154185022, + "grad_norm": 1.7918605717870588, + "learning_rate": 2.001854597143349e-07, + "loss": 0.6526485681533813, + "step": 5333 + }, + { + "epoch": 1.879647577092511, + "grad_norm": 1.8862781919579625, + "learning_rate": 1.990271529914156e-07, + "loss": 0.6256940960884094, + "step": 5334 + }, + { + "epoch": 1.88, + "grad_norm": 2.361417623387243, + "learning_rate": 1.9787217339376053e-07, + "loss": 0.6406987905502319, + "step": 5335 + }, + { + "epoch": 1.8803524229074888, + "grad_norm": 1.812802653812012, + "learning_rate": 1.9672052131345043e-07, + "loss": 0.6141321659088135, + "step": 5336 + }, + { + "epoch": 1.880704845814978, + "grad_norm": 2.025004487176686, + "learning_rate": 1.955721971414326e-07, + "loss": 0.558428943157196, + "step": 5337 + }, + { + "epoch": 1.881057268722467, + "grad_norm": 1.973943138705469, + "learning_rate": 1.9442720126752968e-07, + "loss": 0.5995065569877625, + "step": 5338 + }, + { + "epoch": 1.881409691629956, + "grad_norm": 1.6822565518265986, + "learning_rate": 1.932855340804296e-07, + "loss": 0.5109822750091553, + "step": 5339 + }, + { + "epoch": 1.881762114537445, + "grad_norm": 
1.941646392245956, + "learning_rate": 1.921471959676957e-07, + "loss": 0.6695220470428467, + "step": 5340 + }, + { + "epoch": 1.882114537444934, + "grad_norm": 1.8857636319654494, + "learning_rate": 1.9101218731575777e-07, + "loss": 0.6982283592224121, + "step": 5341 + }, + { + "epoch": 1.882466960352423, + "grad_norm": 1.8944501787373655, + "learning_rate": 1.8988050850991314e-07, + "loss": 0.6475410461425781, + "step": 5342 + }, + { + "epoch": 1.8828193832599118, + "grad_norm": 1.7449353446414906, + "learning_rate": 1.8875215993433448e-07, + "loss": 0.57706218957901, + "step": 5343 + }, + { + "epoch": 1.8831718061674008, + "grad_norm": 1.708696671712054, + "learning_rate": 1.8762714197205988e-07, + "loss": 0.5243045091629028, + "step": 5344 + }, + { + "epoch": 1.88352422907489, + "grad_norm": 1.797956034726921, + "learning_rate": 1.865054550049994e-07, + "loss": 0.6208887100219727, + "step": 5345 + }, + { + "epoch": 1.8838766519823789, + "grad_norm": 1.9048581772706628, + "learning_rate": 1.853870994139284e-07, + "loss": 0.5572443008422852, + "step": 5346 + }, + { + "epoch": 1.8842290748898678, + "grad_norm": 1.7939928987370566, + "learning_rate": 1.8427207557849436e-07, + "loss": 0.5673031806945801, + "step": 5347 + }, + { + "epoch": 1.884581497797357, + "grad_norm": 1.6894216214789064, + "learning_rate": 1.8316038387721558e-07, + "loss": 0.5085422992706299, + "step": 5348 + }, + { + "epoch": 1.8849339207048459, + "grad_norm": 1.7455381888238348, + "learning_rate": 1.8205202468747463e-07, + "loss": 0.5480824708938599, + "step": 5349 + }, + { + "epoch": 1.8852863436123348, + "grad_norm": 1.7848642016680003, + "learning_rate": 1.8094699838552387e-07, + "loss": 0.6236293911933899, + "step": 5350 + }, + { + "epoch": 1.8856387665198238, + "grad_norm": 1.7626474829765526, + "learning_rate": 1.798453053464888e-07, + "loss": 0.541741132736206, + "step": 5351 + }, + { + "epoch": 1.8859911894273127, + "grad_norm": 1.7289887528200605, + "learning_rate": 
1.7874694594435692e-07, + "loss": 0.5309538245201111, + "step": 5352 + }, + { + "epoch": 1.8863436123348016, + "grad_norm": 1.944311199542912, + "learning_rate": 1.7765192055198888e-07, + "loss": 0.5886228084564209, + "step": 5353 + }, + { + "epoch": 1.8866960352422908, + "grad_norm": 1.6415851491633797, + "learning_rate": 1.7656022954111064e-07, + "loss": 0.6216265559196472, + "step": 5354 + }, + { + "epoch": 1.8870484581497797, + "grad_norm": 1.6922081510439257, + "learning_rate": 1.7547187328231575e-07, + "loss": 0.5393999814987183, + "step": 5355 + }, + { + "epoch": 1.8874008810572689, + "grad_norm": 1.7167987260272457, + "learning_rate": 1.74386852145072e-07, + "loss": 0.583373486995697, + "step": 5356 + }, + { + "epoch": 1.8877533039647578, + "grad_norm": 2.361225928566298, + "learning_rate": 1.73305166497707e-07, + "loss": 0.6403313875198364, + "step": 5357 + }, + { + "epoch": 1.8881057268722468, + "grad_norm": 1.771396849548527, + "learning_rate": 1.7222681670741814e-07, + "loss": 0.5780963897705078, + "step": 5358 + }, + { + "epoch": 1.8884581497797357, + "grad_norm": 1.59802053134679, + "learning_rate": 1.711518031402748e-07, + "loss": 0.6046397686004639, + "step": 5359 + }, + { + "epoch": 1.8888105726872246, + "grad_norm": 1.5504259730519754, + "learning_rate": 1.700801261612084e-07, + "loss": 0.5582219362258911, + "step": 5360 + }, + { + "epoch": 1.8891629955947136, + "grad_norm": 1.962329345083699, + "learning_rate": 1.6901178613402125e-07, + "loss": 0.4880410432815552, + "step": 5361 + }, + { + "epoch": 1.8895154185022025, + "grad_norm": 2.055990524297856, + "learning_rate": 1.6794678342138105e-07, + "loss": 0.7417550086975098, + "step": 5362 + }, + { + "epoch": 1.8898678414096917, + "grad_norm": 1.8316934396355506, + "learning_rate": 1.668851183848219e-07, + "loss": 0.4616948962211609, + "step": 5363 + }, + { + "epoch": 1.8902202643171806, + "grad_norm": 1.6177478399502592, + "learning_rate": 1.658267913847489e-07, + "loss": 0.5595716834068298, + 
"step": 5364 + }, + { + "epoch": 1.8905726872246698, + "grad_norm": 1.9610306002643032, + "learning_rate": 1.6477180278042793e-07, + "loss": 0.72450852394104, + "step": 5365 + }, + { + "epoch": 1.8909251101321587, + "grad_norm": 1.8036541582694667, + "learning_rate": 1.637201529299959e-07, + "loss": 0.6261592507362366, + "step": 5366 + }, + { + "epoch": 1.8912775330396476, + "grad_norm": 2.1024939179342823, + "learning_rate": 1.6267184219045607e-07, + "loss": 0.5023064613342285, + "step": 5367 + }, + { + "epoch": 1.8916299559471366, + "grad_norm": 1.9210322300280602, + "learning_rate": 1.6162687091767714e-07, + "loss": 0.7113457918167114, + "step": 5368 + }, + { + "epoch": 1.8919823788546255, + "grad_norm": 1.9212954550271457, + "learning_rate": 1.6058523946639426e-07, + "loss": 0.5376787185668945, + "step": 5369 + }, + { + "epoch": 1.8923348017621144, + "grad_norm": 1.86817536856008, + "learning_rate": 1.5954694819020788e-07, + "loss": 0.6523979902267456, + "step": 5370 + }, + { + "epoch": 1.8926872246696034, + "grad_norm": 1.841265437549123, + "learning_rate": 1.5851199744158607e-07, + "loss": 0.6610705852508545, + "step": 5371 + }, + { + "epoch": 1.8930396475770925, + "grad_norm": 2.0967966308369053, + "learning_rate": 1.5748038757186445e-07, + "loss": 0.657126247882843, + "step": 5372 + }, + { + "epoch": 1.8933920704845815, + "grad_norm": 2.3300722251609893, + "learning_rate": 1.5645211893123846e-07, + "loss": 0.7247096300125122, + "step": 5373 + }, + { + "epoch": 1.8937444933920706, + "grad_norm": 1.5063549897958597, + "learning_rate": 1.5542719186877553e-07, + "loss": 0.5392117500305176, + "step": 5374 + }, + { + "epoch": 1.8940969162995596, + "grad_norm": 1.706529406386883, + "learning_rate": 1.5440560673240735e-07, + "loss": 0.5038361549377441, + "step": 5375 + }, + { + "epoch": 1.8944493392070485, + "grad_norm": 1.9403637299706042, + "learning_rate": 1.5338736386892982e-07, + "loss": 0.4768316447734833, + "step": 5376 + }, + { + "epoch": 
1.8948017621145374, + "grad_norm": 1.7917263966392405, + "learning_rate": 1.5237246362400316e-07, + "loss": 0.5925793051719666, + "step": 5377 + }, + { + "epoch": 1.8951541850220264, + "grad_norm": 2.029166285154972, + "learning_rate": 1.5136090634215616e-07, + "loss": 0.47840988636016846, + "step": 5378 + }, + { + "epoch": 1.8955066079295153, + "grad_norm": 1.9172034216887006, + "learning_rate": 1.5035269236677974e-07, + "loss": 0.6365169882774353, + "step": 5379 + }, + { + "epoch": 1.8958590308370042, + "grad_norm": 1.789950493711397, + "learning_rate": 1.4934782204013344e-07, + "loss": 0.6287797689437866, + "step": 5380 + }, + { + "epoch": 1.8962114537444934, + "grad_norm": 1.8420293657892082, + "learning_rate": 1.4834629570333548e-07, + "loss": 0.6859137415885925, + "step": 5381 + }, + { + "epoch": 1.8965638766519823, + "grad_norm": 1.9365437650034845, + "learning_rate": 1.4734811369637725e-07, + "loss": 0.5545040369033813, + "step": 5382 + }, + { + "epoch": 1.8969162995594715, + "grad_norm": 1.6857031681916985, + "learning_rate": 1.463532763581077e-07, + "loss": 0.6418923139572144, + "step": 5383 + }, + { + "epoch": 1.8972687224669604, + "grad_norm": 4.115242480246632, + "learning_rate": 1.4536178402624334e-07, + "loss": 0.7618488669395447, + "step": 5384 + }, + { + "epoch": 1.8976211453744494, + "grad_norm": 1.7790399709296727, + "learning_rate": 1.4437363703736718e-07, + "loss": 0.6178286671638489, + "step": 5385 + }, + { + "epoch": 1.8979735682819383, + "grad_norm": 2.33955789440919, + "learning_rate": 1.4338883572692087e-07, + "loss": 0.6800570487976074, + "step": 5386 + }, + { + "epoch": 1.8983259911894272, + "grad_norm": 1.9056441030293936, + "learning_rate": 1.4240738042921588e-07, + "loss": 0.6063584089279175, + "step": 5387 + }, + { + "epoch": 1.8986784140969162, + "grad_norm": 1.857878498727731, + "learning_rate": 1.4142927147742792e-07, + "loss": 0.5631873607635498, + "step": 5388 + }, + { + "epoch": 1.8990308370044053, + "grad_norm": 
1.6999145603505723, + "learning_rate": 1.4045450920358917e-07, + "loss": 0.5346484184265137, + "step": 5389 + }, + { + "epoch": 1.8993832599118943, + "grad_norm": 1.660876208730021, + "learning_rate": 1.3948309393860605e-07, + "loss": 0.5043535232543945, + "step": 5390 + }, + { + "epoch": 1.8997356828193832, + "grad_norm": 1.9091498065078292, + "learning_rate": 1.3851502601224032e-07, + "loss": 0.6591805219650269, + "step": 5391 + }, + { + "epoch": 1.9000881057268724, + "grad_norm": 1.777554153966534, + "learning_rate": 1.3755030575312355e-07, + "loss": 0.6831244826316833, + "step": 5392 + }, + { + "epoch": 1.9004405286343613, + "grad_norm": 1.744983267268657, + "learning_rate": 1.3658893348874714e-07, + "loss": 0.6572617292404175, + "step": 5393 + }, + { + "epoch": 1.9007929515418502, + "grad_norm": 2.007956379457216, + "learning_rate": 1.3563090954546555e-07, + "loss": 0.5834530591964722, + "step": 5394 + }, + { + "epoch": 1.9011453744493392, + "grad_norm": 1.8405418946212868, + "learning_rate": 1.3467623424850084e-07, + "loss": 0.5810972452163696, + "step": 5395 + }, + { + "epoch": 1.9014977973568281, + "grad_norm": 1.8342670520255937, + "learning_rate": 1.3372490792193493e-07, + "loss": 0.6338596940040588, + "step": 5396 + }, + { + "epoch": 1.901850220264317, + "grad_norm": 2.4739742581402946, + "learning_rate": 1.327769308887117e-07, + "loss": 0.5274045467376709, + "step": 5397 + }, + { + "epoch": 1.9022026431718062, + "grad_norm": 2.13415646905843, + "learning_rate": 1.3183230347064147e-07, + "loss": 0.5416278839111328, + "step": 5398 + }, + { + "epoch": 1.9025550660792951, + "grad_norm": 1.8878260396672215, + "learning_rate": 1.3089102598839442e-07, + "loss": 0.4818935692310333, + "step": 5399 + }, + { + "epoch": 1.9029074889867843, + "grad_norm": 1.6383283062285148, + "learning_rate": 1.299530987615072e-07, + "loss": 0.4553770124912262, + "step": 5400 + }, + { + "epoch": 1.9032599118942732, + "grad_norm": 1.7060011862412936, + "learning_rate": 
1.2901852210837507e-07, + "loss": 0.5663920640945435, + "step": 5401 + }, + { + "epoch": 1.9036123348017622, + "grad_norm": 1.975611905778012, + "learning_rate": 1.2808729634625872e-07, + "loss": 0.5654638409614563, + "step": 5402 + }, + { + "epoch": 1.903964757709251, + "grad_norm": 2.0012288604540136, + "learning_rate": 1.271594217912797e-07, + "loss": 0.8061939477920532, + "step": 5403 + }, + { + "epoch": 1.90431718061674, + "grad_norm": 2.149695499003911, + "learning_rate": 1.2623489875842276e-07, + "loss": 0.5832188129425049, + "step": 5404 + }, + { + "epoch": 1.904669603524229, + "grad_norm": 1.8966385092802618, + "learning_rate": 1.2531372756153458e-07, + "loss": 0.6112633943557739, + "step": 5405 + }, + { + "epoch": 1.905022026431718, + "grad_norm": 2.3113031929819106, + "learning_rate": 1.2439590851332394e-07, + "loss": 0.7083494663238525, + "step": 5406 + }, + { + "epoch": 1.905374449339207, + "grad_norm": 1.9110441437452201, + "learning_rate": 1.2348144192536272e-07, + "loss": 0.5319055318832397, + "step": 5407 + }, + { + "epoch": 1.905726872246696, + "grad_norm": 1.9724655581165889, + "learning_rate": 1.2257032810808256e-07, + "loss": 0.6199945211410522, + "step": 5408 + }, + { + "epoch": 1.9060792951541852, + "grad_norm": 2.3233890606574503, + "learning_rate": 1.2166256737077942e-07, + "loss": 0.6596004962921143, + "step": 5409 + }, + { + "epoch": 1.906431718061674, + "grad_norm": 1.9040617554840082, + "learning_rate": 1.20758160021609e-07, + "loss": 0.553988516330719, + "step": 5410 + }, + { + "epoch": 1.906784140969163, + "grad_norm": 2.329855084255152, + "learning_rate": 1.1985710636759128e-07, + "loss": 0.6295895576477051, + "step": 5411 + }, + { + "epoch": 1.907136563876652, + "grad_norm": 2.035449496855298, + "learning_rate": 1.1895940671460271e-07, + "loss": 0.6555598378181458, + "step": 5412 + }, + { + "epoch": 1.907488986784141, + "grad_norm": 1.8252966820746244, + "learning_rate": 1.1806506136738616e-07, + "loss": 0.48203831911087036, + 
"step": 5413 + }, + { + "epoch": 1.9078414096916299, + "grad_norm": 2.0052153848511045, + "learning_rate": 1.1717407062954434e-07, + "loss": 0.6632858514785767, + "step": 5414 + }, + { + "epoch": 1.9081938325991188, + "grad_norm": 1.913108464706502, + "learning_rate": 1.1628643480354085e-07, + "loss": 0.6058870553970337, + "step": 5415 + }, + { + "epoch": 1.908546255506608, + "grad_norm": 1.6689328390033278, + "learning_rate": 1.1540215419070022e-07, + "loss": 0.5106638073921204, + "step": 5416 + }, + { + "epoch": 1.9088986784140969, + "grad_norm": 1.965112171139023, + "learning_rate": 1.1452122909120788e-07, + "loss": 0.6641250848770142, + "step": 5417 + }, + { + "epoch": 1.909251101321586, + "grad_norm": 1.7797017689691026, + "learning_rate": 1.1364365980411019e-07, + "loss": 0.4823518395423889, + "step": 5418 + }, + { + "epoch": 1.909603524229075, + "grad_norm": 1.7374946519813605, + "learning_rate": 1.127694466273166e-07, + "loss": 0.5770869255065918, + "step": 5419 + }, + { + "epoch": 1.909955947136564, + "grad_norm": 1.8439547121423094, + "learning_rate": 1.1189858985759306e-07, + "loss": 0.5120491981506348, + "step": 5420 + }, + { + "epoch": 1.9103083700440529, + "grad_norm": 1.998054444662161, + "learning_rate": 1.1103108979056865e-07, + "loss": 0.6742277145385742, + "step": 5421 + }, + { + "epoch": 1.9106607929515418, + "grad_norm": 1.7361045655014782, + "learning_rate": 1.1016694672073336e-07, + "loss": 0.6053510904312134, + "step": 5422 + }, + { + "epoch": 1.9110132158590307, + "grad_norm": 2.276872906150792, + "learning_rate": 1.0930616094143698e-07, + "loss": 0.5598228573799133, + "step": 5423 + }, + { + "epoch": 1.9113656387665197, + "grad_norm": 1.7689371613585823, + "learning_rate": 1.0844873274488799e-07, + "loss": 0.599521279335022, + "step": 5424 + }, + { + "epoch": 1.9117180616740088, + "grad_norm": 2.270274631303626, + "learning_rate": 1.075946624221591e-07, + "loss": 0.5986596345901489, + "step": 5425 + }, + { + "epoch": 1.9120704845814978, + 
"grad_norm": 2.0819173495219054, + "learning_rate": 1.067439502631773e-07, + "loss": 0.5657980442047119, + "step": 5426 + }, + { + "epoch": 1.912422907488987, + "grad_norm": 2.498725021517388, + "learning_rate": 1.0589659655673712e-07, + "loss": 0.5561040639877319, + "step": 5427 + }, + { + "epoch": 1.9127753303964758, + "grad_norm": 1.6241033411576455, + "learning_rate": 1.0505260159048513e-07, + "loss": 0.5088320970535278, + "step": 5428 + }, + { + "epoch": 1.9131277533039648, + "grad_norm": 2.1207031706665407, + "learning_rate": 1.0421196565093217e-07, + "loss": 0.5679075717926025, + "step": 5429 + }, + { + "epoch": 1.9134801762114537, + "grad_norm": 1.8775486377310404, + "learning_rate": 1.0337468902344994e-07, + "loss": 0.6701461672782898, + "step": 5430 + }, + { + "epoch": 1.9138325991189427, + "grad_norm": 1.7839638341554918, + "learning_rate": 1.0254077199226553e-07, + "loss": 0.6172112822532654, + "step": 5431 + }, + { + "epoch": 1.9141850220264316, + "grad_norm": 1.904067212081221, + "learning_rate": 1.0171021484046806e-07, + "loss": 0.5926263332366943, + "step": 5432 + }, + { + "epoch": 1.9145374449339208, + "grad_norm": 1.7190787727179386, + "learning_rate": 1.0088301785000754e-07, + "loss": 0.6142431497573853, + "step": 5433 + }, + { + "epoch": 1.9148898678414097, + "grad_norm": 1.7095738070807496, + "learning_rate": 1.0005918130168934e-07, + "loss": 0.5367780923843384, + "step": 5434 + }, + { + "epoch": 1.9152422907488986, + "grad_norm": 1.8769142431022592, + "learning_rate": 9.923870547518311e-08, + "loss": 0.5241641998291016, + "step": 5435 + }, + { + "epoch": 1.9155947136563878, + "grad_norm": 1.7765958549274539, + "learning_rate": 9.842159064901157e-08, + "loss": 0.5906308889389038, + "step": 5436 + }, + { + "epoch": 1.9159471365638767, + "grad_norm": 2.1275572555046613, + "learning_rate": 9.760783710056176e-08, + "loss": 0.5411181449890137, + "step": 5437 + }, + { + "epoch": 1.9162995594713657, + "grad_norm": 1.9001328464490854, + 
"learning_rate": 9.679744510607825e-08, + "loss": 0.6313618421554565, + "step": 5438 + }, + { + "epoch": 1.9166519823788546, + "grad_norm": 2.0658646856716336, + "learning_rate": 9.599041494066208e-08, + "loss": 0.6330033540725708, + "step": 5439 + }, + { + "epoch": 1.9170044052863435, + "grad_norm": 1.9617429681187768, + "learning_rate": 9.518674687827634e-08, + "loss": 0.5859507322311401, + "step": 5440 + }, + { + "epoch": 1.9173568281938325, + "grad_norm": 1.9233196169731877, + "learning_rate": 9.438644119174057e-08, + "loss": 0.571119487285614, + "step": 5441 + }, + { + "epoch": 1.9177092511013216, + "grad_norm": 1.683294616332208, + "learning_rate": 9.3589498152733e-08, + "loss": 0.6114518046379089, + "step": 5442 + }, + { + "epoch": 1.9180616740088106, + "grad_norm": 2.0948221060814407, + "learning_rate": 9.279591803179277e-08, + "loss": 0.5762027502059937, + "step": 5443 + }, + { + "epoch": 1.9184140969162997, + "grad_norm": 1.973540736612678, + "learning_rate": 9.200570109831441e-08, + "loss": 0.6081440448760986, + "step": 5444 + }, + { + "epoch": 1.9187665198237887, + "grad_norm": 1.9242540837021294, + "learning_rate": 9.121884762055222e-08, + "loss": 0.5682440996170044, + "step": 5445 + }, + { + "epoch": 1.9191189427312776, + "grad_norm": 1.642224199268087, + "learning_rate": 9.043535786561919e-08, + "loss": 0.5290100574493408, + "step": 5446 + }, + { + "epoch": 1.9194713656387665, + "grad_norm": 1.8013641871034827, + "learning_rate": 8.965523209948367e-08, + "loss": 0.5743255019187927, + "step": 5447 + }, + { + "epoch": 1.9198237885462555, + "grad_norm": 1.6357977481393366, + "learning_rate": 8.887847058697718e-08, + "loss": 0.5955618023872375, + "step": 5448 + }, + { + "epoch": 1.9201762114537444, + "grad_norm": 1.9706217525454803, + "learning_rate": 8.810507359178322e-08, + "loss": 0.4732915759086609, + "step": 5449 + }, + { + "epoch": 1.9205286343612333, + "grad_norm": 3.2730228664607797, + "learning_rate": 8.733504137644621e-08, + "loss": 
0.6712108850479126, + "step": 5450 + }, + { + "epoch": 1.9208810572687225, + "grad_norm": 1.997966446518774, + "learning_rate": 8.656837420237152e-08, + "loss": 0.5169811248779297, + "step": 5451 + }, + { + "epoch": 1.9212334801762114, + "grad_norm": 1.9146732631772796, + "learning_rate": 8.580507232981428e-08, + "loss": 0.6117082238197327, + "step": 5452 + }, + { + "epoch": 1.9215859030837006, + "grad_norm": 1.7690878518096709, + "learning_rate": 8.504513601789388e-08, + "loss": 0.7020283937454224, + "step": 5453 + }, + { + "epoch": 1.9219383259911895, + "grad_norm": 1.806111695783304, + "learning_rate": 8.42885655245862e-08, + "loss": 0.5489979386329651, + "step": 5454 + }, + { + "epoch": 1.9222907488986785, + "grad_norm": 1.8218906131330599, + "learning_rate": 8.353536110672133e-08, + "loss": 0.5361644625663757, + "step": 5455 + }, + { + "epoch": 1.9226431718061674, + "grad_norm": 1.8728336665856926, + "learning_rate": 8.278552301998921e-08, + "loss": 0.6470010280609131, + "step": 5456 + }, + { + "epoch": 1.9229955947136563, + "grad_norm": 1.5338046694887773, + "learning_rate": 8.203905151893731e-08, + "loss": 0.4642202854156494, + "step": 5457 + }, + { + "epoch": 1.9233480176211453, + "grad_norm": 2.1878989180883357, + "learning_rate": 8.129594685696852e-08, + "loss": 0.6817516088485718, + "step": 5458 + }, + { + "epoch": 1.9237004405286342, + "grad_norm": 1.7544221338170298, + "learning_rate": 8.055620928634433e-08, + "loss": 0.5748617649078369, + "step": 5459 + }, + { + "epoch": 1.9240528634361234, + "grad_norm": 1.9928156109239001, + "learning_rate": 7.981983905818281e-08, + "loss": 0.6730939149856567, + "step": 5460 + }, + { + "epoch": 1.9244052863436123, + "grad_norm": 1.665760800669473, + "learning_rate": 7.90868364224584e-08, + "loss": 0.46469685435295105, + "step": 5461 + }, + { + "epoch": 1.9247577092511015, + "grad_norm": 2.0844638903136907, + "learning_rate": 7.835720162800209e-08, + "loss": 0.5633926391601562, + "step": 5462 + }, + { + "epoch": 
1.9251101321585904, + "grad_norm": 2.034693536740542, + "learning_rate": 7.76309349225035e-08, + "loss": 0.5813394784927368, + "step": 5463 + }, + { + "epoch": 1.9254625550660793, + "grad_norm": 1.4118750743542163, + "learning_rate": 7.690803655250656e-08, + "loss": 0.39959418773651123, + "step": 5464 + }, + { + "epoch": 1.9258149779735683, + "grad_norm": 1.7685280750016403, + "learning_rate": 7.618850676341383e-08, + "loss": 0.6136372089385986, + "step": 5465 + }, + { + "epoch": 1.9261674008810572, + "grad_norm": 1.7393751984149959, + "learning_rate": 7.547234579948104e-08, + "loss": 0.6664354801177979, + "step": 5466 + }, + { + "epoch": 1.9265198237885461, + "grad_norm": 1.8827898065352628, + "learning_rate": 7.475955390382483e-08, + "loss": 0.6009566783905029, + "step": 5467 + }, + { + "epoch": 1.9268722466960353, + "grad_norm": 1.7872694267120686, + "learning_rate": 7.405013131841499e-08, + "loss": 0.7307299375534058, + "step": 5468 + }, + { + "epoch": 1.9272246696035242, + "grad_norm": 1.8234703336391604, + "learning_rate": 7.334407828407885e-08, + "loss": 0.5459531545639038, + "step": 5469 + }, + { + "epoch": 1.9275770925110132, + "grad_norm": 2.1252744976115583, + "learning_rate": 7.264139504049916e-08, + "loss": 0.6230820417404175, + "step": 5470 + }, + { + "epoch": 1.9279295154185023, + "grad_norm": 1.6781926619362313, + "learning_rate": 7.194208182621509e-08, + "loss": 0.5282379984855652, + "step": 5471 + }, + { + "epoch": 1.9282819383259913, + "grad_norm": 2.1980396503246604, + "learning_rate": 7.12461388786212e-08, + "loss": 0.626023530960083, + "step": 5472 + }, + { + "epoch": 1.9286343612334802, + "grad_norm": 2.1608211937841197, + "learning_rate": 7.055356643396849e-08, + "loss": 0.6897492408752441, + "step": 5473 + }, + { + "epoch": 1.9289867841409691, + "grad_norm": 1.7214187213722456, + "learning_rate": 6.986436472736447e-08, + "loss": 0.583849310874939, + "step": 5474 + }, + { + "epoch": 1.929339207048458, + "grad_norm": 1.7492909983006562, + 
"learning_rate": 6.917853399277197e-08, + "loss": 0.6056735515594482, + "step": 5475 + }, + { + "epoch": 1.929691629955947, + "grad_norm": 1.8166317563571888, + "learning_rate": 6.849607446300699e-08, + "loss": 0.52838134765625, + "step": 5476 + }, + { + "epoch": 1.9300440528634362, + "grad_norm": 2.0425025849187954, + "learning_rate": 6.781698636974532e-08, + "loss": 0.6466653943061829, + "step": 5477 + }, + { + "epoch": 1.930396475770925, + "grad_norm": 1.9593462888477349, + "learning_rate": 6.714126994351589e-08, + "loss": 0.6570286750793457, + "step": 5478 + }, + { + "epoch": 1.9307488986784143, + "grad_norm": 2.4867358577799576, + "learning_rate": 6.646892541370409e-08, + "loss": 0.7303042411804199, + "step": 5479 + }, + { + "epoch": 1.9311013215859032, + "grad_norm": 1.7938376915708092, + "learning_rate": 6.579995300854846e-08, + "loss": 0.5556488037109375, + "step": 5480 + }, + { + "epoch": 1.9314537444933921, + "grad_norm": 1.9624740523274589, + "learning_rate": 6.513435295514404e-08, + "loss": 0.6673456430435181, + "step": 5481 + }, + { + "epoch": 1.931806167400881, + "grad_norm": 1.9681067241776358, + "learning_rate": 6.447212547944448e-08, + "loss": 0.5605199337005615, + "step": 5482 + }, + { + "epoch": 1.93215859030837, + "grad_norm": 2.1935053480556785, + "learning_rate": 6.381327080625111e-08, + "loss": 0.5455278158187866, + "step": 5483 + }, + { + "epoch": 1.932511013215859, + "grad_norm": 1.8919678372461928, + "learning_rate": 6.315778915922722e-08, + "loss": 0.5371166467666626, + "step": 5484 + }, + { + "epoch": 1.9328634361233479, + "grad_norm": 1.9114985069981878, + "learning_rate": 6.250568076088814e-08, + "loss": 0.5873486399650574, + "step": 5485 + }, + { + "epoch": 1.933215859030837, + "grad_norm": 1.706006640351556, + "learning_rate": 6.18569458326046e-08, + "loss": 0.4187420606613159, + "step": 5486 + }, + { + "epoch": 1.933568281938326, + "grad_norm": 1.900919435061996, + "learning_rate": 6.121158459460042e-08, + "loss": 
0.6006373167037964, + "step": 5487 + }, + { + "epoch": 1.9339207048458151, + "grad_norm": 1.819026585986156, + "learning_rate": 6.056959726595702e-08, + "loss": 0.6022043228149414, + "step": 5488 + }, + { + "epoch": 1.934273127753304, + "grad_norm": 2.037720704211898, + "learning_rate": 5.993098406460895e-08, + "loss": 0.6324778199195862, + "step": 5489 + }, + { + "epoch": 1.934625550660793, + "grad_norm": 2.0263189254585026, + "learning_rate": 5.929574520734505e-08, + "loss": 0.545529305934906, + "step": 5490 + }, + { + "epoch": 1.934977973568282, + "grad_norm": 1.9957592171950855, + "learning_rate": 5.8663880909809454e-08, + "loss": 0.623627781867981, + "step": 5491 + }, + { + "epoch": 1.9353303964757709, + "grad_norm": 1.9773130682504432, + "learning_rate": 5.80353913865006e-08, + "loss": 0.529983639717102, + "step": 5492 + }, + { + "epoch": 1.9356828193832598, + "grad_norm": 1.8301905692374867, + "learning_rate": 5.7410276850770055e-08, + "loss": 0.638504147529602, + "step": 5493 + }, + { + "epoch": 1.9360352422907487, + "grad_norm": 1.7706026455559263, + "learning_rate": 5.678853751482694e-08, + "loss": 0.6822696924209595, + "step": 5494 + }, + { + "epoch": 1.936387665198238, + "grad_norm": 1.6924491917110376, + "learning_rate": 5.6170173589730204e-08, + "loss": 0.5454750061035156, + "step": 5495 + }, + { + "epoch": 1.9367400881057268, + "grad_norm": 2.1428203564618915, + "learning_rate": 5.555518528539638e-08, + "loss": 0.5301260948181152, + "step": 5496 + }, + { + "epoch": 1.937092511013216, + "grad_norm": 1.965552985899495, + "learning_rate": 5.4943572810594035e-08, + "loss": 0.697251558303833, + "step": 5497 + }, + { + "epoch": 1.937444933920705, + "grad_norm": 1.8589631146352448, + "learning_rate": 5.433533637294819e-08, + "loss": 0.5171586871147156, + "step": 5498 + }, + { + "epoch": 1.9377973568281939, + "grad_norm": 1.974708525019113, + "learning_rate": 5.373047617893479e-08, + "loss": 0.6006083488464355, + "step": 5499 + }, + { + "epoch": 
1.9381497797356828, + "grad_norm": 1.8914658578007237, + "learning_rate": 5.312899243388403e-08, + "loss": 0.6083849668502808, + "step": 5500 + }, + { + "epoch": 1.9385022026431717, + "grad_norm": 2.189863186886587, + "learning_rate": 5.2530885341982586e-08, + "loss": 0.6572569608688354, + "step": 5501 + }, + { + "epoch": 1.9388546255506607, + "grad_norm": 1.9316409138269541, + "learning_rate": 5.1936155106269146e-08, + "loss": 0.497112512588501, + "step": 5502 + }, + { + "epoch": 1.9392070484581496, + "grad_norm": 1.9380736027791932, + "learning_rate": 5.1344801928636664e-08, + "loss": 0.5804885625839233, + "step": 5503 + }, + { + "epoch": 1.9395594713656388, + "grad_norm": 2.415405306864913, + "learning_rate": 5.075682600982901e-08, + "loss": 0.6225712299346924, + "step": 5504 + }, + { + "epoch": 1.9399118942731277, + "grad_norm": 1.896345547525062, + "learning_rate": 5.017222754944651e-08, + "loss": 0.6100028157234192, + "step": 5505 + }, + { + "epoch": 1.9402643171806169, + "grad_norm": 1.47523556471349, + "learning_rate": 4.959100674594486e-08, + "loss": 0.549712061882019, + "step": 5506 + }, + { + "epoch": 1.9406167400881058, + "grad_norm": 1.4736978929928604, + "learning_rate": 4.901316379662624e-08, + "loss": 0.5327162146568298, + "step": 5507 + }, + { + "epoch": 1.9409691629955947, + "grad_norm": 2.3670974688739697, + "learning_rate": 4.8438698897652626e-08, + "loss": 0.7408417463302612, + "step": 5508 + }, + { + "epoch": 1.9413215859030837, + "grad_norm": 1.8644826998816841, + "learning_rate": 4.7867612244036906e-08, + "loss": 0.6126288175582886, + "step": 5509 + }, + { + "epoch": 1.9416740088105726, + "grad_norm": 1.9600730866036664, + "learning_rate": 4.729990402964402e-08, + "loss": 0.542537271976471, + "step": 5510 + }, + { + "epoch": 1.9420264317180616, + "grad_norm": 1.9121979922913575, + "learning_rate": 4.6735574447195345e-08, + "loss": 0.5429843664169312, + "step": 5511 + }, + { + "epoch": 1.9423788546255507, + "grad_norm": 1.8002113296979507, + 
"learning_rate": 4.617462368826098e-08, + "loss": 0.6103960275650024, + "step": 5512 + }, + { + "epoch": 1.9427312775330396, + "grad_norm": 1.7389238607151303, + "learning_rate": 4.561705194326749e-08, + "loss": 0.43702462315559387, + "step": 5513 + }, + { + "epoch": 1.9430837004405286, + "grad_norm": 1.7641081174281446, + "learning_rate": 4.506285940149457e-08, + "loss": 0.5313314199447632, + "step": 5514 + }, + { + "epoch": 1.9434361233480177, + "grad_norm": 1.7069377243686814, + "learning_rate": 4.451204625107064e-08, + "loss": 0.568792462348938, + "step": 5515 + }, + { + "epoch": 1.9437885462555067, + "grad_norm": 2.1007223606906185, + "learning_rate": 4.3964612678979446e-08, + "loss": 0.6055475473403931, + "step": 5516 + }, + { + "epoch": 1.9441409691629956, + "grad_norm": 1.9436769148628141, + "learning_rate": 4.3420558871060116e-08, + "loss": 0.6203786730766296, + "step": 5517 + }, + { + "epoch": 1.9444933920704845, + "grad_norm": 1.788437156743959, + "learning_rate": 4.287988501200047e-08, + "loss": 0.5914345979690552, + "step": 5518 + }, + { + "epoch": 1.9448458149779735, + "grad_norm": 1.8745063002086186, + "learning_rate": 4.2342591285343684e-08, + "loss": 0.5650739669799805, + "step": 5519 + }, + { + "epoch": 1.9451982378854624, + "grad_norm": 1.4561818985326163, + "learning_rate": 4.180867787348164e-08, + "loss": 0.5589660406112671, + "step": 5520 + }, + { + "epoch": 1.9455506607929516, + "grad_norm": 1.9465775114906616, + "learning_rate": 4.12781449576638e-08, + "loss": 0.5683336853981018, + "step": 5521 + }, + { + "epoch": 1.9459030837004405, + "grad_norm": 1.7869041316521455, + "learning_rate": 4.075099271798943e-08, + "loss": 0.5388365983963013, + "step": 5522 + }, + { + "epoch": 1.9462555066079297, + "grad_norm": 2.3465100615160757, + "learning_rate": 4.0227221333408726e-08, + "loss": 0.575006365776062, + "step": 5523 + }, + { + "epoch": 1.9466079295154186, + "grad_norm": 1.6872132733494793, + "learning_rate": 3.970683098172723e-08, + "loss": 
0.49638503789901733, + "step": 5524 + }, + { + "epoch": 1.9469603524229075, + "grad_norm": 2.095719754969683, + "learning_rate": 3.9189821839600294e-08, + "loss": 0.6484041213989258, + "step": 5525 + }, + { + "epoch": 1.9473127753303965, + "grad_norm": 1.7587272240429226, + "learning_rate": 3.8676194082537535e-08, + "loss": 0.5522493124008179, + "step": 5526 + }, + { + "epoch": 1.9476651982378854, + "grad_norm": 1.8834504959770908, + "learning_rate": 3.8165947884898356e-08, + "loss": 0.5875294208526611, + "step": 5527 + }, + { + "epoch": 1.9480176211453744, + "grad_norm": 1.8990167388470667, + "learning_rate": 3.765908341989644e-08, + "loss": 0.5725122690200806, + "step": 5528 + }, + { + "epoch": 1.9483700440528633, + "grad_norm": 1.7744908913216453, + "learning_rate": 3.7155600859595243e-08, + "loss": 0.5198935866355896, + "step": 5529 + }, + { + "epoch": 1.9487224669603525, + "grad_norm": 1.8236927705658619, + "learning_rate": 3.665550037491361e-08, + "loss": 0.6396631598472595, + "step": 5530 + }, + { + "epoch": 1.9490748898678414, + "grad_norm": 1.8879612013695581, + "learning_rate": 3.6158782135617965e-08, + "loss": 0.666089653968811, + "step": 5531 + }, + { + "epoch": 1.9494273127753305, + "grad_norm": 1.9912413735248546, + "learning_rate": 3.5665446310330087e-08, + "loss": 0.6818836331367493, + "step": 5532 + }, + { + "epoch": 1.9497797356828195, + "grad_norm": 2.04266783813749, + "learning_rate": 3.517549306652157e-08, + "loss": 0.533860981464386, + "step": 5533 + }, + { + "epoch": 1.9501321585903084, + "grad_norm": 2.011493253926506, + "learning_rate": 3.468892257051493e-08, + "loss": 0.6174973249435425, + "step": 5534 + }, + { + "epoch": 1.9504845814977974, + "grad_norm": 2.07102768257305, + "learning_rate": 3.4205734987488027e-08, + "loss": 0.6010403037071228, + "step": 5535 + }, + { + "epoch": 1.9508370044052863, + "grad_norm": 1.8654722728182422, + "learning_rate": 3.372593048146744e-08, + "loss": 0.6475502252578735, + "step": 5536 + }, + { + "epoch": 
1.9511894273127752, + "grad_norm": 2.080853183455891, + "learning_rate": 3.3249509215330653e-08, + "loss": 0.5625165700912476, + "step": 5537 + }, + { + "epoch": 1.9515418502202642, + "grad_norm": 2.0303262611818336, + "learning_rate": 3.277647135080941e-08, + "loss": 0.6504719257354736, + "step": 5538 + }, + { + "epoch": 1.9518942731277533, + "grad_norm": 1.7964243534988884, + "learning_rate": 3.230681704848415e-08, + "loss": 0.6217454671859741, + "step": 5539 + }, + { + "epoch": 1.9522466960352423, + "grad_norm": 1.975881803401868, + "learning_rate": 3.1840546467788445e-08, + "loss": 0.5804678201675415, + "step": 5540 + }, + { + "epoch": 1.9525991189427314, + "grad_norm": 1.7644690968017507, + "learning_rate": 3.1377659767006795e-08, + "loss": 0.6133759617805481, + "step": 5541 + }, + { + "epoch": 1.9529515418502204, + "grad_norm": 1.736020484111057, + "learning_rate": 3.0918157103273506e-08, + "loss": 0.508539080619812, + "step": 5542 + }, + { + "epoch": 1.9533039647577093, + "grad_norm": 2.115379893074018, + "learning_rate": 3.0462038632577126e-08, + "loss": 0.5682996511459351, + "step": 5543 + }, + { + "epoch": 1.9536563876651982, + "grad_norm": 2.0360556708735276, + "learning_rate": 3.000930450975603e-08, + "loss": 0.7072808742523193, + "step": 5544 + }, + { + "epoch": 1.9540088105726872, + "grad_norm": 2.092981328238059, + "learning_rate": 2.9559954888497278e-08, + "loss": 0.5948976278305054, + "step": 5545 + }, + { + "epoch": 1.954361233480176, + "grad_norm": 1.827038503098094, + "learning_rate": 2.911398992134218e-08, + "loss": 0.5111032128334045, + "step": 5546 + }, + { + "epoch": 1.954713656387665, + "grad_norm": 1.8278152391313893, + "learning_rate": 2.8671409759681858e-08, + "loss": 0.553802490234375, + "step": 5547 + }, + { + "epoch": 1.9550660792951542, + "grad_norm": 1.685843539181356, + "learning_rate": 2.8232214553759462e-08, + "loss": 0.5091711282730103, + "step": 5548 + }, + { + "epoch": 1.9554185022026431, + "grad_norm": 1.4871983076237012, + 
"learning_rate": 2.7796404452666847e-08, + "loss": 0.47025251388549805, + "step": 5549 + }, + { + "epoch": 1.9557709251101323, + "grad_norm": 2.031516899140332, + "learning_rate": 2.7363979604349e-08, + "loss": 0.6174348592758179, + "step": 5550 + }, + { + "epoch": 1.9561233480176212, + "grad_norm": 1.8902471541583934, + "learning_rate": 2.69349401555985e-08, + "loss": 0.5516685247421265, + "step": 5551 + }, + { + "epoch": 1.9564757709251102, + "grad_norm": 2.1329834880360563, + "learning_rate": 2.6509286252063282e-08, + "loss": 0.6272131204605103, + "step": 5552 + }, + { + "epoch": 1.956828193832599, + "grad_norm": 1.796045915873636, + "learning_rate": 2.6087018038239987e-08, + "loss": 0.5913189649581909, + "step": 5553 + }, + { + "epoch": 1.957180616740088, + "grad_norm": 1.8863252927172953, + "learning_rate": 2.5668135657472835e-08, + "loss": 0.6802668571472168, + "step": 5554 + }, + { + "epoch": 1.957533039647577, + "grad_norm": 1.9442650959080303, + "learning_rate": 2.525263925196142e-08, + "loss": 0.5829865336418152, + "step": 5555 + }, + { + "epoch": 1.9578854625550661, + "grad_norm": 2.0474932427098627, + "learning_rate": 2.4840528962752907e-08, + "loss": 0.6400870680809021, + "step": 5556 + }, + { + "epoch": 1.958237885462555, + "grad_norm": 2.043080792800152, + "learning_rate": 2.4431804929746506e-08, + "loss": 0.48432302474975586, + "step": 5557 + }, + { + "epoch": 1.958590308370044, + "grad_norm": 1.9639599818265998, + "learning_rate": 2.4026467291691223e-08, + "loss": 0.5494402647018433, + "step": 5558 + }, + { + "epoch": 1.9589427312775332, + "grad_norm": 1.800709765694371, + "learning_rate": 2.3624516186186996e-08, + "loss": 0.5393223762512207, + "step": 5559 + }, + { + "epoch": 1.959295154185022, + "grad_norm": 1.868096905678952, + "learning_rate": 2.322595174968245e-08, + "loss": 0.5500867962837219, + "step": 5560 + }, + { + "epoch": 1.959647577092511, + "grad_norm": 1.986290631971783, + "learning_rate": 2.283077411747825e-08, + "loss": 
0.5618818998336792, + "step": 5561 + }, + { + "epoch": 1.96, + "grad_norm": 2.0174876429391526, + "learning_rate": 2.243898342372597e-08, + "loss": 0.5681769251823425, + "step": 5562 + }, + { + "epoch": 1.960352422907489, + "grad_norm": 2.160298007931608, + "learning_rate": 2.2050579801424777e-08, + "loss": 0.8009706139564514, + "step": 5563 + }, + { + "epoch": 1.9607048458149778, + "grad_norm": 2.2076681264311517, + "learning_rate": 2.1665563382426978e-08, + "loss": 0.5609455704689026, + "step": 5564 + }, + { + "epoch": 1.961057268722467, + "grad_norm": 1.6584397285315808, + "learning_rate": 2.1283934297432472e-08, + "loss": 0.5615163445472717, + "step": 5565 + }, + { + "epoch": 1.961409691629956, + "grad_norm": 2.4819954064616265, + "learning_rate": 2.0905692675993182e-08, + "loss": 0.4442581832408905, + "step": 5566 + }, + { + "epoch": 1.961762114537445, + "grad_norm": 2.0037139303731344, + "learning_rate": 2.0530838646510842e-08, + "loss": 0.6557266116142273, + "step": 5567 + }, + { + "epoch": 1.962114537444934, + "grad_norm": 1.851215643338071, + "learning_rate": 2.0159372336235884e-08, + "loss": 0.5911799669265747, + "step": 5568 + }, + { + "epoch": 1.962466960352423, + "grad_norm": 2.0920087166052057, + "learning_rate": 1.9791293871269656e-08, + "loss": 0.5480202436447144, + "step": 5569 + }, + { + "epoch": 1.962819383259912, + "grad_norm": 2.0350633249337795, + "learning_rate": 1.9426603376563325e-08, + "loss": 0.6489467620849609, + "step": 5570 + }, + { + "epoch": 1.9631718061674008, + "grad_norm": 1.8480180634522771, + "learning_rate": 1.9065300975917856e-08, + "loss": 0.4699944853782654, + "step": 5571 + }, + { + "epoch": 1.9635242290748898, + "grad_norm": 1.8923901172350763, + "learning_rate": 1.8707386791985137e-08, + "loss": 0.6684885025024414, + "step": 5572 + }, + { + "epoch": 1.9638766519823787, + "grad_norm": 2.2169126358939413, + "learning_rate": 1.835286094626576e-08, + "loss": 0.5847122073173523, + "step": 5573 + }, + { + "epoch": 
1.9642290748898679, + "grad_norm": 1.801041360244202, + "learning_rate": 1.8001723559109007e-08, + "loss": 0.5427859425544739, + "step": 5574 + }, + { + "epoch": 1.9645814977973568, + "grad_norm": 2.032431019918, + "learning_rate": 1.7653974749715087e-08, + "loss": 0.6545590758323669, + "step": 5575 + }, + { + "epoch": 1.964933920704846, + "grad_norm": 1.785624619961358, + "learning_rate": 1.730961463613512e-08, + "loss": 0.6369475722312927, + "step": 5576 + }, + { + "epoch": 1.965286343612335, + "grad_norm": 1.989892215094852, + "learning_rate": 1.696864333526893e-08, + "loss": 0.5165325403213501, + "step": 5577 + }, + { + "epoch": 1.9656387665198238, + "grad_norm": 2.147184198038496, + "learning_rate": 1.6631060962863933e-08, + "loss": 0.5651812553405762, + "step": 5578 + }, + { + "epoch": 1.9659911894273128, + "grad_norm": 1.6839108762220567, + "learning_rate": 1.6296867633519563e-08, + "loss": 0.5249905586242676, + "step": 5579 + }, + { + "epoch": 1.9663436123348017, + "grad_norm": 1.8723453129570697, + "learning_rate": 1.5966063460683967e-08, + "loss": 0.6748663783073425, + "step": 5580 + }, + { + "epoch": 1.9666960352422906, + "grad_norm": 1.654472064493344, + "learning_rate": 1.5638648556656198e-08, + "loss": 0.5276468992233276, + "step": 5581 + }, + { + "epoch": 1.9670484581497796, + "grad_norm": 1.7910399914217132, + "learning_rate": 1.5314623032581798e-08, + "loss": 0.5778729319572449, + "step": 5582 + }, + { + "epoch": 1.9674008810572687, + "grad_norm": 1.8564203677999862, + "learning_rate": 1.4993986998457223e-08, + "loss": 0.5805479288101196, + "step": 5583 + }, + { + "epoch": 1.9677533039647577, + "grad_norm": 1.9817945876697571, + "learning_rate": 1.4676740563129843e-08, + "loss": 0.6213263273239136, + "step": 5584 + }, + { + "epoch": 1.9681057268722468, + "grad_norm": 1.8037978918771924, + "learning_rate": 1.4362883834294627e-08, + "loss": 0.5081031322479248, + "step": 5585 + }, + { + "epoch": 1.9684581497797358, + "grad_norm": 1.71465121106617, + 
"learning_rate": 1.4052416918495237e-08, + "loss": 0.5605350136756897, + "step": 5586 + }, + { + "epoch": 1.9688105726872247, + "grad_norm": 2.1182297496689877, + "learning_rate": 1.3745339921126255e-08, + "loss": 0.701635479927063, + "step": 5587 + }, + { + "epoch": 1.9691629955947136, + "grad_norm": 4.768978361346767, + "learning_rate": 1.344165294642985e-08, + "loss": 0.5537668466567993, + "step": 5588 + }, + { + "epoch": 1.9695154185022026, + "grad_norm": 1.9636754875619487, + "learning_rate": 1.3141356097500225e-08, + "loss": 0.6395033597946167, + "step": 5589 + }, + { + "epoch": 1.9698678414096915, + "grad_norm": 2.0129419054377355, + "learning_rate": 1.2844449476276943e-08, + "loss": 0.549985408782959, + "step": 5590 + }, + { + "epoch": 1.9702202643171807, + "grad_norm": 1.5684457658919975, + "learning_rate": 1.2550933183550496e-08, + "loss": 0.4503220021724701, + "step": 5591 + }, + { + "epoch": 1.9705726872246696, + "grad_norm": 1.842567825609057, + "learning_rate": 1.2260807318962286e-08, + "loss": 0.6369946599006653, + "step": 5592 + }, + { + "epoch": 1.9709251101321585, + "grad_norm": 1.9389149649481725, + "learning_rate": 1.197407198099909e-08, + "loss": 0.547295093536377, + "step": 5593 + }, + { + "epoch": 1.9712775330396477, + "grad_norm": 1.9379429852476115, + "learning_rate": 1.1690727267000823e-08, + "loss": 0.578770101070404, + "step": 5594 + }, + { + "epoch": 1.9716299559471366, + "grad_norm": 1.7979041690440398, + "learning_rate": 1.1410773273151654e-08, + "loss": 0.5992920398712158, + "step": 5595 + }, + { + "epoch": 1.9719823788546256, + "grad_norm": 2.0358089708846503, + "learning_rate": 1.1134210094488896e-08, + "loss": 0.5912446975708008, + "step": 5596 + }, + { + "epoch": 1.9723348017621145, + "grad_norm": 1.9956728807231137, + "learning_rate": 1.0861037824896337e-08, + "loss": 0.6539223194122314, + "step": 5597 + }, + { + "epoch": 1.9726872246696034, + "grad_norm": 1.6995757910859364, + "learning_rate": 1.0591256557108686e-08, + "loss": 
0.6487923860549927, + "step": 5598 + }, + { + "epoch": 1.9730396475770924, + "grad_norm": 2.0265831695223384, + "learning_rate": 1.0324866382707133e-08, + "loss": 0.7950254678726196, + "step": 5599 + }, + { + "epoch": 1.9733920704845815, + "grad_norm": 1.7028165277673737, + "learning_rate": 1.006186739212267e-08, + "loss": 0.4941173195838928, + "step": 5600 + }, + { + "epoch": 1.9737444933920705, + "grad_norm": 1.8542643380709567, + "learning_rate": 9.802259674637215e-09, + "loss": 0.6733928322792053, + "step": 5601 + }, + { + "epoch": 1.9740969162995596, + "grad_norm": 1.7591584352828642, + "learning_rate": 9.546043318376941e-09, + "loss": 0.5084437131881714, + "step": 5602 + }, + { + "epoch": 1.9744493392070486, + "grad_norm": 1.7908335232844454, + "learning_rate": 9.293218410320049e-09, + "loss": 0.4499536156654358, + "step": 5603 + }, + { + "epoch": 1.9748017621145375, + "grad_norm": 1.8930332249062705, + "learning_rate": 9.04378503629344e-09, + "loss": 0.557701826095581, + "step": 5604 + }, + { + "epoch": 1.9751541850220264, + "grad_norm": 1.8335406987256675, + "learning_rate": 8.797743280972715e-09, + "loss": 0.6110183000564575, + "step": 5605 + }, + { + "epoch": 1.9755066079295154, + "grad_norm": 1.7153205510430745, + "learning_rate": 8.555093227878842e-09, + "loss": 0.5877780318260193, + "step": 5606 + }, + { + "epoch": 1.9758590308370043, + "grad_norm": 1.679841288677745, + "learning_rate": 8.315834959385927e-09, + "loss": 0.48567962646484375, + "step": 5607 + }, + { + "epoch": 1.9762114537444933, + "grad_norm": 1.8860795135087454, + "learning_rate": 8.079968556714557e-09, + "loss": 0.5536524653434753, + "step": 5608 + }, + { + "epoch": 1.9765638766519824, + "grad_norm": 1.8444573855158568, + "learning_rate": 7.847494099934017e-09, + "loss": 0.7685257196426392, + "step": 5609 + }, + { + "epoch": 1.9769162995594713, + "grad_norm": 1.6340535068378546, + "learning_rate": 7.618411667961179e-09, + "loss": 0.5442079305648804, + "step": 5610 + }, + { + "epoch": 
1.9772687224669605, + "grad_norm": 1.6630624172257082, + "learning_rate": 7.392721338563835e-09, + "loss": 0.5034504532814026, + "step": 5611 + }, + { + "epoch": 1.9776211453744494, + "grad_norm": 1.8322008815729305, + "learning_rate": 7.1704231883551465e-09, + "loss": 0.4912964701652527, + "step": 5612 + }, + { + "epoch": 1.9779735682819384, + "grad_norm": 2.134310001014161, + "learning_rate": 6.951517292800303e-09, + "loss": 0.6034345626831055, + "step": 5613 + }, + { + "epoch": 1.9783259911894273, + "grad_norm": 1.7043803614532214, + "learning_rate": 6.736003726209861e-09, + "loss": 0.5379009246826172, + "step": 5614 + }, + { + "epoch": 1.9786784140969162, + "grad_norm": 1.8487957346880508, + "learning_rate": 6.523882561744188e-09, + "loss": 0.6571087837219238, + "step": 5615 + }, + { + "epoch": 1.9790308370044052, + "grad_norm": 1.9626526350308011, + "learning_rate": 6.315153871411239e-09, + "loss": 0.6473923921585083, + "step": 5616 + }, + { + "epoch": 1.9793832599118941, + "grad_norm": 1.908714917292982, + "learning_rate": 6.1098177260687786e-09, + "loss": 0.5877989530563354, + "step": 5617 + }, + { + "epoch": 1.9797356828193833, + "grad_norm": 2.11970631488856, + "learning_rate": 5.907874195422159e-09, + "loss": 0.5058172941207886, + "step": 5618 + }, + { + "epoch": 1.9800881057268722, + "grad_norm": 2.0101537086675143, + "learning_rate": 5.70932334802432e-09, + "loss": 0.6471046805381775, + "step": 5619 + }, + { + "epoch": 1.9804405286343614, + "grad_norm": 1.9865988373367267, + "learning_rate": 5.514165251276904e-09, + "loss": 0.6687172651290894, + "step": 5620 + }, + { + "epoch": 1.9807929515418503, + "grad_norm": 1.8269412184920357, + "learning_rate": 5.322399971431358e-09, + "loss": 0.5726118087768555, + "step": 5621 + }, + { + "epoch": 1.9811453744493392, + "grad_norm": 2.247520087423374, + "learning_rate": 5.134027573584499e-09, + "loss": 0.6534412503242493, + "step": 5622 + }, + { + "epoch": 1.9814977973568282, + "grad_norm": 1.9962084272761849, + 
"learning_rate": 4.949048121682953e-09, + "loss": 0.5972425937652588, + "step": 5623 + }, + { + "epoch": 1.9818502202643171, + "grad_norm": 2.13007133485212, + "learning_rate": 4.767461678522045e-09, + "loss": 0.6420427560806274, + "step": 5624 + }, + { + "epoch": 1.982202643171806, + "grad_norm": 2.0162772178768513, + "learning_rate": 4.589268305745798e-09, + "loss": 0.5912461876869202, + "step": 5625 + }, + { + "epoch": 1.982555066079295, + "grad_norm": 1.8724213041645918, + "learning_rate": 4.414468063843602e-09, + "loss": 0.5942744016647339, + "step": 5626 + }, + { + "epoch": 1.9829074889867842, + "grad_norm": 2.156440148804406, + "learning_rate": 4.243061012154659e-09, + "loss": 0.6618138551712036, + "step": 5627 + }, + { + "epoch": 1.983259911894273, + "grad_norm": 1.949825469014612, + "learning_rate": 4.075047208867977e-09, + "loss": 0.6046779155731201, + "step": 5628 + }, + { + "epoch": 1.9836123348017622, + "grad_norm": 1.8484056273808063, + "learning_rate": 3.9104267110168235e-09, + "loss": 0.6797989010810852, + "step": 5629 + }, + { + "epoch": 1.9839647577092512, + "grad_norm": 1.5948446953630264, + "learning_rate": 3.749199574486495e-09, + "loss": 0.4882436692714691, + "step": 5630 + }, + { + "epoch": 1.9843171806167401, + "grad_norm": 2.0513829030138324, + "learning_rate": 3.591365854008766e-09, + "loss": 0.6694678068161011, + "step": 5631 + }, + { + "epoch": 1.984669603524229, + "grad_norm": 1.9290678063690434, + "learning_rate": 3.436925603161889e-09, + "loss": 0.6015830039978027, + "step": 5632 + }, + { + "epoch": 1.985022026431718, + "grad_norm": 1.6208579673883046, + "learning_rate": 3.2858788743739267e-09, + "loss": 0.5627756118774414, + "step": 5633 + }, + { + "epoch": 1.985374449339207, + "grad_norm": 2.0314486190919836, + "learning_rate": 3.138225718920529e-09, + "loss": 0.6069298386573792, + "step": 5634 + }, + { + "epoch": 1.985726872246696, + "grad_norm": 1.742509198855919, + "learning_rate": 2.993966186926045e-09, + "loss": 
0.5779693722724915, + "step": 5635 + }, + { + "epoch": 1.986079295154185, + "grad_norm": 2.1341372247717914, + "learning_rate": 2.8531003273624126e-09, + "loss": 0.6706609725952148, + "step": 5636 + }, + { + "epoch": 1.986431718061674, + "grad_norm": 1.8945369145507158, + "learning_rate": 2.715628188046937e-09, + "loss": 0.6361640691757202, + "step": 5637 + }, + { + "epoch": 1.9867841409691631, + "grad_norm": 1.5755870938483592, + "learning_rate": 2.581549815648954e-09, + "loss": 0.557577908039093, + "step": 5638 + }, + { + "epoch": 1.987136563876652, + "grad_norm": 1.9370315563575715, + "learning_rate": 2.450865255684276e-09, + "loss": 0.7735704183578491, + "step": 5639 + }, + { + "epoch": 1.987488986784141, + "grad_norm": 1.7912792802398185, + "learning_rate": 2.3235745525151956e-09, + "loss": 0.5836409330368042, + "step": 5640 + }, + { + "epoch": 1.98784140969163, + "grad_norm": 2.1774628342963616, + "learning_rate": 2.1996777493527023e-09, + "loss": 0.5824601650238037, + "step": 5641 + }, + { + "epoch": 1.9881938325991189, + "grad_norm": 1.9237771552425715, + "learning_rate": 2.0791748882575958e-09, + "loss": 0.6183140873908997, + "step": 5642 + }, + { + "epoch": 1.9885462555066078, + "grad_norm": 2.303117222732587, + "learning_rate": 1.9620660101349333e-09, + "loss": 0.6071987748146057, + "step": 5643 + }, + { + "epoch": 1.988898678414097, + "grad_norm": 1.884829146752756, + "learning_rate": 1.8483511547406907e-09, + "loss": 0.655383825302124, + "step": 5644 + }, + { + "epoch": 1.989251101321586, + "grad_norm": 2.0011243386948117, + "learning_rate": 1.738030360677323e-09, + "loss": 0.6328674554824829, + "step": 5645 + }, + { + "epoch": 1.989603524229075, + "grad_norm": 1.7353302673505981, + "learning_rate": 1.631103665394873e-09, + "loss": 0.5012212991714478, + "step": 5646 + }, + { + "epoch": 1.989955947136564, + "grad_norm": 1.911618533436513, + "learning_rate": 1.5275711051909724e-09, + "loss": 0.6202536821365356, + "step": 5647 + }, + { + "epoch": 
1.990308370044053, + "grad_norm": 1.7599160794894961, + "learning_rate": 1.427432715214172e-09, + "loss": 0.4922720789909363, + "step": 5648 + }, + { + "epoch": 1.9906607929515419, + "grad_norm": 2.21375034225685, + "learning_rate": 1.33068852945617e-09, + "loss": 0.6591637134552002, + "step": 5649 + }, + { + "epoch": 1.9910132158590308, + "grad_norm": 2.0468816346516165, + "learning_rate": 1.2373385807584736e-09, + "loss": 0.5481886863708496, + "step": 5650 + }, + { + "epoch": 1.9913656387665197, + "grad_norm": 1.8773578553009866, + "learning_rate": 1.1473829008123994e-09, + "loss": 0.5642685890197754, + "step": 5651 + }, + { + "epoch": 1.9917180616740087, + "grad_norm": 2.25781450338385, + "learning_rate": 1.060821520153521e-09, + "loss": 0.6736876368522644, + "step": 5652 + }, + { + "epoch": 1.9920704845814978, + "grad_norm": 1.9047917173058142, + "learning_rate": 9.776544681672218e-10, + "loss": 0.6823733448982239, + "step": 5653 + }, + { + "epoch": 1.9924229074889868, + "grad_norm": 1.9405145779165673, + "learning_rate": 8.978817730864731e-10, + "loss": 0.4686351716518402, + "step": 5654 + }, + { + "epoch": 1.992775330396476, + "grad_norm": 1.8195643517733058, + "learning_rate": 8.215034619907247e-10, + "loss": 0.5027543306350708, + "step": 5655 + }, + { + "epoch": 1.9931277533039649, + "grad_norm": 1.8439113550188309, + "learning_rate": 7.485195608081253e-10, + "loss": 0.6217285394668579, + "step": 5656 + }, + { + "epoch": 1.9934801762114538, + "grad_norm": 1.6691057045729332, + "learning_rate": 6.78930094315522e-10, + "loss": 0.5942907929420471, + "step": 5657 + }, + { + "epoch": 1.9938325991189427, + "grad_norm": 2.096767040436613, + "learning_rate": 6.127350861351299e-10, + "loss": 0.7282885313034058, + "step": 5658 + }, + { + "epoch": 1.9941850220264317, + "grad_norm": 1.7781855320052453, + "learning_rate": 5.499345587389737e-10, + "loss": 0.653915286064148, + "step": 5659 + }, + { + "epoch": 1.9945374449339206, + "grad_norm": 2.223388921265014, + 
"learning_rate": 4.905285334455556e-10, + "loss": 0.6993501782417297, + "step": 5660 + }, + { + "epoch": 1.9948898678414095, + "grad_norm": 1.6333458377223884, + "learning_rate": 4.3451703042207694e-10, + "loss": 0.712554931640625, + "step": 5661 + }, + { + "epoch": 1.9952422907488987, + "grad_norm": 2.0241768111751686, + "learning_rate": 3.81900068681107e-10, + "loss": 0.7523812055587769, + "step": 5662 + }, + { + "epoch": 1.9955947136563876, + "grad_norm": 1.787839297148447, + "learning_rate": 3.3267766608502395e-10, + "loss": 0.5138256549835205, + "step": 5663 + }, + { + "epoch": 1.9959471365638768, + "grad_norm": 1.9006089009269762, + "learning_rate": 2.8684983934490486e-10, + "loss": 0.6154034733772278, + "step": 5664 + }, + { + "epoch": 1.9962995594713657, + "grad_norm": 2.0640894330103623, + "learning_rate": 2.4441660401608447e-10, + "loss": 0.5790190696716309, + "step": 5665 + }, + { + "epoch": 1.9966519823788547, + "grad_norm": 2.000808545557318, + "learning_rate": 2.0537797450370657e-10, + "loss": 0.6873353719711304, + "step": 5666 + }, + { + "epoch": 1.9970044052863436, + "grad_norm": 2.2263273319791583, + "learning_rate": 1.6973396405939312e-10, + "loss": 0.5764753222465515, + "step": 5667 + }, + { + "epoch": 1.9973568281938325, + "grad_norm": 1.9150199338130098, + "learning_rate": 1.374845847856854e-10, + "loss": 0.5144297480583191, + "step": 5668 + }, + { + "epoch": 1.9977092511013215, + "grad_norm": 2.1042696631217415, + "learning_rate": 1.0862984762716189e-10, + "loss": 0.5934832692146301, + "step": 5669 + }, + { + "epoch": 1.9980616740088104, + "grad_norm": 1.7802660124503475, + "learning_rate": 8.316976238154084e-11, + "loss": 0.4544188976287842, + "step": 5670 + }, + { + "epoch": 1.9984140969162996, + "grad_norm": 1.88553614744617, + "learning_rate": 6.110433769079827e-11, + "loss": 0.44844698905944824, + "step": 5671 + }, + { + "epoch": 1.9987665198237885, + "grad_norm": 1.7574547722102742, + "learning_rate": 4.2433581045608905e-11, + "loss": 
0.5272520780563354, + "step": 5672 + }, + { + "epoch": 1.9991189427312777, + "grad_norm": 1.8300265717895403, + "learning_rate": 2.715749878312579e-11, + "loss": 0.5003396272659302, + "step": 5673 + }, + { + "epoch": 1.9994713656387666, + "grad_norm": 2.047162707278085, + "learning_rate": 1.5276096090310887e-11, + "loss": 0.561710000038147, + "step": 5674 + }, + { + "epoch": 1.9998237885462555, + "grad_norm": 1.8995125757935345, + "learning_rate": 6.789377000604447e-12, + "loss": 0.666955292224884, + "step": 5675 + }, + { + "epoch": 2.0, + "grad_norm": 4.048422061559424, + "learning_rate": 1.6973443939249934e-12, + "loss": 0.7278814911842346, + "step": 5676 + }, + { + "epoch": 2.0, + "step": 5676, + "total_flos": 1754791774076928.0, + "train_loss": 0.6725200974527508, + "train_runtime": 30148.3569, + "train_samples_per_second": 0.753, + "train_steps_per_second": 0.188 + } + ], + "logging_steps": 1, + "max_steps": 5676, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1754791774076928.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3d16cae9f8126645d9b722fd466525457b2f8a90 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db5e3f64fea9062d775ce1214f2b31fbf79ffdfb10af7998752ce02faa3d3dd5 +size 6968 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..59c00ab2b05605df009115f08ace90f3cf44a5fc Binary files /dev/null and b/training_loss.png differ